-rw-r--r--  Makefile  1
-rw-r--r--  Makefile.rules  4
-rw-r--r--  docs/BitCodeFormat.rst  49
-rw-r--r--  docs/DeveloperPolicy.rst  3
-rw-r--r--  docs/ExtendingLLVM.rst  4
-rw-r--r--  docs/GettingStarted.rst  64
-rw-r--r--  docs/HowToSetUpLLVMStyleRTTI.rst  141
-rw-r--r--  docs/Lexicon.rst  13
-rw-r--r--  docs/Phabricator.rst  94
-rw-r--r--  docs/SphinxQuickstartTemplate.rst  125
-rw-r--r--  docs/userguides.rst  6
-rw-r--r--  examples/ExceptionDemo/ExceptionDemo.cpp  1177
-rw-r--r--  include/llvm-c/Core.h  2
-rw-r--r--  include/llvm-c/Target.h  10
-rw-r--r--  include/llvm/ADT/BitVector.h  56
-rw-r--r--  include/llvm/ADT/ImmutableSet.h  2
-rw-r--r--  include/llvm/ADT/SmallBitVector.h  30
-rw-r--r--  include/llvm/ADT/Triple.h  6
-rw-r--r--  include/llvm/Analysis/DependenceAnalysis.h  891
-rw-r--r--  include/llvm/Analysis/Passes.h  9
-rw-r--r--  include/llvm/Analysis/ScalarEvolution.h  1
-rw-r--r--  include/llvm/Analysis/ScalarEvolutionExpander.h  4
-rw-r--r--  include/llvm/Analysis/ScalarEvolutionExpressions.h  14
-rw-r--r--  include/llvm/Argument.h  1
-rw-r--r--  include/llvm/Attributes.h  272
-rw-r--r--  include/llvm/BasicBlock.h  1
-rw-r--r--  include/llvm/Bitcode/BitstreamReader.h  2
-rw-r--r--  include/llvm/CodeGen/CommandFlags.h  228
-rw-r--r--  include/llvm/CodeGen/LiveIntervalAnalysis.h  27
-rw-r--r--  include/llvm/CodeGen/LiveVariables.h  6
-rw-r--r--  include/llvm/CodeGen/MachineInstrBuilder.h  17
-rw-r--r--  include/llvm/CodeGen/MachineRegisterInfo.h  31
-rw-r--r--  include/llvm/CodeGen/MachineScheduler.h  4
-rw-r--r--  include/llvm/CodeGen/PseudoSourceValue.h  4
-rw-r--r--  include/llvm/CodeGen/RegisterClassInfo.h  19
-rw-r--r--  include/llvm/CodeGen/RegisterScavenging.h  9
-rw-r--r--  include/llvm/CodeGen/ScheduleDAGILP.h  86
-rw-r--r--  include/llvm/CodeGen/SelectionDAGNodes.h  30
-rw-r--r--  include/llvm/Constant.h  2
-rw-r--r--  include/llvm/Constants.h  13
-rw-r--r--  include/llvm/DataLayout.h  16
-rw-r--r--  include/llvm/DerivedTypes.h  8
-rw-r--r--  include/llvm/Function.h  13
-rw-r--r--  include/llvm/GlobalAlias.h  1
-rw-r--r--  include/llvm/GlobalValue.h  1
-rw-r--r--  include/llvm/GlobalVariable.h  1
-rw-r--r--  include/llvm/InitializePasses.h  2
-rw-r--r--  include/llvm/InlineAsm.h  1
-rw-r--r--  include/llvm/InstrTypes.h  5
-rw-r--r--  include/llvm/Instruction.h  1
-rw-r--r--  include/llvm/Instructions.h  127
-rw-r--r--  include/llvm/IntrinsicInst.h  9
-rw-r--r--  include/llvm/Intrinsics.h  4
-rw-r--r--  include/llvm/LinkAllPasses.h  1
-rw-r--r--  include/llvm/MC/MCAssembler.h  10
-rw-r--r--  include/llvm/MC/MCExpr.h  7
-rw-r--r--  include/llvm/MC/MCParser/MCAsmParser.h  22
-rw-r--r--  include/llvm/MC/MCParser/MCParsedAsmOperand.h  24
-rw-r--r--  include/llvm/MC/MCSection.h  2
-rw-r--r--  include/llvm/MC/MCSectionCOFF.h  1
-rw-r--r--  include/llvm/MC/MCSectionELF.h  1
-rw-r--r--  include/llvm/MC/MCSectionMachO.h  1
-rw-r--r--  include/llvm/MC/MCStreamer.h  5
-rw-r--r--  include/llvm/MC/MCTargetAsmParser.h  30
-rw-r--r--  include/llvm/Metadata.h  2
-rw-r--r--  include/llvm/Object/Archive.h  1
-rw-r--r--  include/llvm/Object/Binary.h  1
-rw-r--r--  include/llvm/Object/COFF.h  1
-rw-r--r--  include/llvm/Object/ELF.h  1
-rw-r--r--  include/llvm/Object/MachO.h  1
-rw-r--r--  include/llvm/Object/ObjectFile.h  23
-rw-r--r--  include/llvm/Operator.h  6
-rw-r--r--  include/llvm/Support/Casting.h  13
-rw-r--r--  include/llvm/Support/Memory.h  4
-rw-r--r--  include/llvm/Support/YAMLParser.h  7
-rw-r--r--  include/llvm/TableGen/Record.h  146
-rw-r--r--  include/llvm/Target/TargetLowering.h  4
-rw-r--r--  include/llvm/Target/TargetMachine.h  6
-rw-r--r--  include/llvm/Target/TargetTransformImpl.h  54
-rw-r--r--  include/llvm/TargetTransformInfo.h  128
-rw-r--r--  include/llvm/Transforms/Instrumentation.h  2
-rw-r--r--  include/llvm/Transforms/Scalar.h  7
-rw-r--r--  include/llvm/Transforms/Utils/Local.h  3
-rw-r--r--  include/llvm/Transforms/Utils/SimplifyLibCalls.h  43
-rw-r--r--  include/llvm/Type.h  3
-rw-r--r--  include/llvm/User.h  1
-rw-r--r--  include/llvm/Value.h  5
-rw-r--r--  lib/Analysis/Analysis.cpp  1
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp  7
-rw-r--r--  lib/Analysis/CMakeLists.txt  1
-rw-r--r--  lib/Analysis/CodeMetrics.cpp  6
-rw-r--r--  lib/Analysis/ConstantFolding.cpp  12
-rw-r--r--  lib/Analysis/DependenceAnalysis.cpp  3781
-rw-r--r--  lib/Analysis/InlineCost.cpp  15
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp  11
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp  12
-rw-r--r--  lib/Analysis/ValueTracking.cpp  6
-rw-r--r--  lib/AsmParser/LLParser.cpp  60
-rw-r--r--  lib/AsmParser/LLParser.h  2
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.cpp  94
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.h  67
-rw-r--r--  lib/Bitcode/Writer/BitcodeWriter.cpp  123
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.cpp  4
-rw-r--r--  lib/CodeGen/AllocationOrder.cpp  5
-rw-r--r--  lib/CodeGen/Analysis.cpp  6
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp  8
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp  2
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.cpp  8
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.h  8
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp  18
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfException.cpp  2
-rw-r--r--  lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp  2
-rw-r--r--  lib/CodeGen/CalcSpillWeights.cpp  2
-rw-r--r--  lib/CodeGen/CallingConvLower.cpp  2
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.cpp  2
-rw-r--r--  lib/CodeGen/DeadMachineInstructionElim.cpp  8
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp  720
-rw-r--r--  lib/CodeGen/LiveRangeEdit.cpp  2
-rw-r--r--  lib/CodeGen/LiveVariables.cpp  6
-rw-r--r--  lib/CodeGen/MachineCSE.cpp  8
-rw-r--r--  lib/CodeGen/MachineCopyPropagation.cpp  13
-rw-r--r--  lib/CodeGen/MachineFunction.cpp  4
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp  14
-rw-r--r--  lib/CodeGen/MachineScheduler.cpp  126
-rw-r--r--  lib/CodeGen/MachineSink.cpp  2
-rw-r--r--  lib/CodeGen/MachineTraceMetrics.cpp  14
-rw-r--r--  lib/CodeGen/MachineTraceMetrics.h  2
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp  7
-rw-r--r--  lib/CodeGen/Passes.cpp  4
-rw-r--r--  lib/CodeGen/PeepholeOptimizer.cpp  5
-rw-r--r--  lib/CodeGen/PostRASchedulerList.cpp  5
-rw-r--r--  lib/CodeGen/RegAllocFast.cpp  8
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp  6
-rw-r--r--  lib/CodeGen/RegisterClassInfo.cpp  10
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp  86
-rw-r--r--  lib/CodeGen/RegisterPressure.cpp  12
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp  7
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp  93
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp  81
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp  50
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h  2
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp  4
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp  10
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp  5
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp  5
-rw-r--r--  lib/CodeGen/TargetInstrInfoImpl.cpp  2
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp  4
-rw-r--r--  lib/CodeGen/TargetSchedule.cpp  40
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp  5
-rw-r--r--  lib/ExecutionEngine/ExecutionEngine.cpp  12
-rw-r--r--  lib/ExecutionEngine/Interpreter/Execution.cpp  3
-rw-r--r--  lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp  2
-rw-r--r--  lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp  14
-rw-r--r--  lib/ExecutionEngine/JIT/JITEmitter.cpp  14
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp  22
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp  11
-rw-r--r--  lib/MC/MCAsmStreamer.cpp  9
-rw-r--r--  lib/MC/MCELFStreamer.cpp  8
-rw-r--r--  lib/MC/MCParser/AsmParser.cpp  56
-rw-r--r--  lib/MC/MCStreamer.cpp  4
-rw-r--r--  lib/Support/Triple.cpp  4
-rw-r--r--  lib/TableGen/CMakeLists.txt  2
-rw-r--r--  lib/TableGen/Makefile  2
-rw-r--r--  lib/TableGen/Record.cpp  150
-rw-r--r--  lib/TableGen/TGParser.cpp  53
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.h  5
-rw-r--r--  lib/Target/ARM/ARMELFWriterInfo.cpp  2
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp  59
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h  2
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td  25
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp  6
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h  18
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp  18
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCExpr.h  3
-rw-r--r--  lib/Target/CMakeLists.txt  1
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.cpp  3
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.h  9
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp  10
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.cpp  5
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.h  11
-rw-r--r--  lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp  17
-rw-r--r--  lib/Target/MBlaze/MBlazeELFWriterInfo.cpp  2
-rw-r--r--  lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp  5
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetMachine.cpp  2
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetMachine.h  7
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.cpp  4
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.cpp  2
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.h  10
-rw-r--r--  lib/Target/Mips/AsmParser/MipsAsmParser.cpp  18
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.cpp  23
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.td  41
-rw-r--r--  lib/Target/Mips/Mips64InstrInfo.td  30
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp  2
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.h  9
-rw-r--r--  lib/Target/NVPTX/NVPTXAsmPrinter.cpp  8
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.cpp  3
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.h  10
-rw-r--r--  lib/Target/PowerPC/PPCAsmPrinter.cpp  10
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.cpp  16
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp  4
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp  39
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp  18
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp  2
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp  3
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.h  9
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.cpp  2
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.h  9
-rw-r--r--  lib/Target/Target.cpp  11
-rw-r--r--  lib/Target/TargetTransformImpl.cpp  43
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp  99
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp  4
-rw-r--r--  lib/Target/X86/X86AsmPrinter.cpp  3
-rw-r--r--  lib/Target/X86/X86FrameLowering.cpp  4
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp  4
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp  385
-rw-r--r--  lib/Target/X86/X86ISelLowering.h  20
-rw-r--r--  lib/Target/X86/X86InstrCompiler.td  27
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td  3
-rw-r--r--  lib/Target/X86/X86InstrInfo.td  8
-rw-r--r--  lib/Target/X86/X86InstrSSE.td  11
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp  26
-rw-r--r--  lib/Target/X86/X86MCInstLower.h  52
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp  5
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h  1
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp  6
-rw-r--r--  lib/Target/X86/X86TargetMachine.h  17
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.cpp  2
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.h  9
-rw-r--r--  lib/Transforms/IPO/ArgumentPromotion.cpp  33
-rw-r--r--  lib/Transforms/IPO/DeadArgumentElimination.cpp  44
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp  20
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp  12
-rw-r--r--  lib/Transforms/IPO/IPO.cpp  2
-rw-r--r--  lib/Transforms/IPO/PruneEH.cpp  6
-rw-r--r--  lib/Transforms/InstCombine/InstCombine.h  2
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp  81
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp  10
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp  9
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp  3
-rw-r--r--  lib/Transforms/Instrumentation/AddressSanitizer.cpp  117
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp  6
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp  63
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp  134
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp  5
-rw-r--r--  lib/Transforms/Scalar/ObjCARC.cpp  51
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp  371
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp  301
-rw-r--r--  lib/Transforms/Utils/BuildLibCalls.cpp  67
-rw-r--r--  lib/Transforms/Utils/CMakeLists.txt  1
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp  8
-rw-r--r--  lib/Transforms/Utils/Local.cpp  3
-rw-r--r--  lib/Transforms/Utils/LowerInvoke.cpp  32
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp  10
-rw-r--r--  lib/Transforms/Utils/SimplifyLibCalls.cpp  579
-rw-r--r--  lib/VMCore/Attributes.cpp  255
-rw-r--r--  lib/VMCore/AttributesImpl.h (renamed from include/llvm/AttributesImpl.h)  4
-rw-r--r--  lib/VMCore/AutoUpgrade.cpp  3
-rw-r--r--  lib/VMCore/CMakeLists.txt  1
-rw-r--r--  lib/VMCore/Core.cpp  39
-rw-r--r--  lib/VMCore/Function.cpp  6
-rw-r--r--  lib/VMCore/Instructions.cpp  18
-rw-r--r--  lib/VMCore/LLVMContextImpl.cpp  8
-rw-r--r--  lib/VMCore/LLVMContextImpl.h  2
-rw-r--r--  lib/VMCore/TargetTransformInfo.cpp  27
-rw-r--r--  lib/VMCore/Verifier.cpp  11
-rw-r--r--  test/Analysis/DependenceAnalysis/Banerjee.ll  595
-rw-r--r--  test/Analysis/DependenceAnalysis/Coupled.ll  509
-rw-r--r--  test/Analysis/DependenceAnalysis/ExactRDIV.ll  508
-rw-r--r--  test/Analysis/DependenceAnalysis/ExactSIV.ll  428
-rw-r--r--  test/Analysis/DependenceAnalysis/GCD.ll  597
-rw-r--r--  test/Analysis/DependenceAnalysis/Preliminary.ll  469
-rw-r--r--  test/Analysis/DependenceAnalysis/Propagating.ll  467
-rw-r--r--  test/Analysis/DependenceAnalysis/Separability.ll  267
-rw-r--r--  test/Analysis/DependenceAnalysis/StrongSIV.ll  342
-rw-r--r--  test/Analysis/DependenceAnalysis/SymbolicRDIV.ll  312
-rw-r--r--  test/Analysis/DependenceAnalysis/SymbolicSIV.ll  330
-rw-r--r--  test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll  220
-rw-r--r--  test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll  212
-rw-r--r--  test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll  212
-rw-r--r--  test/Analysis/DependenceAnalysis/ZIV.ll  53
-rw-r--r--  test/Analysis/DependenceAnalysis/lit.local.cfg  1
-rw-r--r--  test/Assembler/invalid-fwdref1.ll  4
-rw-r--r--  test/Bitcode/function-encoding-rel-operands.ll  49
-rw-r--r--  test/CMakeLists.txt  2
-rw-r--r--  test/CodeGen/ARM/2011-06-16-TailCallByVal.ll  5
-rw-r--r--  test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll  56
-rw-r--r--  test/CodeGen/ARM/2012-10-04-LDRB_POST_IMM-Crash.ll  17
-rw-r--r--  test/CodeGen/ARM/coalesce-subregs.ll  77
-rw-r--r--  test/CodeGen/ARM/divmod.ll  46
-rw-r--r--  test/CodeGen/ARM/struct_byval.ll  44
-rw-r--r--  test/CodeGen/ARM/vbsl.ll  48
-rw-r--r--  test/CodeGen/ARM/vdup.ll  36
-rw-r--r--  test/CodeGen/ARM/vselect_imax.ll  12
-rw-r--r--  test/CodeGen/Mips/div.ll  18
-rw-r--r--  test/CodeGen/Mips/div_rem.ll  21
-rw-r--r--  test/CodeGen/Mips/divu.ll  18
-rw-r--r--  test/CodeGen/Mips/divu_remu.ll  23
-rw-r--r--  test/CodeGen/Mips/rem.ll  19
-rw-r--r--  test/CodeGen/Mips/remu.ll  18
-rw-r--r--  test/CodeGen/PowerPC/2012-10-12-bitcast.ll  20
-rw-r--r--  test/CodeGen/PowerPC/floatPSA.ll  97
-rw-r--r--  test/CodeGen/PowerPC/novrsave.ll  15
-rw-r--r--  test/CodeGen/PowerPC/structsinmem.ll  227
-rw-r--r--  test/CodeGen/PowerPC/structsinregs.ll  48
-rw-r--r--  test/CodeGen/PowerPC/vrspill.ll  19
-rw-r--r--  test/CodeGen/X86/crash.ll  103
-rw-r--r--  test/CodeGen/X86/early-ifcvt-crash.ll  32
-rw-r--r--  test/CodeGen/X86/fp-load-trunc.ll  61
-rw-r--r--  test/CodeGen/X86/fp-trunc.ll  53
-rw-r--r--  test/CodeGen/X86/handle-move.ll  74
-rw-r--r--  test/CodeGen/X86/misched-ilp.ll  25
-rw-r--r--  test/CodeGen/X86/misched-new.ll  28
-rw-r--r--  test/CodeGen/X86/pr14088.ll  25
-rw-r--r--  test/CodeGen/X86/select.ll  13
-rw-r--r--  test/CodeGen/X86/select_const.ll  16
-rw-r--r--  test/CodeGen/X86/sjlj.ll  46
-rw-r--r--  test/Instrumentation/AddressSanitizer/instrument_global.ll  2
-rw-r--r--  test/MC/Mips/mips-coprocessor-encodings.s  37
-rw-r--r--  test/MC/Mips/mips-register-names.s  71
-rw-r--r--  test/MC/Mips/mips64-register-names.s  70
-rw-r--r--  test/MC/X86/x86_nop.s  12
-rw-r--r--  test/Transforms/InstCombine/strcat-1.ll  38
-rw-r--r--  test/Transforms/InstCombine/strcat-2.ll  32
-rw-r--r--  test/Transforms/InstCombine/strcat-3.ll  22
-rw-r--r--  test/Transforms/InstCombine/strchr-1.ll  54
-rw-r--r--  test/Transforms/InstCombine/strchr-2.ll  21
-rw-r--r--  test/Transforms/InstCombine/strcmp-1.ll  82
-rw-r--r--  test/Transforms/InstCombine/strcmp-2.ll  20
-rw-r--r--  test/Transforms/InstCombine/strncat-1.ll  37
-rw-r--r--  test/Transforms/InstCombine/strncat-2.ll  53
-rw-r--r--  test/Transforms/InstCombine/strncat-3.ll  22
-rw-r--r--  test/Transforms/InstCombine/strncmp-1.ll  97
-rw-r--r--  test/Transforms/InstCombine/strncmp-2.ll  20
-rw-r--r--  test/Transforms/InstCombine/strrchr-1.ll  54
-rw-r--r--  test/Transforms/InstCombine/strrchr-2.ll  21
-rw-r--r--  test/Transforms/InstCombine/struct-assign-tbaa.ll  30
-rw-r--r--  test/Transforms/InstCombine/weak-symbols.ll  33
-rw-r--r--  test/Transforms/SROA/alignment.ll  31
-rw-r--r--  test/Transforms/SROA/basictest.ll  99
-rw-r--r--  test/Transforms/SROA/phi-and-select.ll  42
-rw-r--r--  test/Transforms/SROA/vector-promotion.ll  16
-rw-r--r--  test/Transforms/SimplifyLibCalls/StrCat.ll  33
-rw-r--r--  test/Transforms/SimplifyLibCalls/StrChr.ll  26
-rw-r--r--  test/Transforms/SimplifyLibCalls/StrCmp.ll  65
-rw-r--r--  test/Transforms/SimplifyLibCalls/StrNCat.ll  31
-rw-r--r--  test/Transforms/SimplifyLibCalls/StrNCmp.ll  78
-rw-r--r--  test/Transforms/SimplifyLibCalls/StrRChr.ll  23
-rw-r--r--  test/Transforms/SimplifyLibCalls/weak-symbols.ll  26
-rw-r--r--  tools/bugpoint-passes/bugpoint.exports  1
-rw-r--r--  tools/llc/llc.cpp  215
-rw-r--r--  tools/lli/lli.cpp  28
-rw-r--r--  tools/lto/LTOCodeGenerator.cpp  22
-rw-r--r--  tools/lto/LTOModule.cpp  20
-rw-r--r--  tools/opt/CMakeLists.txt  2
-rw-r--r--  tools/opt/LLVMBuild.txt  2
-rw-r--r--  tools/opt/Makefile  2
-rw-r--r--  tools/opt/opt.cpp  78
-rw-r--r--  unittests/ADT/BitVectorTest.cpp  52
-rw-r--r--  unittests/ADT/CMakeLists.txt  1
-rw-r--r--  unittests/ADT/ImmutableMapTest.cpp  50
-rw-r--r--  unittests/ADT/TripleTest.cpp  12
-rw-r--r--  unittests/ExecutionEngine/JIT/JITTest.cpp  1
-rw-r--r--  unittests/Support/Casting.cpp  51
-rw-r--r--  utils/TableGen/AsmMatcherEmitter.cpp  57
-rw-r--r--  utils/TableGen/AsmWriterEmitter.cpp  2
-rw-r--r--  utils/TableGen/CMakeLists.txt  1
-rw-r--r--  utils/TableGen/CodeEmitterGen.cpp  8
-rw-r--r--  utils/TableGen/CodeGenDAGPatterns.cpp  84
-rw-r--r--  utils/TableGen/CodeGenInstruction.cpp  25
-rw-r--r--  utils/TableGen/CodeGenSchedule.cpp  2
-rw-r--r--  utils/TableGen/DAGISelMatcher.h  2
-rw-r--r--  utils/TableGen/DAGISelMatcherGen.cpp  10
-rw-r--r--  utils/TableGen/FastISelEmitter.cpp  11
-rw-r--r--  utils/TableGen/FixedLenDecoderEmitter.cpp  14
-rw-r--r--  utils/TableGen/InstrInfoEmitter.cpp  6
-rw-r--r--  utils/TableGen/IntrinsicEmitter.cpp  10
-rw-r--r--  utils/TableGen/Makefile  1
-rw-r--r--  utils/TableGen/PseudoLoweringEmitter.cpp  8
-rw-r--r--  utils/TableGen/RegisterInfoEmitter.cpp  4
-rw-r--r--  utils/TableGen/SetTheory.cpp  22
-rw-r--r--  utils/TableGen/TGValueTypes.cpp  26
381 files changed, 18653 insertions, 4194 deletions
diff --git a/Makefile b/Makefile
index 6ea55336b7a..e59441c4329 100644
--- a/Makefile
+++ b/Makefile
@@ -77,7 +77,6 @@ ifeq ($(MAKECMDGOALS),install-clang)
endif
ifeq ($(MAKECMDGOALS),clang-only)
- BUILD_CLANG_ONLY := YES
DIRS := $(filter-out tools docs unittests, $(DIRS)) \
tools/clang tools/lto
OPTIONAL_DIRS :=
diff --git a/Makefile.rules b/Makefile.rules
index b929ffea59f..b2b02c25d44 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -571,7 +571,11 @@ endif
#--------------------------------------------------------------------
ifeq ($(HOST_OS),Darwin)
+ ifdef MACOSX_DEPLOYMENT_TARGET
+ DARWIN_VERSION := $(MACOSX_DEPLOYMENT_TARGET)
+ else
DARWIN_VERSION := `sw_vers -productVersion`
+ endif
# Strip a number like 10.4.7 to 10.4
DARWIN_VERSION := $(shell echo $(DARWIN_VERSION)| sed -E 's/(10.[0-9]).*/\1/')
# Get "4" out of 10.4 for later pieces in the makefile.
diff --git a/docs/BitCodeFormat.rst b/docs/BitCodeFormat.rst
index d3995e7036b..bd26f7b1502 100644
--- a/docs/BitCodeFormat.rst
+++ b/docs/BitCodeFormat.rst
@@ -489,6 +489,8 @@ The magic number for LLVM IR files is:
When combined with the bitcode magic number and viewed as bytes, this is
``"BC 0xC0DE"``.
+.. _Signed VBRs:
+
Signed VBRs
^^^^^^^^^^^
@@ -507,6 +509,7 @@ As such, signed VBR values of a specific width are emitted as follows:
With this encoding, small positive and small negative values can both be emitted
efficiently. Signed VBR encoding is used in ``CST_CODE_INTEGER`` and
``CST_CODE_WIDE_INTEGER`` records within ``CONSTANTS_BLOCK`` blocks.
+It is also used for phi instruction operands in `MODULE_CODE_VERSION`_ 1.
LLVM IR Blocks
^^^^^^^^^^^^^^
@@ -553,13 +556,57 @@ block may contain the following sub-blocks:
* `FUNCTION_BLOCK`_
* `METADATA_BLOCK`_
+.. _MODULE_CODE_VERSION:
+
MODULE_CODE_VERSION Record
^^^^^^^^^^^^^^^^^^^^^^^^^^
``[VERSION, version#]``
The ``VERSION`` record (code 1) contains a single value indicating the format
-version. Only version 0 is supported at this time.
+version. Versions 0 and 1 are supported at this time. The difference between
+versions 0 and 1 lies in the encoding of instruction operands in
+each `FUNCTION_BLOCK`_.
+
+In version 0, each value defined by an instruction is assigned an ID
+unique to the function. Function-level value IDs are assigned starting from
+``NumModuleValues`` since they share the same namespace as module-level
+values. The value enumerator resets after each function. When a value is
+an operand of an instruction, the value ID is used to represent the operand.
+For large functions or large modules, these operand values can be large.
+
+The encoding in version 1 attempts to avoid large operand values
+in common cases. Instead of using the value ID directly, operands are
+encoded relative to the current instruction. Thus, if an operand
+is the value defined by the previous instruction, the operand
+will be encoded as 1.
+
+For example, instead of
+
+.. code-block:: llvm
+
+ #n = load #n-1
+ #n+1 = icmp eq #n, #const0
+ br #n+1, label #(bb1), label #(bb2)
+
+version 1 will encode the instructions as
+
+.. code-block:: llvm
+
+ #n = load #1
+ #n+1 = icmp eq #1, (#n+1)-#const0
+ br #1, label #(bb1), label #(bb2)
+
+Note in the example that operands which are constants also use
+the relative encoding, while operands like basic block labels
+do not.
+
+Forward references will result in a negative value.
+This can be inefficient, as operands are normally encoded
+as unsigned VBRs. However, forward references are rare, except in the
+case of phi instructions. For phi instructions, operands are encoded as
+`Signed VBRs`_ to deal with forward references.
+
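+As a rough illustration (this helper is a hypothetical sketch, not part of
+the actual bitcode reader or writer), the payload of a signed VBR can be
+derived from a two's-complement value like this:
+
+.. code-block:: c++
+
+   #include <cstdint>
+
+   // Non-negative V is emitted as V << 1; negative V as ((-V) << 1) | 1,
+   // so small magnitudes of either sign stay small. The INT64_MIN edge
+   // case is ignored in this sketch.
+   static uint64_t getSignedVBRPayload(int64_t V) {
+     if (V >= 0)
+       return uint64_t(V) << 1;
+     return (uint64_t(-V) << 1) | 1;
+   }
+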
MODULE_CODE_TRIPLE Record
^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/docs/DeveloperPolicy.rst b/docs/DeveloperPolicy.rst
index f940dbc579a..e35e7295564 100644
--- a/docs/DeveloperPolicy.rst
+++ b/docs/DeveloperPolicy.rst
@@ -137,6 +137,9 @@ reviewees. If someone is kind enough to review your code, you should return the
favor for someone else. Note that anyone is welcome to review and give feedback
on a patch, but only people with Subversion write access can approve it.
+There is a web-based tool that can optionally be used for code
+reviews. See :doc:`Phabricator`.
+
Code Owners
-----------
diff --git a/docs/ExtendingLLVM.rst b/docs/ExtendingLLVM.rst
index e41cfd996e5..6df08eee985 100644
--- a/docs/ExtendingLLVM.rst
+++ b/docs/ExtendingLLVM.rst
@@ -270,7 +270,7 @@ Adding a derived type
add support for derived type to:
- .. code:: c++
+ .. code-block:: c++
std::string getTypeDescription(const Type &Ty,
std::vector<const Type*> &TypeStack)
@@ -296,7 +296,7 @@ Adding a derived type
modify
- .. code:: c++
+ .. code-block:: c++
void calcTypeName(const Type *Ty,
std::vector<const Type*> &TypeStack,
diff --git a/docs/GettingStarted.rst b/docs/GettingStarted.rst
index d78c78f1cc2..68768921f6a 100644
--- a/docs/GettingStarted.rst
+++ b/docs/GettingStarted.rst
@@ -505,7 +505,7 @@ directory:
If you would like to get the LLVM test suite (a separate package as of 1.4), you
get it from the Subversion repository:
-.. code:: bash
+.. code-block:: bash
% cd llvm/projects
% svn co http://llvm.org/svn/llvm-project/test-suite/trunk test-suite
@@ -523,13 +523,13 @@ marks (so, you can recreate git-svn metadata locally). Note that right now
mirrors reflect only ``trunk`` for each project. You can do the read-only GIT
clone of LLVM via:
-.. code:: bash
+.. code-block:: bash
% git clone http://llvm.org/git/llvm.git
If you want to check out clang too, run:
-.. code:: bash
+.. code-block:: bash
% git clone http://llvm.org/git/llvm.git
% cd llvm/tools
@@ -540,26 +540,26 @@ pull --rebase`` instead of ``git pull`` to avoid generating a non-linear history
in your clone. To configure ``git pull`` to pass ``--rebase`` by default on the
master branch, run the following command:
-.. code:: bash
+.. code-block:: bash
% git config branch.master.rebase true
Sending patches with Git
^^^^^^^^^^^^^^^^^^^^^^^^
-Please read `Developer Policy <DeveloperPolicy.html#patches>`_, too.
+Please read `Developer Policy <DeveloperPolicy.html#one-off-patches>`_, too.
Assume ``master`` points to the upstream and ``mybranch`` points to your
working branch, and that ``mybranch`` is rebased onto ``master``. First, you
may check the sanity of whitespace:
-.. code:: bash
+.. code-block:: bash
% git diff --check master..mybranch
The easiest way to generate a patch is as follows:
-.. code:: bash
+.. code-block:: bash
% git diff master..mybranch > /path/to/mybranch.diff
@@ -570,20 +570,20 @@ could be accepted with ``patch -p1 -N``.
You may also generate a patchset with git-format-patch, which produces one
patch per commit. To generate patch files to attach to your message:
-.. code:: bash
+.. code-block:: bash
% git format-patch --no-attach master..mybranch -o /path/to/your/patchset
If you would like to send patches directly, you may use git-send-email or
git-imap-send. Here is an example that places the patchset in Gmail's [Drafts].
-.. code:: bash
+.. code-block:: bash
% git format-patch --attach master..mybranch --stdout | git imap-send
Then, your .git/config should have an [imap] section.
-.. code:: bash
+.. code-block:: bash
[imap]
host = imaps://imap.gmail.com
@@ -594,16 +594,16 @@ Then, your .git/config should have [imap] sections.
; in English
folder = "[Gmail]/Drafts"
; example for Japanese, "Modified UTF-7" encoded.
- folder = "[Gmail]/&amp;Tgtm+DBN-"
+ folder = "[Gmail]/&Tgtm+DBN-"
; example for Traditional Chinese
- folder = "[Gmail]/&amp;g0l6Pw-"
+ folder = "[Gmail]/&g0l6Pw-"
For developers to work with git-svn
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
To set up a clone from which you can submit code using ``git-svn``, run:
-.. code:: bash
+.. code-block:: bash
% git clone http://llvm.org/git/llvm.git
% cd llvm
@@ -622,7 +622,7 @@ To set up clone from which you can submit code using ``git-svn``, run:
To update this clone without generating git-svn tags that conflict with the
upstream git repo, run:
-.. code:: bash
+.. code-block:: bash
% git fetch && (cd tools/clang && git fetch) # Get matching revisions of both trees.
% git checkout master
@@ -640,7 +640,7 @@ The git-svn metadata can get out of sync after you mess around with branches and
``dcommit``. When that happens, ``git svn dcommit`` stops working, complaining
about files with uncommitted changes. The fix is to rebuild the metadata:
-.. code:: bash
+.. code-block:: bash
% rm -rf .git/svn
% git svn rebase -l
@@ -722,13 +722,13 @@ To configure LLVM, follow these steps:
#. Change directory into the object root directory:
- .. code:: bash
+ .. code-block:: bash
% cd OBJ_ROOT
#. Run the ``configure`` script located in the LLVM source tree:
- .. code:: bash
+ .. code-block:: bash
% SRC_ROOT/configure --prefix=/install/path [other options]
@@ -764,7 +764,7 @@ Profile Builds
Once you have LLVM configured, you can build it by entering the *OBJ_ROOT*
directory and issuing the following command:
-.. code:: bash
+.. code-block:: bash
% gmake
@@ -775,7 +775,7 @@ If you have multiple processors in your machine, you may wish to use some of the
parallel build options provided by GNU Make. For example, you could use the
command:
-.. code:: bash
+.. code-block:: bash
% gmake -j2
@@ -866,13 +866,13 @@ This is accomplished in the typical autoconf manner:
* Change directory to where the LLVM object files should live:
- .. code:: bash
+ .. code-block:: bash
% cd OBJ_ROOT
* Run the ``configure`` script found in the LLVM source directory:
- .. code:: bash
+ .. code-block:: bash
% SRC_ROOT/configure
@@ -918,7 +918,7 @@ module, and you have root access on the system, you can set your system up to
execute LLVM bitcode files directly. To do this, use commands like this (the
first command may not be required if you are already using the module):
-.. code:: bash
+.. code-block:: bash
% mount -t binfmt_misc none /proc/sys/fs/binfmt_misc
% echo ':llvm:M::BC::/path/to/lli:' > /proc/sys/fs/binfmt_misc/register
@@ -928,7 +928,7 @@ first command may not be required if you are already using the module):
This allows you to execute LLVM bitcode files directly. On Debian, you can also
use this command instead of the 'echo' command above:
-.. code:: bash
+.. code-block:: bash
% sudo update-binfmts --install llvm /path/to/lli --magic 'BC'
@@ -1208,7 +1208,7 @@ Example with clang
#. First, create a simple C file, name it 'hello.c':
- .. code:: c
+ .. code-block:: c
#include <stdio.h>
@@ -1219,7 +1219,7 @@ Example with clang
#. Next, compile the C file into a native executable:
- .. code:: bash
+ .. code-block:: bash
% clang hello.c -o hello
@@ -1230,7 +1230,7 @@ Example with clang
#. Next, compile the C file into an LLVM bitcode file:
- .. code:: bash
+ .. code-block:: bash
% clang -O3 -emit-llvm hello.c -c -o hello.bc
@@ -1240,13 +1240,13 @@ Example with clang
#. Run the program in both forms. To run the program, use:
- .. code:: bash
+ .. code-block:: bash
% ./hello
and
- .. code:: bash
+ .. code-block:: bash
% lli hello.bc
@@ -1255,19 +1255,19 @@ Example with clang
#. Use the ``llvm-dis`` utility to take a look at the LLVM assembly code:
- .. code:: bash
+ .. code-block:: bash
% llvm-dis < hello.bc | less
#. Compile the program to native assembly using the LLC code generator:
- .. code:: bash
+ .. code-block:: bash
% llc hello.bc -o hello.s
#. Assemble the native assembly language file into a program:
- .. code:: bash
+ .. code-block:: bash
**Solaris:** % /opt/SUNWspro/bin/cc -xarch=v9 hello.s -o hello.native
@@ -1275,7 +1275,7 @@ Example with clang
#. Execute the native code program:
- .. code:: bash
+ .. code-block:: bash
% ./hello.native
diff --git a/docs/HowToSetUpLLVMStyleRTTI.rst b/docs/HowToSetUpLLVMStyleRTTI.rst
index b5c1b78afeb..aa1ad84afee 100644
--- a/docs/HowToSetUpLLVMStyleRTTI.rst
+++ b/docs/HowToSetUpLLVMStyleRTTI.rst
@@ -65,10 +65,9 @@ steps:
#include "llvm/Support/Casting.h"
-
#. In the base class, introduce an enum which discriminates all of the
- different classes in the hierarchy, and stash the enum value somewhere in
- the base class.
+ different concrete classes in the hierarchy, and stash the enum value
+ somewhere in the base class.
Here is the code after introducing this change:
@@ -78,8 +77,8 @@ steps:
public:
+ /// Discriminator for LLVM-style RTTI (dyn_cast<> et al.)
+ enum ShapeKind {
- + SquareKind,
- + CircleKind
+ + SK_Square,
+ + SK_Circle
+ };
+private:
+ const ShapeKind Kind;
@@ -103,7 +102,7 @@ steps:
You might wonder why the ``Kind`` enum doesn't have an entry for
``Shape``. The reason for this is that since ``Shape`` is abstract
(``computeArea() = 0;``), you will never actually have non-derived
- instances of exactly that class (only subclasses). See `Concrete Bases
+ instances of exactly that class (only subclasses). See `Concrete Bases
and Deeper Hierarchies`_ for information on how to deal with
non-abstract bases. It's worth mentioning here that unlike
``dynamic_cast<>``, LLVM-style RTTI can be used (and is often used) for
@@ -122,8 +121,8 @@ steps:
public:
/// Discriminator for LLVM-style RTTI (dyn_cast<> et al.)
enum ShapeKind {
- SquareKind,
- CircleKind
+ SK_Square,
+ SK_Circle
};
private:
const ShapeKind Kind;
@@ -139,7 +138,7 @@ steps:
double SideLength;
public:
- Square(double S) : SideLength(S) {}
- + Square(double S) : Shape(SquareKind), SideLength(S) {}
+ + Square(double S) : Shape(SK_Square), SideLength(S) {}
double computeArea() /* override */;
};
@@ -147,7 +146,7 @@ steps:
double Radius;
public:
- Circle(double R) : Radius(R) {}
- + Circle(double R) : Shape(CircleKind), Radius(R) {}
+ + Circle(double R) : Shape(SK_Circle), Radius(R) {}
double computeArea() /* override */;
};
@@ -164,8 +163,8 @@ steps:
public:
/// Discriminator for LLVM-style RTTI (dyn_cast<> et al.)
enum ShapeKind {
- SquareKind,
- CircleKind
+ SK_Square,
+ SK_Circle
};
private:
const ShapeKind Kind;
@@ -174,53 +173,72 @@ steps:
Shape(ShapeKind K) : Kind(K) {}
virtual double computeArea() = 0;
- +
- + static bool classof(const Shape *) { return true; }
};
class Square : public Shape {
double SideLength;
public:
- Square(double S) : Shape(SquareKind), SideLength(S) {}
+ Square(double S) : Shape(SK_Square), SideLength(S) {}
double computeArea() /* override */;
+
- + static bool classof(const Square *) { return true; }
+ static bool classof(const Shape *S) {
- + return S->getKind() == SquareKind;
+ + return S->getKind() == SK_Square;
+ }
};
class Circle : public Shape {
double Radius;
public:
- Circle(double R) : Shape(CircleKind), Radius(R) {}
+ Circle(double R) : Shape(SK_Circle), Radius(R) {}
double computeArea() /* override */;
+
- + static bool classof(const Circle *) { return true; }
+ static bool classof(const Shape *S) {
- + return S->getKind() == CircleKind;
+ + return S->getKind() == SK_Circle;
+ }
};
- Basically, the job of ``classof`` is to return ``true`` if its argument
- is of the enclosing class's type. As you can see, there are two general
- overloads of ``classof`` in use here.
+ The job of ``classof`` is to dynamically determine whether an object of
+ a base class is in fact of a particular derived class. In order to
+ downcast a type ``Base`` to a type ``Derived``, there needs to be a
+ ``classof`` in ``Derived`` which will accept an object of type ``Base``.
+
+ To be concrete, consider the following code:
+
+ .. code-block:: c++
+
+ Shape *S = ...;
+ if (isa<Circle>(S)) {
+ /* do something ... */
+ }
- #. The first, which just returns ``true``, means that if we know that the
- argument of the cast is of the enclosing type *at compile time*, then
- we don't need to bother to check anything since we already know that
- the type is convertible. This is an optimization for the case that we
- statically know the conversion is OK.
+ The ``isa<>`` test in this code will eventually boil
+ down---after template instantiation and some other machinery---to a
+ check roughly like ``Circle::classof(S)``. For more information, see
+ :ref:`classof-contract`.
- #. The other overload takes a pointer to an object of the base of the
- class hierarchy: this is the "general case" of the cast. We need to
- check the ``Kind`` to dynamically decide if the argument is of (or
- derived from) the enclosing type.
+ The argument to ``classof`` should always be an *ancestor* class because
+ the implementation has logic to allow and optimize away
+ upcasts/up-``isa<>``'s automatically. It is as though every class
+ ``Foo`` automatically has a ``classof`` like:
+
+ .. code-block:: c++
- To be more precise, let ``classof`` be inside a class ``C``. Then the
- contract for ``classof`` is "return ``true`` if the argument is-a
- ``C``". As long as your implementation fulfills this contract, you can
- tweak and optimize it as much as you want.
+ class Foo {
+ [...]
+ template <class T>
+ static bool classof(const T *,
+ ::llvm::enable_if_c<
+ ::llvm::is_base_of<Foo, T>::value
+ >::type* = 0) { return true; }
+ [...]
+ };
+
+ Note that this is the reason that we did not need to introduce a
+ ``classof`` into ``Shape``: all relevant classes derive from ``Shape``,
+ and ``Shape`` itself is abstract (has no entry in the ``Kind`` enum),
+ so this notional inferred ``classof`` is all we need. See `Concrete
+ Bases and Deeper Hierarchies`_ for more information about how to extend
+ this example to more general hierarchies.
Although for this small example setting up LLVM-style RTTI seems like a lot
of "boilerplate", if your classes are doing anything interesting then this
@@ -231,29 +249,37 @@ Concrete Bases and Deeper Hierarchies
For concrete bases (i.e. non-abstract interior nodes of the inheritance
tree), the ``Kind`` check inside ``classof`` needs to be a bit more
-complicated. Say that ``SpecialSquare`` and ``OtherSpecialSquare`` derive
+complicated. The situation differs from the example above in that
+
+* Since the class is concrete, it must itself have an entry in the ``Kind``
+ enum because it is possible to have objects with this class as a dynamic
+ type.
+
+* Since the class has children, the check inside ``classof`` must take them
+ into account.
+
+Say that ``SpecialSquare`` and ``OtherSpecialSquare`` derive
from ``Square``, and so ``ShapeKind`` becomes:
.. code-block:: c++
enum ShapeKind {
- SquareKind,
- + SpecialSquareKind,
- + OtherSpecialSquareKind,
- CircleKind
+ SK_Square,
+ + SK_SpecialSquare,
+ + SK_OtherSpecialSquare,
+ SK_Circle
}
Then in ``Square``, we would need to modify the ``classof`` like so:
.. code-block:: c++
- static bool classof(const Square *) { return true; }
- static bool classof(const Shape *S) {
- - return S->getKind() == SquareKind;
+ - return S->getKind() == SK_Square;
- }
+ static bool classof(const Shape *S) {
- + return S->getKind() >= SquareKind &&
- + S->getKind() <= OtherSpecialSquareKind;
+ + return S->getKind() >= SK_Square &&
+ + S->getKind() <= SK_OtherSpecialSquare;
+ }
The reason that we need to test a range like this instead of just equality
@@ -273,9 +299,34 @@ ordering right::
| OtherSpecialSquare
| Circle
+.. _classof-contract:
+
+The Contract of ``classof``
+---------------------------
+
+To be more precise, let ``classof`` be inside a class ``C``. Then the
+contract for ``classof`` is "return ``true`` if the dynamic type of the
+argument is-a ``C``". As long as your implementation fulfills this
+contract, you can tweak and optimize it as much as you want.
+
.. TODO::
Touch on some of the more advanced features, like ``isa_impl`` and
``simplify_type``. However, those two need reference documentation in
the form of doxygen comments as well. We need the doxygen so that we can
say "for full details, see http://llvm.org/doxygen/..."
+
+Rules of Thumb
+==============
+
+#. The ``Kind`` enum should have one entry per concrete class, ordered
+ according to a preorder traversal of the inheritance tree.
+#. The argument to ``classof`` should be a ``const Base *``, where ``Base``
+ is some ancestor in the inheritance hierarchy. The argument should
+ *never* be a derived class or the class itself: the template machinery
+ for ``isa<>`` already handles this case and optimizes it.
+#. For each class in the hierarchy that has no children, implement a
+ ``classof`` that checks only against its ``Kind``.
+#. For each class in the hierarchy that has children, implement a
+ ``classof`` that checks a range of the first child's ``Kind`` and the
+ last child's ``Kind``. (A short usage sketch follows this list.)
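+
+Putting these rules together, here is a minimal usage sketch (assuming the
+``Shape``/``Square``/``Circle`` hierarchy from earlier with
+``llvm/Support/Casting.h`` included; ``areaOf`` is just an illustrative
+name):
+
+.. code-block:: c++
+
+   double areaOf(Shape *S) {
+     if (isa<Circle>(S))      // boils down to roughly Circle::classof(S)
+       return S->computeArea();
+     // dyn_cast<> yields a null pointer when S is not (dynamically) a Square.
+     if (Square *Sq = dyn_cast<Square>(S))
+       return Sq->computeArea();
+     return 0.0;
+   }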
diff --git a/docs/Lexicon.rst b/docs/Lexicon.rst
index 6ebe61429f9..d568c0b302e 100644
--- a/docs/Lexicon.rst
+++ b/docs/Lexicon.rst
@@ -20,8 +20,10 @@ A
B
-
-**BURS**
+**BB Vectorization**
+ Basic Block Vectorization
+**BURS**
Bottom Up Rewriting System --- A method of instruction selection for code
generation. An example is the `BURG
<http://www.program-transformation.org/Transform/BURG>`_ tool.
@@ -156,7 +158,7 @@ R
In garbage collection, a pointer variable lying outside of the `heap`_ from
which the collector begins its reachability analysis. In the context of code
generation, "root" almost always refers to a "stack root" --- a local or
- temporary variable within an executing function.</dd>
+ temporary variable within an executing function.
**RPO**
Reverse postorder
@@ -192,3 +194,10 @@ S
**Stack Map**
In garbage collection, metadata emitted by the code generator which
identifies `roots`_ within the stack frame of an executing function.
+
+T
+-
+
+**TBAA**
+ Type-Based Alias Analysis
+
diff --git a/docs/Phabricator.rst b/docs/Phabricator.rst
new file mode 100644
index 00000000000..cd984b09be3
--- /dev/null
+++ b/docs/Phabricator.rst
@@ -0,0 +1,94 @@
+=============================
+Code Reviews with Phabricator
+=============================
+
+.. contents::
+ :local:
+
+If you prefer to use a web user interface for code reviews,
+you can now submit your patches for Clang and LLVM at
+`LLVM's Phabricator`_.
+
+Sign up
+-------
+
+Sign up with one of the supported OAuth account types. If
+you use your Subversion user name as Phabricator user name,
+Phabricator will automatically connect your commits to your
+Phabricator user in the `Code Repository Browser`_.
+
+
+Requesting a review via the command line
+----------------------------------------
+
+Phabricator has a tool called *Arcanist* to upload patches from
+the command line. To get you set up, follow the
+`Arcanist Quick Start`_ instructions.
+
+You can learn more about how to use arc to interact with
+Phabricator in the `Arcanist User Guide`_.
+
+Requesting a review via the web interface
+-----------------------------------------
+
+The tool to create and review patches in Phabricator is called
+*Differential*.
+
+Note that you can upload patches created through various diff tools,
+including git and svn. To make reviews easier, please always include
+**as much context as possible** with your diff! Don't worry, Phabricator
+will automatically send a diff with a smaller context in the review
+email, but having the full file in the web interface will help the
+reviewer understand your code.
+
+To get a full diff, use one of the following commands (or just use Arcanist
+to upload your patch):
+
+* git diff -U999999 other-branch
+* svn diff --diff-cmd=diff -x -U999999
+
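+With Arcanist set up as described above, a typical session (hypothetical,
+assuming ``arc`` is installed and the repository is configured for it)
+boils down to:
+
+.. code-block:: bash
+
+   $ arc diff   # upload the current changes, with full context, for review
+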
+To upload a new patch:
+
+* Click *Differential*.
+* Click *Create Revision*.
+* Paste the text diff or upload the patch file.
+ Note that TODO
+* Leave the drop down on *Create a new Revision...* and click *Continue*.
+* Enter a descriptive title and summary; add reviewers and mailing
+ lists that you want to be included in the review. If your patch is
+ for LLVM, cc llvm-commits; if your patch is for Clang, cc cfe-commits.
+* Click *Save*.
+
+To submit an updated patch:
+
+* Click *Differential*.
+* Click *Create Revision*.
+* Paste the updated diff.
+* Select the review you want to update from the *Attach To* dropdown and click
+ *Continue*.
+* Click *Save*.
+
+Reviewing code with Phabricator
+-------------------------------
+
+Phabricator allows you to add inline comments as well as overall comments
+to a revision. To add an inline comment, select the lines of code you want
+to comment on by clicking and dragging the line numbers in the diff pane.
+
+You can add overall comments or submit your comments at the bottom of the page.
+
+Phabricator has many useful features, for example allowing you to select
+diffs between different versions of the patch as it was reviewed in the
+*Revision Update History*. Most features are self-descriptive; explore, and
+if you have a question, drop by on #llvm in IRC to get help.
+
+Status
+------
+
+Currently, we're testing Phabricator for use with Clang/LLVM. Please let us
+know whether you like it and what could be improved!
+
+.. _LLVM's Phabricator: http://llvm-reviews.chandlerc.com
+.. _Code Repository Browser: http://llvm-reviews.chandlerc.com/diffusion/
+.. _Arcanist Quick Start: http://www.phabricator.com/docs/phabricator/article/Arcanist_Quick_Start.html
+.. _Arcanist User Guide: http://www.phabricator.com/docs/phabricator/article/Arcanist_User_Guide.html
diff --git a/docs/SphinxQuickstartTemplate.rst b/docs/SphinxQuickstartTemplate.rst
new file mode 100644
index 00000000000..75d916368e3
--- /dev/null
+++ b/docs/SphinxQuickstartTemplate.rst
@@ -0,0 +1,125 @@
+==========================
+Sphinx Quickstart Template
+==========================
+
+.. sectionauthor:: Sean Silva <silvas@purdue.edu>
+
+Introduction and Quickstart
+===========================
+
+This document is meant to get you writing documentation as fast as possible
+even if you have no previous experience with Sphinx. The goal is to take
+someone in the state of "I want to write documentation and get it added to
+LLVM's docs" and turn that into useful documentation mailed to llvm-commits
+with as little nonsense as possible.
+
+You can find this document in ``docs/SphinxQuickstartTemplate.rst``. You
+should copy it, open the new file in your text editor, write your docs, and
+then send the new document to llvm-commits for review.
+
+Focus on *content*. It is easy to fix the Sphinx (reStructuredText) syntax
+later if necessary, although reStructuredText tries to imitate common
+plain-text conventions so it should be quite natural. A basic knowledge of
+reStructuredText syntax is useful when writing the document, so the last
+~half of this document (starting with `Example Section`_) gives examples
+which should cover 99% of use cases.
+
+Let me say that again: focus on *content*.
+
+Once you have finished with the content, please send the ``.rst`` file to
+llvm-commits for review.
+
+Guidelines
+==========
+
+Try to answer the following questions in your first section:
+
+#. Why would I want to read this document?
+
+#. What should I know to be able to follow along with this document?
+
+#. What will I have learned by the end of this document?
+
+Common names for the first section are ``Introduction``, ``Overview``, or
+``Background``.
+
+If possible, make your document a "how to". Give it a name ``HowTo*.rst``
+like the other "how to" documents. This format is usually the easiest
+for another person to understand and also the most useful.
+
+You generally should not be writing documentation other than a "how to"
+unless there is already a "how to" about your topic. The reason for this
+is that without a "how to" document to read first, it is difficult for a
+person to understand a more advanced document.
+
+Focus on content (yes, I had to say it again).
+
+The rest of this document shows example reStructuredText markup constructs
+that are meant to be read by you in your text editor after you have copied
+this file into a new file for the documentation you are about to write.
+
+Example Section
+===============
+
+Your text can be *emphasized*, **bold**, or ``monospace``.
+
+Use blank lines to separate paragraphs.
+
+Headings (like ``Example Section`` just above) give your document
+structure. Use the same kind of adornments (e.g. ``======`` vs. ``------``)
+as are used in this document. The adornment must be the same length as the
+text above it. For Vim users, variations of ``yypVr=`` might be handy.
+
+Example Subsection
+------------------
+
+Make a link `like this <http://llvm.org/>`_. There is also a more
+sophisticated syntax which `can be more readable`_ for longer links since
+it disrupts the flow less. You can put the ``.. _`link text`: <URL>`` block
+pretty much anywhere later in the document.
+
+.. _`can be more readable`: http://en.wikipedia.org/wiki/LLVM
+
+Lists can be made like this:
+
+#. A list starting with ``#.`` will be automatically numbered.
+
+#. This is a second list element.
+
+ #. They nest too.
+
+You can also use unordered lists.
+
+* Stuff.
+
+ + Deeper stuff.
+
+* More stuff.
+
+Example Subsubsection
+^^^^^^^^^^^^^^^^^^^^^
+
+You can make blocks of code like this:
+
+.. code-block:: c++
+
+ int main() {
+ return 0;
+ }
+
+For a shell session, use a ``bash`` code block:
+
+.. code-block:: bash
+
+ $ echo "Goodbye cruel world!"
+ $ rm -rf /
+
+If you need to show LLVM IR, use the ``llvm`` code block.
+
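+For instance, a small (purely illustrative) function could be shown as:
+
+.. code-block:: llvm
+
+   define i32 @identity(i32 %x) {
+     ret i32 %x
+   }
+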
+Hopefully you won't need to be this deep
+""""""""""""""""""""""""""""""""""""""""
+
+If you need to do fancier things than what has been shown in this document,
+you can mail the list or check Sphinx's `reStructuredText Primer`_.
+
+.. _`reStructuredText Primer`: http://sphinx.pocoo.org/rest.html
diff --git a/docs/userguides.rst b/docs/userguides.rst
index 6ff46ade480..8c1554dfce9 100644
--- a/docs/userguides.rst
+++ b/docs/userguides.rst
@@ -18,6 +18,8 @@ User Guides
HowToAddABuilder
yaml2obj
HowToSubmitABug
+ SphinxQuickstartTemplate
+ Phabricator
* :ref:`getting_started`
@@ -70,6 +72,10 @@ User Guides
Instructions for properly submitting information about any bugs you run into
in the LLVM system.
+* :doc:`SphinxQuickstartTemplate`
+
+ A template + tutorial for writing new Sphinx documentation. It is meant
+ to be read in source form.
* `LLVM Testing Infrastructure Guide <TestingGuide.html>`_
diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp
index 56d4d81b5c7..215cb4d3714 100644
--- a/examples/ExceptionDemo/ExceptionDemo.cpp
+++ b/examples/ExceptionDemo/ExceptionDemo.cpp
@@ -10,13 +10,13 @@
// Demo program which implements an example LLVM exception implementation, and
// shows several test cases including the handling of foreign exceptions.
// It is run with type info type arguments to throw. A test will
-// be run for each given type info type. While type info types with the value
+// be run for each given type info type. While type info types with the value
// of -1 will trigger a foreign C++ exception to be thrown; type info types
-// <= 6 and >= 1 will cause the associated generated exceptions to be thrown
+// <= 6 and >= 1 will cause the associated generated exceptions to be thrown
// and caught by generated test functions; and type info types > 6
// will result in exceptions which pass through to the test harness. All other
// type info types are not supported and could cause a crash. In all cases,
-// the "finally" blocks of every generated test functions will executed
+// the "finally" blocks of every generated test function will be executed
// regardless of whether or not that test function ignores or catches the
// thrown exception.
//
@@ -25,25 +25,25 @@
// ExceptionDemo
//
// causes a usage to be printed to stderr
-//
+//
// ExceptionDemo 2 3 7 -1
//
// results in the following cases:
-// - Value 2 causes an exception with a type info type of 2 to be
+// - Value 2 causes an exception with a type info type of 2 to be
// thrown and caught by an inner generated test function.
-// - Value 3 causes an exception with a type info type of 3 to be
+// - Value 3 causes an exception with a type info type of 3 to be
// thrown and caught by an outer generated test function.
-// - Value 7 causes an exception with a type info type of 7 to be
+// - Value 7 causes an exception with a type info type of 7 to be
// thrown and NOT be caught by any generated function.
// - Value -1 causes a foreign C++ exception to be thrown and not be
// caught by any generated function
//
// Cases -1 and 7 are caught by a C++ test harness where the validity of
-// of a C++ catch(...) clause catching a generated exception with a
-// type info type of 7 is explained by: example in rules 1.6.4 in
+// a C++ catch(...) clause catching a generated exception with a
+// type info type of 7 is explained by the example in rules 1.6.4 in
// http://sourcery.mentor.com/public/cxx-abi/abi-eh.html (v1.22)
//
-// This code uses code from the llvm compiler-rt project and the llvm
+// This code uses code from the llvm compiler-rt project and the llvm
// Kaleidoscope project.
//
//===----------------------------------------------------------------------===//
@@ -63,12 +63,12 @@
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/TargetSelect.h"
-// FIXME: Although all systems tested with (Linux, OS X), do not need this
-// header file included. A user on ubuntu reported, undefined symbols
+// FIXME: Although none of the systems tested (Linux, OS X) needs this
+// header file included, a user on Ubuntu reported undefined symbols
// for stderr, and fprintf, and the addition of this include fixed the
-// issue for them. Given that LLVM's best practices include the goal
-// of reducing the number of redundant header files included, the
-// correct solution would be to find out why these symbols are not
+// issue for them. Given that LLVM's best practices include the goal
+// of reducing the number of redundant header files included, the
+// correct solution would be to find out why these symbols are not
// defined for the system in question, and fix the issue by finding out
// which LLVM header file, if any, would include these symbols.
#include <cstdio>
@@ -81,11 +81,11 @@
#define USE_GLOBAL_STR_CONSTS true
#endif
-// System C++ ABI unwind types from:
+// System C++ ABI unwind types from:
// http://sourcery.mentor.com/public/cxx-abi/abi-eh.html (v1.22)
extern "C" {
-
+
typedef enum {
_URC_NO_REASON = 0,
_URC_FOREIGN_EXCEPTION_CAUGHT = 1,
@@ -97,7 +97,7 @@ extern "C" {
_URC_INSTALL_CONTEXT = 7,
_URC_CONTINUE_UNWIND = 8
} _Unwind_Reason_Code;
-
+
typedef enum {
_UA_SEARCH_PHASE = 1,
_UA_CLEANUP_PHASE = 2,
@@ -105,34 +105,34 @@ extern "C" {
_UA_FORCE_UNWIND = 8,
_UA_END_OF_STACK = 16
} _Unwind_Action;
-
+
struct _Unwind_Exception;
-
+
typedef void (*_Unwind_Exception_Cleanup_Fn) (_Unwind_Reason_Code,
struct _Unwind_Exception *);
-
+
struct _Unwind_Exception {
uint64_t exception_class;
_Unwind_Exception_Cleanup_Fn exception_cleanup;
-
- uintptr_t private_1;
- uintptr_t private_2;
-
+
+ uintptr_t private_1;
+ uintptr_t private_2;
+
// @@@ The IA-64 ABI says that this structure must be double-word aligned.
- // Taking that literally does not make much sense generically. Instead
+ // Taking that literally does not make much sense generically. Instead
// we provide the maximum alignment required by any type for the machine.
} __attribute__((__aligned__));
-
+
struct _Unwind_Context;
typedef struct _Unwind_Context *_Unwind_Context_t;
-
+
extern const uint8_t *_Unwind_GetLanguageSpecificData (_Unwind_Context_t c);
extern uintptr_t _Unwind_GetGR (_Unwind_Context_t c, int i);
extern void _Unwind_SetGR (_Unwind_Context_t c, int i, uintptr_t n);
extern void _Unwind_SetIP (_Unwind_Context_t, uintptr_t new_value);
extern uintptr_t _Unwind_GetIP (_Unwind_Context_t context);
extern uintptr_t _Unwind_GetRegionStart (_Unwind_Context_t context);
-
+
} // extern "C"
//
@@ -148,13 +148,13 @@ struct OurExceptionType_t {
/// This is our Exception class which relies on a negative offset to calculate
/// pointers to its instances from pointers to its unwindException member.
-///
+///
/// Note: The above unwind.h defines struct _Unwind_Exception to be aligned
/// on a double word boundary. This is necessary to match the standard:
/// http://refspecs.freestandards.org/abi-eh-1.21.html
struct OurBaseException_t {
struct OurExceptionType_t type;
-
+
// Note: This is properly aligned in unwind.h
struct _Unwind_Exception unwindException;
};
@@ -165,7 +165,7 @@ typedef struct OurBaseException_t OurException;
typedef struct _Unwind_Exception OurUnwindException;
//
-// Various globals used to support typeinfo and generatted exceptions in
+// Various globals used to support typeinfo and generated exceptions in
// general
//
@@ -173,7 +173,7 @@ static std::map<std::string, llvm::Value*> namedValues;
int64_t ourBaseFromUnwindOffset;
-const unsigned char ourBaseExcpClassChars[] =
+const unsigned char ourBaseExcpClassChars[] =
{'o', 'b', 'j', '\0', 'b', 'a', 's', '\0'};
@@ -203,7 +203,7 @@ typedef std::vector<llvm::Type*> ArgTypes;
/// @param retType function return type
/// @param theArgTypes function's ordered argument types
/// @param theArgNames function's ordered arguments needed if use of this
-/// function corresponds to a function definition. Use empty
+/// function corresponds to a function definition. Use empty
/// aggregate for function declarations.
/// @param functName function name
/// @param linkage function linkage
@@ -224,17 +224,17 @@ llvm::Function *createFunction(llvm::Module &module,
llvm::Function::Create(functType, linkage, functName, &module);
if (!ret || declarationOnly)
return(ret);
-
+
namedValues.clear();
- unsigned i = 0;
+ unsigned i = 0;
for (llvm::Function::arg_iterator argIndex = ret->arg_begin();
i != theArgNames.size();
++argIndex, ++i) {
-
+
argIndex->setName(theArgNames[i]);
namedValues[theArgNames[i]] = argIndex;
}
-
+
return(ret);
}
@@ -250,13 +250,13 @@ static llvm::AllocaInst *createEntryBlockAlloca(llvm::Function &function,
const std::string &varName,
llvm::Type *type,
llvm::Constant *initWith = 0) {
- llvm::BasicBlock &block = function.getEntryBlock();
+ llvm::BasicBlock &block = function.getEntryBlock();
llvm::IRBuilder<> tmp(&block, block.begin());
llvm::AllocaInst *ret = tmp.CreateAlloca(type, 0, varName.c_str());
-
- if (initWith)
+
+ if (initWith)
tmp.CreateStore(initWith, ret);
-
+
return(ret);
}
@@ -266,7 +266,7 @@ static llvm::AllocaInst *createEntryBlockAlloca(llvm::Function &function,
//
//
-// Runtime C Library functions
+// Runtime C Library functions
//
// Note: using an extern "C" block so that static functions can be used
@@ -275,7 +275,7 @@ extern "C" {
// Note: Better ways to decide on bit width
//
/// Prints a 32 bit number, according to the format, to stderr.
-/// @param intToPrint integer to print
+/// @param intToPrint integer to print
/// @param format printf like format to use when printing
void print32Int(int intToPrint, const char *format) {
if (format) {
@@ -292,7 +292,7 @@ void print32Int(int intToPrint, const char *format) {
// Note: Better ways to decide on bit width
//
/// Prints a 64 bit number, according to the format, to stderr.
-/// @param intToPrint integer to print
+/// @param intToPrint integer to print
/// @param format printf like format to use when printing
void print64Int(long int intToPrint, const char *format) {
if (format) {
@@ -327,19 +327,19 @@ void deleteOurException(OurUnwindException *expToDelete) {
fprintf(stderr,
"deleteOurException(...).\n");
#endif
-
+
if (expToDelete &&
(expToDelete->exception_class == ourBaseExceptionClass)) {
-
+
free(((char*) expToDelete) + ourBaseFromUnwindOffset);
}
}
-/// This function is the struct _Unwind_Exception API mandated delete function
-/// used by foreign exception handlers when deleting our exception
+/// This function is the struct _Unwind_Exception API mandated delete function
+/// used by foreign exception handlers when deleting our exception
/// (OurException), instances.
-/// @param reason @link http://refspecs.freestandards.org/abi-eh-1.21.html
+/// @param reason @link http://refspecs.freestandards.org/abi-eh-1.21.html
/// @unlink
/// @param expToDelete exception instance to delete
void deleteFromUnwindOurException(_Unwind_Reason_Code reason,
@@ -348,7 +348,7 @@ void deleteFromUnwindOurException(_Unwind_Reason_Code reason,
fprintf(stderr,
"deleteFromUnwindOurException(...).\n");
#endif
-
+
deleteOurException(expToDelete);
}
@@ -362,13 +362,13 @@ OurUnwindException *createOurException(int type) {
(ret->type).type = type;
(ret->unwindException).exception_class = ourBaseExceptionClass;
(ret->unwindException).exception_cleanup = deleteFromUnwindOurException;
-
+
return(&(ret->unwindException));
}
-/// Read a uleb128 encoded value and advance pointer
-/// See Variable Length Data in:
+/// Read a uleb128 encoded value and advance pointer
+/// See Variable Length Data in:
/// @link http://dwarfstd.org/Dwarf3.pdf @unlink
/// @param data reference variable holding memory pointer to decode from
/// @returns decoded value
@@ -377,22 +377,22 @@ static uintptr_t readULEB128(const uint8_t **data) {
uintptr_t shift = 0;
unsigned char byte;
const uint8_t *p = *data;
-
+
do {
byte = *p++;
result |= (byte & 0x7f) << shift;
shift += 7;
- }
+ }
while (byte & 0x80);
-
+
*data = p;
-
+
return result;
}
-/// Read a sleb128 encoded value and advance pointer
-/// See Variable Length Data in:
+/// Read a sleb128 encoded value and advance pointer
+/// See Variable Length Data in:
/// @link http://dwarfstd.org/Dwarf3.pdf @unlink
/// @param data reference variable holding memory pointer to decode from
/// @returns decoded value
@@ -401,26 +401,26 @@ static uintptr_t readSLEB128(const uint8_t **data) {
uintptr_t shift = 0;
unsigned char byte;
const uint8_t *p = *data;
-
+
do {
byte = *p++;
result |= (byte & 0x7f) << shift;
shift += 7;
- }
+ }
while (byte & 0x80);
-
+
*data = p;
-
+
if ((byte & 0x40) && (shift < (sizeof(result) << 3))) {
result |= (~0 << shift);
}
-
+
return result;
}
-/// Read a pointer encoded value and advance pointer
-/// See Variable Length Data in:
+/// Read a pointer encoded value and advance pointer
+/// See Variable Length Data in:
/// @link http://dwarfstd.org/Dwarf3.pdf @unlink
/// @param data reference variable holding memory pointer to decode from
/// @param encoding dwarf encoding type
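
As a sanity check on the two LEB128 decoders above (ULEB128 packs 7 payload bits per byte, low bits first, with bit 7 as a continuation flag; SLEB128 additionally sign-extends from bit 6 of the last byte), here is a small sketch using the standard DWARF example encodings:

    // Sketch exercising the decoders above; the byte sequences are the
    // well-known DWARF spec examples.
    #include <cassert>
    #include <cstdint>

    static void lebExamples() {
      const uint8_t u[] = {0xE5, 0x8E, 0x26};  // ULEB128 encoding of 624485
      const uint8_t *p = u;
      assert(readULEB128(&p) == 624485 && p == u + 3);

      const uint8_t s[] = {0x7F};              // SLEB128 encoding of -1
      p = s;
      assert(static_cast<intptr_t>(readSLEB128(&p)) == -1);
    }
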
@@ -428,11 +428,11 @@ static uintptr_t readSLEB128(const uint8_t **data) {
static uintptr_t readEncodedPointer(const uint8_t **data, uint8_t encoding) {
uintptr_t result = 0;
const uint8_t *p = *data;
-
- if (encoding == llvm::dwarf::DW_EH_PE_omit)
+
+ if (encoding == llvm::dwarf::DW_EH_PE_omit)
return(result);
-
- // first get value
+
+ // first get value
switch (encoding & 0x0F) {
case llvm::dwarf::DW_EH_PE_absptr:
result = *((uintptr_t*)p);
@@ -470,15 +470,15 @@ static uintptr_t readEncodedPointer(const uint8_t **data, uint8_t encoding) {
p += sizeof(int64_t);
break;
default:
- // not supported
+ // not supported
abort();
break;
}
-
- // then add relative offset
+
+ // then add relative offset
switch (encoding & 0x70) {
case llvm::dwarf::DW_EH_PE_absptr:
- // do nothing
+ // do nothing
break;
case llvm::dwarf::DW_EH_PE_pcrel:
result += (uintptr_t)(*data);
@@ -488,34 +488,34 @@ static uintptr_t readEncodedPointer(const uint8_t **data, uint8_t encoding) {
case llvm::dwarf::DW_EH_PE_funcrel:
case llvm::dwarf::DW_EH_PE_aligned:
default:
- // not supported
+ // not supported
abort();
break;
}
-
- // then apply indirection
+
+ // then apply indirection
if (encoding & llvm::dwarf::DW_EH_PE_indirect) {
result = *((uintptr_t*)result);
}
-
+
*data = p;
-
+
return result;
}
-/// Deals with Dwarf actions matching our type infos
-/// (OurExceptionType_t instances). Returns whether or not a dwarf emitted
-/// action matches the supplied exception type. If such a match succeeds,
-/// the resultAction argument will be set with > 0 index value. Only
-/// corresponding llvm.eh.selector type info arguments, cleanup arguments
+/// Deals with Dwarf actions matching our type infos
+/// (OurExceptionType_t instances). Returns whether or not a dwarf emitted
+/// action matches the supplied exception type. If such a match succeeds,
+/// the resultAction argument will be set with > 0 index value. Only
+/// corresponding llvm.eh.selector type info arguments and cleanup arguments
/// are supported. Filters are not supported.
-/// See Variable Length Data in:
+/// See Variable Length Data in:
/// @link http://dwarfstd.org/Dwarf3.pdf @unlink
/// Also see @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink
/// @param resultAction reference variable which will be set with result
/// @param classInfo our array of type info pointers (to globals)
-/// @param actionEntry index into above type info array or 0 (clean up).
+/// @param actionEntry index into above type info array or 0 (clean up).
/// We do not support filters.
/// @param exceptionClass exception class (_Unwind_Exception::exception_class)
/// of thrown exception.
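
The encoding byte consumed by readEncodedPointer above carries two fields: the low nibble selects the value format (absptr, uleb128, udata4, sdata8, ...) and bits 4-6 select the base the value is relative to (pcrel, textrel, ...), with DW_EH_PE_indirect (0x80) requesting a final dereference. An illustrative call (the cursor is a stand-in; it must point into a real encoded table to be run):

    // Sketch: read a 4-byte unsigned, PC-relative encoded pointer.
    const uint8_t *cursor = /* position inside an LSDA */ 0;
    uint8_t enc = llvm::dwarf::DW_EH_PE_pcrel | llvm::dwarf::DW_EH_PE_udata4;
    uintptr_t target = readEncodedPointer(&cursor, enc);
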
@@ -523,22 +523,22 @@ static uintptr_t readEncodedPointer(const uint8_t **data, uint8_t encoding) {
/// @returns whether or not a type info was found. False is returned if only
/// a cleanup was found
static bool handleActionValue(int64_t *resultAction,
- struct OurExceptionType_t **classInfo,
- uintptr_t actionEntry,
- uint64_t exceptionClass,
+ struct OurExceptionType_t **classInfo,
+ uintptr_t actionEntry,
+ uint64_t exceptionClass,
struct _Unwind_Exception *exceptionObject) {
bool ret = false;
-
- if (!resultAction ||
- !exceptionObject ||
+
+ if (!resultAction ||
+ !exceptionObject ||
(exceptionClass != ourBaseExceptionClass))
return(ret);
-
+
struct OurBaseException_t *excp = (struct OurBaseException_t*)
(((char*) exceptionObject) + ourBaseFromUnwindOffset);
struct OurExceptionType_t *excpType = &(excp->type);
int type = excpType->type;
-
+
#ifdef DEBUG
fprintf(stderr,
"handleActionValue(...): exceptionObject = <%p>, "
@@ -546,12 +546,12 @@ static bool handleActionValue(int64_t *resultAction,
exceptionObject,
excp);
#endif
-
+
const uint8_t *actionPos = (uint8_t*) actionEntry,
*tempActionPos;
int64_t typeOffset = 0,
actionOffset;
-
+
for (int i = 0; true; ++i) {
// Each emitted dwarf action corresponds to a 2 tuple of
// type info address offset, and action offset to the next
@@ -559,7 +559,7 @@ static bool handleActionValue(int64_t *resultAction,
typeOffset = readSLEB128(&actionPos);
tempActionPos = actionPos;
actionOffset = readSLEB128(&tempActionPos);
-
+
#ifdef DEBUG
fprintf(stderr,
"handleActionValue(...):typeOffset: <%lld>, "
@@ -567,9 +567,9 @@ static bool handleActionValue(int64_t *resultAction,
typeOffset,
actionOffset);
#endif
- assert((typeOffset >= 0) &&
+ assert((typeOffset >= 0) &&
"handleActionValue(...):filters are not supported.");
-
+
// Note: A typeOffset == 0 implies that a cleanup llvm.eh.selector
// argument has been matched.
if ((typeOffset > 0) &&
@@ -583,17 +583,17 @@ static bool handleActionValue(int64_t *resultAction,
ret = true;
break;
}
-
+
#ifdef DEBUG
fprintf(stderr,
"handleActionValue(...):actionValue not found.\n");
#endif
if (!actionOffset)
break;
-
+
actionPos += actionOffset;
}
-
+
return(ret);
}
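
Each action record walked above is a pair of SLEB128 values: a type offset (an index into the type table, 0 meaning cleanup) and a self-relative offset, measured from the offset field itself, to the next record (0 terminates the chain). A byte-level sketch of a two-entry chain (contents invented for illustration):

    // actionEntry -> 0x02 0x01   typeOffset = 2, next record starts
    //                            1 byte after the offset field
    //                0x01 0x00   typeOffset = 1, actionOffset = 0: end
    // A throw matching the type selected by typeOffset 2 stops at the
    // first record and reports *resultAction = 2; otherwise the walk
    // advances actionPos by actionOffset and tries the second record.
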
@@ -602,52 +602,52 @@ static bool handleActionValue(int64_t *resultAction,
/// See @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink
/// @param version unsupported (ignored), unwind version
/// @param lsda language specific data area
-/// @param _Unwind_Action actions minimally supported unwind stage
+/// @param _Unwind_Action actions minimally supported unwind stage
/// (forced specifically not supported)
/// @param exceptionClass exception class (_Unwind_Exception::exception_class)
/// of thrown exception.
/// @param exceptionObject thrown _Unwind_Exception instance.
/// @param context unwind system context
-/// @returns minimally supported unwinding control indicator
-static _Unwind_Reason_Code handleLsda(int version,
+/// @returns minimally supported unwinding control indicator
+static _Unwind_Reason_Code handleLsda(int version,
const uint8_t *lsda,
_Unwind_Action actions,
- uint64_t exceptionClass,
+ uint64_t exceptionClass,
struct _Unwind_Exception *exceptionObject,
_Unwind_Context_t context) {
_Unwind_Reason_Code ret = _URC_CONTINUE_UNWIND;
-
+
if (!lsda)
return(ret);
-
+
#ifdef DEBUG
- fprintf(stderr,
+ fprintf(stderr,
"handleLsda(...):lsda is non-zero.\n");
#endif
-
+
// Get the current instruction pointer and offset it before next
// instruction in the current frame which threw the exception.
uintptr_t pc = _Unwind_GetIP(context)-1;
-
- // Get beginning current frame's code (as defined by the
+
+ // Get beginning current frame's code (as defined by the
// emitted dwarf code)
uintptr_t funcStart = _Unwind_GetRegionStart(context);
uintptr_t pcOffset = pc - funcStart;
struct OurExceptionType_t **classInfo = NULL;
-
+
// Note: See JITDwarfEmitter::EmitExceptionTable(...) for corresponding
// dwarf emission
-
+
// Parse LSDA header.
uint8_t lpStartEncoding = *lsda++;
-
+
if (lpStartEncoding != llvm::dwarf::DW_EH_PE_omit) {
- readEncodedPointer(&lsda, lpStartEncoding);
+ readEncodedPointer(&lsda, lpStartEncoding);
}
-
+
uint8_t ttypeEncoding = *lsda++;
uintptr_t classInfoOffset;
-
+
if (ttypeEncoding != llvm::dwarf::DW_EH_PE_omit) {
// Calculate type info locations in emitted dwarf code which
// were flagged by type info arguments to llvm.eh.selector
@@ -655,47 +655,47 @@ static _Unwind_Reason_Code handleLsda(int version,
classInfoOffset = readULEB128(&lsda);
classInfo = (struct OurExceptionType_t**) (lsda + classInfoOffset);
}
-
- // Walk call-site table looking for range that
- // includes current PC.
-
+
+ // Walk call-site table looking for range that
+ // includes current PC.
+
uint8_t callSiteEncoding = *lsda++;
uint32_t callSiteTableLength = readULEB128(&lsda);
const uint8_t *callSiteTableStart = lsda;
- const uint8_t *callSiteTableEnd = callSiteTableStart +
+ const uint8_t *callSiteTableEnd = callSiteTableStart +
callSiteTableLength;
const uint8_t *actionTableStart = callSiteTableEnd;
const uint8_t *callSitePtr = callSiteTableStart;
-
+
bool foreignException = false;
-
+
while (callSitePtr < callSiteTableEnd) {
- uintptr_t start = readEncodedPointer(&callSitePtr,
+ uintptr_t start = readEncodedPointer(&callSitePtr,
callSiteEncoding);
- uintptr_t length = readEncodedPointer(&callSitePtr,
+ uintptr_t length = readEncodedPointer(&callSitePtr,
callSiteEncoding);
- uintptr_t landingPad = readEncodedPointer(&callSitePtr,
+ uintptr_t landingPad = readEncodedPointer(&callSitePtr,
callSiteEncoding);
-
+
// Note: Action value
uintptr_t actionEntry = readULEB128(&callSitePtr);
-
+
if (exceptionClass != ourBaseExceptionClass) {
// We have been notified of a foreign exception being thrown,
// and we therefore need to execute cleanup landing pads
actionEntry = 0;
foreignException = true;
}
-
+
if (landingPad == 0) {
#ifdef DEBUG
fprintf(stderr,
"handleLsda(...): No landing pad found.\n");
#endif
-
+
continue; // no landing pad for this entry
}
-
+
if (actionEntry) {
actionEntry += ((uintptr_t) actionTableStart) - 1;
}
@@ -705,55 +705,55 @@ static _Unwind_Reason_Code handleLsda(int version,
"handleLsda(...):No action table found.\n");
#endif
}
-
+
bool exceptionMatched = false;
-
+
if ((start <= pcOffset) && (pcOffset < (start + length))) {
#ifdef DEBUG
fprintf(stderr,
"handleLsda(...): Landing pad found.\n");
#endif
int64_t actionValue = 0;
-
+
if (actionEntry) {
exceptionMatched = handleActionValue(&actionValue,
- classInfo,
- actionEntry,
- exceptionClass,
+ classInfo,
+ actionEntry,
+ exceptionClass,
exceptionObject);
}
-
+
if (!(actions & _UA_SEARCH_PHASE)) {
#ifdef DEBUG
fprintf(stderr,
"handleLsda(...): installed landing pad "
"context.\n");
#endif
-
+
// Found landing pad for the PC.
- // Set Instruction Pointer to so we re-enter function
- // at landing pad. The landing pad is created by the
+      // Set Instruction Pointer so we re-enter function
+ // at landing pad. The landing pad is created by the
// compiler to take two parameters in registers.
- _Unwind_SetGR(context,
- __builtin_eh_return_data_regno(0),
+ _Unwind_SetGR(context,
+ __builtin_eh_return_data_regno(0),
(uintptr_t)exceptionObject);
-
+
// Note: this virtual register directly corresponds
// to the return of the llvm.eh.selector intrinsic
if (!actionEntry || !exceptionMatched) {
// We indicate cleanup only
- _Unwind_SetGR(context,
- __builtin_eh_return_data_regno(1),
+ _Unwind_SetGR(context,
+ __builtin_eh_return_data_regno(1),
0);
}
else {
// Matched type info index of llvm.eh.selector intrinsic
// passed here.
- _Unwind_SetGR(context,
- __builtin_eh_return_data_regno(1),
+ _Unwind_SetGR(context,
+ __builtin_eh_return_data_regno(1),
actionValue);
}
-
+
// To execute landing pad set here
_Unwind_SetIP(context, funcStart + landingPad);
ret = _URC_INSTALL_CONTEXT;
@@ -767,19 +767,19 @@ static _Unwind_Reason_Code handleLsda(int version,
}
else {
// Note: Only non-clean up handlers are marked as
- // found. Otherwise the clean up handlers will be
- // re-found and executed during the clean up
+ // found. Otherwise the clean up handlers will be
+ // re-found and executed during the clean up
// phase.
#ifdef DEBUG
fprintf(stderr,
"handleLsda(...): cleanup handler found.\n");
#endif
}
-
+
break;
}
}
-
+
return(ret);
}
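
handleLsda above consumes the language-specific data area in exactly the order its header dictates; schematically (field names here are descriptive only):

    // LSDA layout as parsed by handleLsda:
    //   uint8    lpStartEncoding   landing-pad base encoding
    //   [ptr]    @LPStart          present only if encoding != DW_EH_PE_omit
    //   uint8    ttypeEncoding     type-table encoding
    //   [ULEB]   classInfoOffset   present only if encoding != DW_EH_PE_omit
    //   uint8    callSiteEncoding
    //   ULEB     callSiteTableLength
    //   records  call sites: { start, length, landingPad, ULEB actionEntry }
    //   ...      the action table begins where the call-site table ends
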
@@ -788,23 +788,23 @@ static _Unwind_Reason_Code handleLsda(int version,
/// dwarf unwind info block. Again see: JITDwarfEmitter.cpp.
/// See @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink
/// @param version unsupported (ignored), unwind version
-/// @param _Unwind_Action actions minimally supported unwind stage
+/// @param _Unwind_Action actions minimally supported unwind stage
/// (forced specifically not supported)
/// @param exceptionClass exception class (_Unwind_Exception::exception_class)
/// of thrown exception.
/// @param exceptionObject thrown _Unwind_Exception instance.
/// @param context unwind system context
-/// @returns minimally supported unwinding control indicator
-_Unwind_Reason_Code ourPersonality(int version,
+/// @returns minimally supported unwinding control indicator
+_Unwind_Reason_Code ourPersonality(int version,
_Unwind_Action actions,
- uint64_t exceptionClass,
+ uint64_t exceptionClass,
struct _Unwind_Exception *exceptionObject,
_Unwind_Context_t context) {
#ifdef DEBUG
- fprintf(stderr,
+ fprintf(stderr,
"We are in ourPersonality(...):actions is <%d>.\n",
actions);
-
+
if (actions & _UA_SEARCH_PHASE) {
fprintf(stderr, "ourPersonality(...):In search phase.\n");
}
@@ -812,15 +812,15 @@ _Unwind_Reason_Code ourPersonality(int version,
fprintf(stderr, "ourPersonality(...):In non-search phase.\n");
}
#endif
-
+
const uint8_t *lsda = _Unwind_GetLanguageSpecificData(context);
-
+
#ifdef DEBUG
- fprintf(stderr,
+ fprintf(stderr,
"ourPersonality(...):lsda = <%p>.\n",
lsda);
#endif
-
+
// The real work of the personality function is captured here
return(handleLsda(version,
lsda,
@@ -841,12 +841,12 @@ _Unwind_Reason_Code ourPersonality(int version,
uint64_t genClass(const unsigned char classChars[], size_t classCharsSize)
{
uint64_t ret = classChars[0];
-
+
for (unsigned i = 1; i < classCharsSize; ++i) {
ret <<= 8;
ret += classChars[i];
}
-
+
return(ret);
}
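
genClass big-endian-packs the characters into a 64-bit value; applied to ourBaseExcpClassChars it produces the demo's _Unwind_Exception::exception_class. A usage sketch with the resulting constant worked out:

    // Sketch: computing the demo's exception_class value.
    uint64_t cls = genClass(ourBaseExcpClassChars,
                            sizeof(ourBaseExcpClassChars) / sizeof(char));
    // cls == 0x6F626A0062617300, i.e. 'o' 'b' 'j' '\0' 'b' 'a' 's' '\0'
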
@@ -865,37 +865,37 @@ uint64_t genClass(const unsigned char classChars[], size_t classCharsSize)
/// @param module code for module instance
/// @param builder builder instance
/// @param toPrint string to print
-/// @param useGlobal A value of true (default) indicates a GlobalValue is
-/// generated, and is used to hold the constant string. A value of
-/// false indicates that the constant string will be stored on the
+/// @param useGlobal A value of true (default) indicates a GlobalValue is
+/// generated, and is used to hold the constant string. A value of
+/// false indicates that the constant string will be stored on the
/// stack.
-void generateStringPrint(llvm::LLVMContext &context,
+void generateStringPrint(llvm::LLVMContext &context,
llvm::Module &module,
- llvm::IRBuilder<> &builder,
+ llvm::IRBuilder<> &builder,
std::string toPrint,
bool useGlobal = true) {
llvm::Function *printFunct = module.getFunction("printStr");
-
+
llvm::Value *stringVar;
- llvm::Constant *stringConstant =
+ llvm::Constant *stringConstant =
llvm::ConstantDataArray::getString(context, toPrint);
-
+
if (useGlobal) {
// Note: Does not work without allocation
- stringVar =
- new llvm::GlobalVariable(module,
+ stringVar =
+ new llvm::GlobalVariable(module,
stringConstant->getType(),
- true,
- llvm::GlobalValue::LinkerPrivateLinkage,
- stringConstant,
+ true,
+ llvm::GlobalValue::LinkerPrivateLinkage,
+ stringConstant,
"");
}
else {
stringVar = builder.CreateAlloca(stringConstant->getType());
builder.CreateStore(stringConstant, stringVar);
}
-
- llvm::Value *cast = builder.CreatePointerCast(stringVar,
+
+ llvm::Value *cast = builder.CreatePointerCast(stringVar,
builder.getInt8PtrTy());
builder.CreateCall(printFunct, cast);
}
@@ -909,49 +909,49 @@ void generateStringPrint(llvm::LLVMContext &context,
/// @param printFunct function used to "print" integer
/// @param toPrint string to print
/// @param format printf like formating string for print
-/// @param useGlobal A value of true (default) indicates a GlobalValue is
-/// generated, and is used to hold the constant string. A value of
-/// false indicates that the constant string will be stored on the
+/// @param useGlobal A value of true (default) indicates a GlobalValue is
+/// generated, and is used to hold the constant string. A value of
+/// false indicates that the constant string will be stored on the
/// stack.
-void generateIntegerPrint(llvm::LLVMContext &context,
+void generateIntegerPrint(llvm::LLVMContext &context,
llvm::Module &module,
- llvm::IRBuilder<> &builder,
+ llvm::IRBuilder<> &builder,
llvm::Function &printFunct,
llvm::Value &toPrint,
- std::string format,
+ std::string format,
bool useGlobal = true) {
llvm::Constant *stringConstant =
llvm::ConstantDataArray::getString(context, format);
llvm::Value *stringVar;
-
+
if (useGlobal) {
// Note: Does not seem to work without allocation
- stringVar =
- new llvm::GlobalVariable(module,
+ stringVar =
+ new llvm::GlobalVariable(module,
stringConstant->getType(),
- true,
- llvm::GlobalValue::LinkerPrivateLinkage,
- stringConstant,
+ true,
+ llvm::GlobalValue::LinkerPrivateLinkage,
+ stringConstant,
"");
}
else {
stringVar = builder.CreateAlloca(stringConstant->getType());
builder.CreateStore(stringConstant, stringVar);
}
-
- llvm::Value *cast = builder.CreateBitCast(stringVar,
+
+ llvm::Value *cast = builder.CreateBitCast(stringVar,
builder.getInt8PtrTy());
builder.CreateCall2(&printFunct, &toPrint, cast);
}
-/// Generates code to handle finally block type semantics: always runs
-/// regardless of whether a thrown exception is passing through or the
-/// parent function is simply exiting. In addition to printing some state
-/// to stderr, this code will resume the exception handling--runs the
-/// unwind resume block, if the exception has not been previously caught
-/// by a catch clause, and will otherwise execute the end block (terminator
-/// block). In addition this function creates the corresponding function's
+/// Generates code to handle finally block type semantics: always runs
+/// regardless of whether a thrown exception is passing through or the
+/// parent function is simply exiting. In addition to printing some state
+/// to stderr, this code will resume the exception handling--runs the
+/// unwind resume block, if the exception has not been previously caught
+/// by a catch clause, and will otherwise execute the end block (terminator
+/// block). In addition this function creates the corresponding function's
/// stack storage for the exception pointer and catch flag status.
/// @param context llvm context
/// @param module code for module instance
@@ -965,9 +965,9 @@ void generateIntegerPrint(llvm::LLVMContext &context,
/// @param exceptionStorage reference to exception pointer storage
/// @param caughtResultStorage reference to landingpad result storage
/// @returns newly created block
-static llvm::BasicBlock *createFinallyBlock(llvm::LLVMContext &context,
- llvm::Module &module,
- llvm::IRBuilder<> &builder,
+static llvm::BasicBlock *createFinallyBlock(llvm::LLVMContext &context,
+ llvm::Module &module,
+ llvm::IRBuilder<> &builder,
llvm::Function &toAddTo,
std::string &blockName,
std::string &functionId,
@@ -976,21 +976,21 @@ static llvm::BasicBlock *createFinallyBlock(llvm::LLVMContext &context,
llvm::Value **exceptionCaughtFlag,
llvm::Value **exceptionStorage,
llvm::Value **caughtResultStorage) {
- assert(exceptionCaughtFlag &&
+ assert(exceptionCaughtFlag &&
"ExceptionDemo::createFinallyBlock(...):exceptionCaughtFlag "
"is NULL");
- assert(exceptionStorage &&
+ assert(exceptionStorage &&
"ExceptionDemo::createFinallyBlock(...):exceptionStorage "
"is NULL");
- assert(caughtResultStorage &&
+ assert(caughtResultStorage &&
"ExceptionDemo::createFinallyBlock(...):caughtResultStorage "
"is NULL");
-
+
*exceptionCaughtFlag = createEntryBlockAlloca(toAddTo,
"exceptionCaught",
ourExceptionNotThrownState->getType(),
ourExceptionNotThrownState);
-
+
llvm::PointerType *exceptionStorageType = builder.getInt8PtrTy();
*exceptionStorage = createEntryBlockAlloca(toAddTo,
"exceptionStorage",
@@ -1002,35 +1002,35 @@ static llvm::BasicBlock *createFinallyBlock(llvm::LLVMContext &context,
ourCaughtResultType,
llvm::ConstantAggregateZero::get(
ourCaughtResultType));
-
+
llvm::BasicBlock *ret = llvm::BasicBlock::Create(context,
blockName,
&toAddTo);
-
+
builder.SetInsertPoint(ret);
-
+
std::ostringstream bufferToPrint;
bufferToPrint << "Gen: Executing finally block "
<< blockName << " in " << functionId << "\n";
- generateStringPrint(context,
- module,
- builder,
+ generateStringPrint(context,
+ module,
+ builder,
bufferToPrint.str(),
USE_GLOBAL_STR_CONSTS);
-
+
llvm::SwitchInst *theSwitch = builder.CreateSwitch(builder.CreateLoad(
- *exceptionCaughtFlag),
+ *exceptionCaughtFlag),
&terminatorBlock,
2);
theSwitch->addCase(ourExceptionCaughtState, &terminatorBlock);
theSwitch->addCase(ourExceptionThrownState, &unwindResumeBlock);
-
+
return(ret);
}
/// Generates catch block semantics which print a string to indicate type of
-/// catch executed, sets an exception caught flag, and executes passed in
+/// catch executed, sets an exception caught flag, and executes passed in
/// end block (terminator block).
/// @param context llvm context
/// @param module code for module instance
@@ -1041,52 +1041,52 @@ static llvm::BasicBlock *createFinallyBlock(llvm::LLVMContext &context,
/// @param terminatorBlock terminator "end" block
/// @param exceptionCaughtFlag exception caught/thrown status
/// @returns newly created block
-static llvm::BasicBlock *createCatchBlock(llvm::LLVMContext &context,
- llvm::Module &module,
- llvm::IRBuilder<> &builder,
+static llvm::BasicBlock *createCatchBlock(llvm::LLVMContext &context,
+ llvm::Module &module,
+ llvm::IRBuilder<> &builder,
llvm::Function &toAddTo,
std::string &blockName,
std::string &functionId,
llvm::BasicBlock &terminatorBlock,
llvm::Value &exceptionCaughtFlag) {
-
+
llvm::BasicBlock *ret = llvm::BasicBlock::Create(context,
blockName,
&toAddTo);
-
+
builder.SetInsertPoint(ret);
-
+
std::ostringstream bufferToPrint;
bufferToPrint << "Gen: Executing catch block "
<< blockName
<< " in "
<< functionId
<< std::endl;
- generateStringPrint(context,
- module,
- builder,
+ generateStringPrint(context,
+ module,
+ builder,
bufferToPrint.str(),
USE_GLOBAL_STR_CONSTS);
builder.CreateStore(ourExceptionCaughtState, &exceptionCaughtFlag);
builder.CreateBr(&terminatorBlock);
-
+
return(ret);
}
-/// Generates a function which invokes a function (toInvoke) and, whose
-/// unwind block will "catch" the type info types correspondingly held in the
-/// exceptionTypesToCatch argument. If the toInvoke function throws an
-/// exception which does not match any type info types contained in
-/// exceptionTypesToCatch, the generated code will call _Unwind_Resume
-/// with the raised exception. On the other hand the generated code will
+/// Generates a function which invokes a function (toInvoke) and whose
+/// unwind block will "catch" the type info types correspondingly held in the
+/// exceptionTypesToCatch argument. If the toInvoke function throws an
+/// exception which does not match any type info types contained in
+/// exceptionTypesToCatch, the generated code will call _Unwind_Resume
+/// with the raised exception. On the other hand the generated code will
/// normally exit if the toInvoke function does not throw an exception.
-/// The generated "finally" block is always run regardless of the cause of
+/// The generated "finally" block is always run regardless of the cause of
/// the generated function exit.
/// The generated function is returned after being verified.
/// @param module code for module instance
/// @param builder builder instance
-/// @param fpm a function pass manager holding optional IR to IR
+/// @param fpm a function pass manager holding optional IR to IR
/// transformations
/// @param toInvoke inner function to invoke
/// @param ourId id used for printing purposes
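
In C++ terms, the function generated below behaves roughly like the following pseudo-equivalent (control flow only; names are illustrative, and the real body is of course emitted as IR):

    // Rough C++ analogue of the generated wrapper.
    void generatedWrapper(int exceptTypeToThrow) {
      try {
        toInvoke(exceptTypeToThrow);
      } catch (/* one handler per configured type info */...) {
        // print which catch ran and flag the exception as caught
      }
      // "finally" block: always runs; resumes unwinding (_Unwind_Resume)
      // if nothing above caught the exception, else falls through to the
      // end block, which deletes the exception if needed.
    }
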
@@ -1094,76 +1094,76 @@ static llvm::BasicBlock *createCatchBlock(llvm::LLVMContext &context,
/// @param exceptionTypesToCatch array of type info types to "catch"
/// @returns generated function
static
-llvm::Function *createCatchWrappedInvokeFunction(llvm::Module &module,
- llvm::IRBuilder<> &builder,
+llvm::Function *createCatchWrappedInvokeFunction(llvm::Module &module,
+ llvm::IRBuilder<> &builder,
llvm::FunctionPassManager &fpm,
llvm::Function &toInvoke,
std::string ourId,
unsigned numExceptionsToCatch,
unsigned exceptionTypesToCatch[]) {
-
+
llvm::LLVMContext &context = module.getContext();
llvm::Function *toPrint32Int = module.getFunction("print32Int");
-
+
ArgTypes argTypes;
argTypes.push_back(builder.getInt32Ty());
-
+
ArgNames argNames;
argNames.push_back("exceptTypeToThrow");
-
- llvm::Function *ret = createFunction(module,
+
+ llvm::Function *ret = createFunction(module,
builder.getVoidTy(),
- argTypes,
- argNames,
+ argTypes,
+ argNames,
ourId,
- llvm::Function::ExternalLinkage,
- false,
+ llvm::Function::ExternalLinkage,
+ false,
false);
-
+
// Block which calls invoke
llvm::BasicBlock *entryBlock = llvm::BasicBlock::Create(context,
- "entry",
+ "entry",
ret);
// Normal block for invoke
- llvm::BasicBlock *normalBlock = llvm::BasicBlock::Create(context,
- "normal",
+ llvm::BasicBlock *normalBlock = llvm::BasicBlock::Create(context,
+ "normal",
ret);
// Unwind block for invoke
- llvm::BasicBlock *exceptionBlock = llvm::BasicBlock::Create(context,
- "exception",
+ llvm::BasicBlock *exceptionBlock = llvm::BasicBlock::Create(context,
+ "exception",
ret);
-
+
// Block which routes exception to correct catch handler block
- llvm::BasicBlock *exceptionRouteBlock = llvm::BasicBlock::Create(context,
- "exceptionRoute",
+ llvm::BasicBlock *exceptionRouteBlock = llvm::BasicBlock::Create(context,
+ "exceptionRoute",
ret);
-
+
// Foreign exception handler
- llvm::BasicBlock *externalExceptionBlock = llvm::BasicBlock::Create(context,
- "externalException",
+ llvm::BasicBlock *externalExceptionBlock = llvm::BasicBlock::Create(context,
+ "externalException",
ret);
-
+
// Block which calls _Unwind_Resume
- llvm::BasicBlock *unwindResumeBlock = llvm::BasicBlock::Create(context,
- "unwindResume",
+ llvm::BasicBlock *unwindResumeBlock = llvm::BasicBlock::Create(context,
+ "unwindResume",
ret);
-
+
// Clean up block which delete exception if needed
llvm::BasicBlock *endBlock = llvm::BasicBlock::Create(context, "end", ret);
-
+
std::string nextName;
std::vector<llvm::BasicBlock*> catchBlocks(numExceptionsToCatch);
llvm::Value *exceptionCaughtFlag = NULL;
llvm::Value *exceptionStorage = NULL;
llvm::Value *caughtResultStorage = NULL;
-
- // Finally block which will branch to unwindResumeBlock if
+
+ // Finally block which will branch to unwindResumeBlock if
// exception is not caught. Initializes/allocates stack locations.
- llvm::BasicBlock *finallyBlock = createFinallyBlock(context,
- module,
- builder,
- *ret,
- nextName = "finally",
+ llvm::BasicBlock *finallyBlock = createFinallyBlock(context,
+ module,
+ builder,
+ *ret,
+ nextName = "finally",
ourId,
*endBlock,
*unwindResumeBlock,
@@ -1171,74 +1171,74 @@ llvm::Function *createCatchWrappedInvokeFunction(llvm::Module &module,
&exceptionStorage,
&caughtResultStorage
);
-
+
for (unsigned i = 0; i < numExceptionsToCatch; ++i) {
nextName = ourTypeInfoNames[exceptionTypesToCatch[i]];
-
+
// One catch block per type info to be caught
- catchBlocks[i] = createCatchBlock(context,
- module,
- builder,
+ catchBlocks[i] = createCatchBlock(context,
+ module,
+ builder,
*ret,
- nextName,
+ nextName,
ourId,
*finallyBlock,
*exceptionCaughtFlag);
}
-
+
// Entry Block
-
+
builder.SetInsertPoint(entryBlock);
-
+
std::vector<llvm::Value*> args;
args.push_back(namedValues["exceptTypeToThrow"]);
- builder.CreateInvoke(&toInvoke,
- normalBlock,
- exceptionBlock,
+ builder.CreateInvoke(&toInvoke,
+ normalBlock,
+ exceptionBlock,
args);
-
+
// End Block
-
+
builder.SetInsertPoint(endBlock);
-
- generateStringPrint(context,
+
+ generateStringPrint(context,
module,
- builder,
+ builder,
"Gen: In end block: exiting in " + ourId + ".\n",
USE_GLOBAL_STR_CONSTS);
llvm::Function *deleteOurException = module.getFunction("deleteOurException");
-
+
// Note: function handles NULL exceptions
- builder.CreateCall(deleteOurException,
+ builder.CreateCall(deleteOurException,
builder.CreateLoad(exceptionStorage));
builder.CreateRetVoid();
-
+
// Normal Block
-
+
builder.SetInsertPoint(normalBlock);
-
- generateStringPrint(context,
+
+ generateStringPrint(context,
module,
- builder,
+ builder,
"Gen: No exception in " + ourId + "!\n",
USE_GLOBAL_STR_CONSTS);
-
+
// Finally block is always called
builder.CreateBr(finallyBlock);
-
+
// Unwind Resume Block
-
+
builder.SetInsertPoint(unwindResumeBlock);
-
+
builder.CreateResume(builder.CreateLoad(caughtResultStorage));
-
+
// Exception Block
-
+
builder.SetInsertPoint(exceptionBlock);
-
+
llvm::Function *personality = module.getFunction("ourPersonality");
-
- llvm::LandingPadInst *caughtResult =
+
+ llvm::LandingPadInst *caughtResult =
builder.CreateLandingPad(ourCaughtResultType,
personality,
numExceptionsToCatch,
@@ -1255,48 +1255,48 @@ llvm::Function *createCatchWrappedInvokeFunction(llvm::Module &module,
llvm::Value *unwindException = builder.CreateExtractValue(caughtResult, 0);
llvm::Value *retTypeInfoIndex = builder.CreateExtractValue(caughtResult, 1);
- // FIXME: Redundant storage which, beyond utilizing value of
- // caughtResultStore for unwindException storage, may be alleviated
+ // FIXME: Redundant storage which, beyond utilizing value of
+ // caughtResultStore for unwindException storage, may be alleviated
// altogether with a block rearrangement
builder.CreateStore(caughtResult, caughtResultStorage);
builder.CreateStore(unwindException, exceptionStorage);
builder.CreateStore(ourExceptionThrownState, exceptionCaughtFlag);
-
- // Retrieve exception_class member from thrown exception
+
+ // Retrieve exception_class member from thrown exception
// (_Unwind_Exception instance). This member tells us whether or not
// the exception is foreign.
- llvm::Value *unwindExceptionClass =
+ llvm::Value *unwindExceptionClass =
builder.CreateLoad(builder.CreateStructGEP(
- builder.CreatePointerCast(unwindException,
- ourUnwindExceptionType->getPointerTo()),
+ builder.CreatePointerCast(unwindException,
+ ourUnwindExceptionType->getPointerTo()),
0));
-
+
// Branch to the externalExceptionBlock if the exception is foreign or
// to a catch router if not. Either way the finally block will be run.
builder.CreateCondBr(builder.CreateICmpEQ(unwindExceptionClass,
- llvm::ConstantInt::get(builder.getInt64Ty(),
+ llvm::ConstantInt::get(builder.getInt64Ty(),
ourBaseExceptionClass)),
exceptionRouteBlock,
externalExceptionBlock);
-
+
// External Exception Block
-
+
builder.SetInsertPoint(externalExceptionBlock);
-
- generateStringPrint(context,
+
+ generateStringPrint(context,
module,
- builder,
+ builder,
"Gen: Foreign exception received.\n",
USE_GLOBAL_STR_CONSTS);
-
+
// Branch to the finally block
builder.CreateBr(finallyBlock);
-
+
// Exception Route Block
-
+
builder.SetInsertPoint(exceptionRouteBlock);
-
- // Casts exception pointer (_Unwind_Exception instance) to parent
+
+ // Casts exception pointer (_Unwind_Exception instance) to parent
// (OurException instance).
//
// Note: ourBaseFromUnwindOffset is usually negative
@@ -1304,34 +1304,34 @@ llvm::Function *createCatchWrappedInvokeFunction(llvm::Module &module,
builder.CreateConstGEP1_64(unwindException,
ourBaseFromUnwindOffset),
ourExceptionType->getPointerTo());
-
+
// Retrieve thrown exception type info type
//
// Note: Index is not relative to pointer but instead to structure
// unlike a true getelementptr (GEP) instruction
typeInfoThrown = builder.CreateStructGEP(typeInfoThrown, 0);
-
- llvm::Value *typeInfoThrownType =
+
+ llvm::Value *typeInfoThrownType =
builder.CreateStructGEP(typeInfoThrown, 0);
-
- generateIntegerPrint(context,
+
+ generateIntegerPrint(context,
module,
- builder,
- *toPrint32Int,
+ builder,
+ *toPrint32Int,
*(builder.CreateLoad(typeInfoThrownType)),
- "Gen: Exception type <%d> received (stack unwound) "
- " in " +
- ourId +
+ "Gen: Exception type <%d> received (stack unwound) "
+ " in " +
+ ourId +
".\n",
USE_GLOBAL_STR_CONSTS);
-
+
// Route to matched type info catch block or run cleanup finally block
- llvm::SwitchInst *switchToCatchBlock = builder.CreateSwitch(retTypeInfoIndex,
- finallyBlock,
+ llvm::SwitchInst *switchToCatchBlock = builder.CreateSwitch(retTypeInfoIndex,
+ finallyBlock,
numExceptionsToCatch);
-
+
unsigned nextTypeToCatch;
-
+
for (unsigned i = 1; i <= numExceptionsToCatch; ++i) {
nextTypeToCatch = i - 1;
switchToCatchBlock->addCase(llvm::ConstantInt::get(
@@ -1341,18 +1341,18 @@ llvm::Function *createCatchWrappedInvokeFunction(llvm::Module &module,
llvm::verifyFunction(*ret);
fpm.run(*ret);
-
+
return(ret);
}
/// Generates function which throws either an exception matched to a runtime
-/// determined type info type (argument to generated function), or if this
-/// runtime value matches nativeThrowType, throws a foreign exception by
+/// determined type info type (argument to generated function), or if this
+/// runtime value matches nativeThrowType, throws a foreign exception by
/// calling nativeThrowFunct.
/// @param module code for module instance
/// @param builder builder instance
-/// @param fpm a function pass manager holding optional IR to IR
+/// @param fpm a function pass manager holding optional IR to IR
/// transformations
/// @param ourId id used for printing purposes
/// @param nativeThrowType a runtime argument of this value results in
@@ -1361,8 +1361,8 @@ llvm::Function *createCatchWrappedInvokeFunction(llvm::Module &module,
/// if the above nativeThrowType matches generated function's arg.
/// @returns generated function
static
-llvm::Function *createThrowExceptionFunction(llvm::Module &module,
- llvm::IRBuilder<> &builder,
+llvm::Function *createThrowExceptionFunction(llvm::Module &module,
+ llvm::IRBuilder<> &builder,
llvm::FunctionPassManager &fpm,
std::string ourId,
int32_t nativeThrowType,
@@ -1373,7 +1373,7 @@ llvm::Function *createThrowExceptionFunction(llvm::Module &module,
unwindArgTypes.push_back(builder.getInt32Ty());
ArgNames unwindArgNames;
unwindArgNames.push_back("exceptTypeToThrow");
-
+
llvm::Function *ret = createFunction(module,
builder.getVoidTy(),
unwindArgTypes,
@@ -1382,88 +1382,88 @@ llvm::Function *createThrowExceptionFunction(llvm::Module &module,
llvm::Function::ExternalLinkage,
false,
false);
-
+
// Throws either one of our exception or a native C++ exception depending
// on a runtime argument value containing a type info type.
llvm::BasicBlock *entryBlock = llvm::BasicBlock::Create(context,
- "entry",
+ "entry",
ret);
// Throws a foreign exception
llvm::BasicBlock *nativeThrowBlock = llvm::BasicBlock::Create(context,
- "nativeThrow",
+ "nativeThrow",
ret);
// Throws one of our Exceptions
llvm::BasicBlock *generatedThrowBlock = llvm::BasicBlock::Create(context,
- "generatedThrow",
+ "generatedThrow",
ret);
// Retrieved runtime type info type to throw
llvm::Value *exceptionType = namedValues["exceptTypeToThrow"];
-
+
// nativeThrowBlock block
-
+
builder.SetInsertPoint(nativeThrowBlock);
-
+
// Throws foreign exception
builder.CreateCall(&nativeThrowFunct, exceptionType);
builder.CreateUnreachable();
-
+
// entry block
-
+
builder.SetInsertPoint(entryBlock);
-
+
llvm::Function *toPrint32Int = module.getFunction("print32Int");
- generateIntegerPrint(context,
+ generateIntegerPrint(context,
module,
- builder,
- *toPrint32Int,
- *exceptionType,
- "\nGen: About to throw exception type <%d> in " +
- ourId +
+ builder,
+ *toPrint32Int,
+ *exceptionType,
+ "\nGen: About to throw exception type <%d> in " +
+ ourId +
".\n",
USE_GLOBAL_STR_CONSTS);
-
+
// Switches on runtime type info type value to determine whether or not
- // a foreign exception is thrown. Defaults to throwing one of our
+ // a foreign exception is thrown. Defaults to throwing one of our
// generated exceptions.
llvm::SwitchInst *theSwitch = builder.CreateSwitch(exceptionType,
generatedThrowBlock,
1);
-
- theSwitch->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(context),
+
+ theSwitch->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(context),
nativeThrowType),
nativeThrowBlock);
-
+
// generatedThrow block
-
+
builder.SetInsertPoint(generatedThrowBlock);
-
+
llvm::Function *createOurException = module.getFunction("createOurException");
llvm::Function *raiseOurException = module.getFunction(
"_Unwind_RaiseException");
-
+
// Creates exception to throw with runtime type info type.
- llvm::Value *exception = builder.CreateCall(createOurException,
+ llvm::Value *exception = builder.CreateCall(createOurException,
namedValues["exceptTypeToThrow"]);
-
+
// Throw generated Exception
builder.CreateCall(raiseOurException, exception);
builder.CreateUnreachable();
-
+
llvm::verifyFunction(*ret);
fpm.run(*ret);
-
+
return(ret);
}
static void createStandardUtilityFunctions(unsigned numTypeInfos,
- llvm::Module &module,
+ llvm::Module &module,
llvm::IRBuilder<> &builder);
-/// Creates test code by generating and organizing these functions into the
+/// Creates test code by generating and organizing these functions into the
/// test case. The test case consists of an outer function setup to invoke
-/// an inner function within an environment having multiple catch and single
+/// an inner function within an environment having multiple catch and single
/// finally blocks. This inner function is also setup to invoke a throw
-/// function within an evironment similar in nature to the outer function's
+/// function within an environment similar in nature to the outer function's
/// catch and finally blocks. Each of these two functions catch mutually
/// exclusive subsets (even or odd) of the type info types configured
/// for this demo. All generated functions have a runtime argument which
@@ -1474,26 +1474,26 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
/// a supplied a function which in turn will throw a foreign exception.
/// @param module code for module instance
/// @param builder builder instance
-/// @param fpm a function pass manager holding optional IR to IR
+/// @param fpm a function pass manager holding optional IR to IR
/// transformations
/// @param nativeThrowFunctName name of external function which will throw
/// a foreign exception
/// @returns outermost generated test function.
-llvm::Function *createUnwindExceptionTest(llvm::Module &module,
- llvm::IRBuilder<> &builder,
+llvm::Function *createUnwindExceptionTest(llvm::Module &module,
+ llvm::IRBuilder<> &builder,
llvm::FunctionPassManager &fpm,
std::string nativeThrowFunctName) {
// Number of type infos to generate
unsigned numTypeInfos = 6;
-
+
// Initialize intrinsics and external functions to use along with exception
// and type info globals.
createStandardUtilityFunctions(numTypeInfos,
module,
builder);
llvm::Function *nativeThrowFunct = module.getFunction(nativeThrowFunctName);
-
- // Create exception throw function using the value ~0 to cause
+
+ // Create exception throw function using the value ~0 to cause
// foreign exceptions to be thrown.
llvm::Function *throwFunct = createThrowExceptionFunction(module,
builder,
@@ -1503,9 +1503,9 @@ llvm::Function *createUnwindExceptionTest(llvm::Module &module,
*nativeThrowFunct);
// Inner function will catch even type infos
unsigned innerExceptionTypesToCatch[] = {6, 2, 4};
- size_t numExceptionTypesToCatch = sizeof(innerExceptionTypesToCatch) /
+ size_t numExceptionTypesToCatch = sizeof(innerExceptionTypesToCatch) /
sizeof(unsigned);
-
+
// Generate inner function.
llvm::Function *innerCatchFunct = createCatchWrappedInvokeFunction(module,
builder,
@@ -1514,12 +1514,12 @@ llvm::Function *createUnwindExceptionTest(llvm::Module &module,
"innerCatchFunct",
numExceptionTypesToCatch,
innerExceptionTypesToCatch);
-
+
// Outer function will catch odd type infos
unsigned outerExceptionTypesToCatch[] = {3, 1, 5};
- numExceptionTypesToCatch = sizeof(outerExceptionTypesToCatch) /
+ numExceptionTypesToCatch = sizeof(outerExceptionTypesToCatch) /
sizeof(unsigned);
-
+
// Generate outer function
llvm::Function *outerCatchFunct = createCatchWrappedInvokeFunction(module,
builder,
@@ -1528,7 +1528,7 @@ llvm::Function *createUnwindExceptionTest(llvm::Module &module,
"outerCatchFunct",
numExceptionTypesToCatch,
outerExceptionTypesToCatch);
-
+
// Return outer function to run
return(outerCatchFunct);
}
@@ -1539,15 +1539,15 @@ class OurCppRunException : public std::runtime_error {
public:
OurCppRunException(const std::string reason) :
std::runtime_error(reason) {}
-
+
OurCppRunException (const OurCppRunException &toCopy) :
std::runtime_error(toCopy) {}
-
+
OurCppRunException &operator = (const OurCppRunException &toCopy) {
return(reinterpret_cast<OurCppRunException&>(
std::runtime_error::operator=(toCopy)));
}
-
+
~OurCppRunException (void) throw () {}
};
@@ -1562,7 +1562,7 @@ void throwCppException (int32_t ignoreIt) {
typedef void (*OurExceptionThrowFunctType) (int32_t typeToThrow);
-/// This is a test harness which runs test by executing generated
+/// This is a test harness which runs a test by executing a generated
/// function with a type info type to throw. Harness wraps the execution
/// of generated function in a C++ try catch clause.
/// @param engine execution engine to use for executing generated function.
@@ -1572,15 +1572,15 @@ typedef void (*OurExceptionThrowFunctType) (int32_t typeToThrow);
/// @param typeToThrow type info type of generated exception to throw, or
/// indicator to cause foreign exception to be thrown.
static
-void runExceptionThrow(llvm::ExecutionEngine *engine,
- llvm::Function *function,
+void runExceptionThrow(llvm::ExecutionEngine *engine,
+ llvm::Function *function,
int32_t typeToThrow) {
-
+
// Find test's function pointer
- OurExceptionThrowFunctType functPtr =
+ OurExceptionThrowFunctType functPtr =
reinterpret_cast<OurExceptionThrowFunctType>(
reinterpret_cast<intptr_t>(engine->getPointerToFunction(function)));
-
+
try {
// Run test
(*functPtr)(typeToThrow);
@@ -1589,15 +1589,15 @@ void runExceptionThrow(llvm::ExecutionEngine *engine,
// Catch foreign C++ exception
fprintf(stderr,
"\nrunExceptionThrow(...):In C++ catch OurCppRunException "
- "with reason: %s.\n",
+ "with reason: %s.\n",
exc.what());
}
catch (...) {
- // Catch all exceptions including our generated ones. This latter
+ // Catch all exceptions including our generated ones. This latter
// functionality works according to the example in rules 1.6.4 of
- // http://sourcery.mentor.com/public/cxx-abi/abi-eh.html (v1.22),
- // given that these will be exceptions foreign to C++
- // (the _Unwind_Exception::exception_class should be different from
+ // http://sourcery.mentor.com/public/cxx-abi/abi-eh.html (v1.22),
+ // given that these will be exceptions foreign to C++
+ // (the _Unwind_Exception::exception_class should be different from
// the one used by C++).
fprintf(stderr,
"\nrunExceptionThrow(...):In C++ catch all.\n");
@@ -1610,32 +1610,32 @@ void runExceptionThrow(llvm::ExecutionEngine *engine,
typedef llvm::ArrayRef<llvm::Type*> TypeArray;
-/// This initialization routine creates type info globals and
+/// This initialization routine creates type info globals and
/// adds external function declarations to module.
/// @param numTypeInfos number of linear type info associated type info types
/// to create as GlobalVariable instances, starting with the value 1.
/// @param module code for module instance
/// @param builder builder instance
static void createStandardUtilityFunctions(unsigned numTypeInfos,
- llvm::Module &module,
+ llvm::Module &module,
llvm::IRBuilder<> &builder) {
-
+
llvm::LLVMContext &context = module.getContext();
-
+
// Exception initializations
-
+
// Setup exception catch state
- ourExceptionNotThrownState =
+ ourExceptionNotThrownState =
llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 0),
- ourExceptionThrownState =
+ ourExceptionThrownState =
llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 1),
- ourExceptionCaughtState =
+ ourExceptionCaughtState =
llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 2),
-
-
-
+
+
+
// Create our type info type
- ourTypeInfoType = llvm::StructType::get(context,
+ ourTypeInfoType = llvm::StructType::get(context,
TypeArray(builder.getInt32Ty()));
llvm::Type *caughtResultFieldTypes[] = {
@@ -1648,47 +1648,47 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
TypeArray(caughtResultFieldTypes));
// Create OurException type
- ourExceptionType = llvm::StructType::get(context,
+ ourExceptionType = llvm::StructType::get(context,
TypeArray(ourTypeInfoType));
-
+
// Create portion of _Unwind_Exception type
//
// Note: Declaring only a portion of the _Unwind_Exception struct.
// Does this cause problems?
ourUnwindExceptionType =
- llvm::StructType::get(context,
+ llvm::StructType::get(context,
TypeArray(builder.getInt64Ty()));
struct OurBaseException_t dummyException;
-
+
// Calculate offset of OurException::unwindException member.
- ourBaseFromUnwindOffset = ((uintptr_t) &dummyException) -
+ ourBaseFromUnwindOffset = ((uintptr_t) &dummyException) -
((uintptr_t) &(dummyException.unwindException));
-
+
#ifdef DEBUG
fprintf(stderr,
"createStandardUtilityFunctions(...):ourBaseFromUnwindOffset "
"= %lld, sizeof(struct OurBaseException_t) - "
"sizeof(struct _Unwind_Exception) = %lu.\n",
ourBaseFromUnwindOffset,
- sizeof(struct OurBaseException_t) -
+ sizeof(struct OurBaseException_t) -
sizeof(struct _Unwind_Exception));
#endif
-
+
size_t numChars = sizeof(ourBaseExcpClassChars) / sizeof(char);
-
+
// Create our _Unwind_Exception::exception_class value
ourBaseExceptionClass = genClass(ourBaseExcpClassChars, numChars);
-
+
// Type infos
-
+
std::string baseStr = "typeInfo", typeInfoName;
std::ostringstream typeInfoNameBuilder;
std::vector<llvm::Constant*> structVals;
-
+
llvm::Constant *nextStruct;
llvm::GlobalVariable *nextGlobal = NULL;
-
+
// Generate each type info
//
// Note: First type info is not used.
@@ -1696,202 +1696,202 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
structVals.clear();
structVals.push_back(llvm::ConstantInt::get(builder.getInt32Ty(), i));
nextStruct = llvm::ConstantStruct::get(ourTypeInfoType, structVals);
-
+
typeInfoNameBuilder.str("");
typeInfoNameBuilder << baseStr << i;
typeInfoName = typeInfoNameBuilder.str();
-
+
// Note: Does not seem to work without allocation
- nextGlobal =
- new llvm::GlobalVariable(module,
- ourTypeInfoType,
- true,
- llvm::GlobalValue::ExternalLinkage,
- nextStruct,
+ nextGlobal =
+ new llvm::GlobalVariable(module,
+ ourTypeInfoType,
+ true,
+ llvm::GlobalValue::ExternalLinkage,
+ nextStruct,
typeInfoName);
-
+
ourTypeInfoNames.push_back(typeInfoName);
ourTypeInfoNamesIndex[i] = typeInfoName;
}
-
+
ArgNames argNames;
ArgTypes argTypes;
llvm::Function *funct = NULL;
-
+
// print32Int
-
+
llvm::Type *retType = builder.getVoidTy();
-
+
argTypes.clear();
argTypes.push_back(builder.getInt32Ty());
argTypes.push_back(builder.getInt8PtrTy());
-
+
argNames.clear();
-
- createFunction(module,
- retType,
- argTypes,
- argNames,
- "print32Int",
- llvm::Function::ExternalLinkage,
- true,
+
+ createFunction(module,
+ retType,
+ argTypes,
+ argNames,
+ "print32Int",
+ llvm::Function::ExternalLinkage,
+ true,
false);
-
+
// print64Int
-
+
retType = builder.getVoidTy();
-
+
argTypes.clear();
argTypes.push_back(builder.getInt64Ty());
argTypes.push_back(builder.getInt8PtrTy());
-
+
argNames.clear();
-
- createFunction(module,
- retType,
- argTypes,
- argNames,
- "print64Int",
- llvm::Function::ExternalLinkage,
- true,
+
+ createFunction(module,
+ retType,
+ argTypes,
+ argNames,
+ "print64Int",
+ llvm::Function::ExternalLinkage,
+ true,
false);
-
+
// printStr
-
+
retType = builder.getVoidTy();
-
+
argTypes.clear();
argTypes.push_back(builder.getInt8PtrTy());
-
+
argNames.clear();
-
- createFunction(module,
- retType,
- argTypes,
- argNames,
- "printStr",
- llvm::Function::ExternalLinkage,
- true,
+
+ createFunction(module,
+ retType,
+ argTypes,
+ argNames,
+ "printStr",
+ llvm::Function::ExternalLinkage,
+ true,
false);
-
+
// throwCppException
-
+
retType = builder.getVoidTy();
-
+
argTypes.clear();
argTypes.push_back(builder.getInt32Ty());
-
+
argNames.clear();
-
- createFunction(module,
- retType,
- argTypes,
- argNames,
- "throwCppException",
- llvm::Function::ExternalLinkage,
- true,
+
+ createFunction(module,
+ retType,
+ argTypes,
+ argNames,
+ "throwCppException",
+ llvm::Function::ExternalLinkage,
+ true,
false);
-
+
// deleteOurException
-
+
retType = builder.getVoidTy();
-
+
argTypes.clear();
argTypes.push_back(builder.getInt8PtrTy());
-
+
argNames.clear();
-
- createFunction(module,
- retType,
- argTypes,
- argNames,
- "deleteOurException",
- llvm::Function::ExternalLinkage,
- true,
+
+ createFunction(module,
+ retType,
+ argTypes,
+ argNames,
+ "deleteOurException",
+ llvm::Function::ExternalLinkage,
+ true,
false);
-
+
// createOurException
-
+
retType = builder.getInt8PtrTy();
-
+
argTypes.clear();
argTypes.push_back(builder.getInt32Ty());
-
+
argNames.clear();
-
- createFunction(module,
- retType,
- argTypes,
- argNames,
- "createOurException",
- llvm::Function::ExternalLinkage,
- true,
+
+ createFunction(module,
+ retType,
+ argTypes,
+ argNames,
+ "createOurException",
+ llvm::Function::ExternalLinkage,
+ true,
false);
-
+
// _Unwind_RaiseException
-
+
retType = builder.getInt32Ty();
-
+
argTypes.clear();
argTypes.push_back(builder.getInt8PtrTy());
-
+
argNames.clear();
-
- funct = createFunction(module,
- retType,
- argTypes,
- argNames,
- "_Unwind_RaiseException",
- llvm::Function::ExternalLinkage,
- true,
+
+ funct = createFunction(module,
+ retType,
+ argTypes,
+ argNames,
+ "_Unwind_RaiseException",
+ llvm::Function::ExternalLinkage,
+ true,
false);
-
- funct->addFnAttr(llvm::Attribute::NoReturn);
-
+
+ funct->setDoesNotReturn();
+
// _Unwind_Resume
-
+
retType = builder.getInt32Ty();
-
+
argTypes.clear();
argTypes.push_back(builder.getInt8PtrTy());
-
+
argNames.clear();
-
- funct = createFunction(module,
- retType,
- argTypes,
- argNames,
- "_Unwind_Resume",
- llvm::Function::ExternalLinkage,
- true,
+
+ funct = createFunction(module,
+ retType,
+ argTypes,
+ argNames,
+ "_Unwind_Resume",
+ llvm::Function::ExternalLinkage,
+ true,
false);
-
- funct->addFnAttr(llvm::Attribute::NoReturn);
-
+
+ funct->setDoesNotReturn();
+
// ourPersonality
-
+
retType = builder.getInt32Ty();
-
+
argTypes.clear();
argTypes.push_back(builder.getInt32Ty());
argTypes.push_back(builder.getInt32Ty());
argTypes.push_back(builder.getInt64Ty());
argTypes.push_back(builder.getInt8PtrTy());
argTypes.push_back(builder.getInt8PtrTy());
-
+
argNames.clear();
-
- createFunction(module,
- retType,
- argTypes,
- argNames,
- "ourPersonality",
- llvm::Function::ExternalLinkage,
- true,
+
+ createFunction(module,
+ retType,
+ argTypes,
+ argNames,
+ "ourPersonality",
+ llvm::Function::ExternalLinkage,
+ true,
false);
-
+
// llvm.eh.typeid.for intrinsic
-
+
getDeclaration(&module, llvm::Intrinsic::eh_typeid_for);
}
@@ -1901,7 +1901,7 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
//===----------------------------------------------------------------------===//
/// Demo main routine which takes the type info types to throw. A test will
-/// be run for each given type info type. While type info types with the value
+/// be run for each given type info type. Type info types with the value
/// of -1 will trigger a foreign C++ exception to be thrown; type info types
/// <= 6 and >= 1 will be caught by test functions; and type info types > 6
/// will result in exceptions which pass through to the test harness. All other
@@ -1920,87 +1920,86 @@ int main(int argc, char *argv[]) {
" for a full test.\n\n");
return(0);
}
-
+
// If not set, exception handling will not be turned on
llvm::TargetOptions Opts;
Opts.JITExceptionHandling = true;
-
+
llvm::InitializeNativeTarget();
llvm::LLVMContext &context = llvm::getGlobalContext();
llvm::IRBuilder<> theBuilder(context);
-
+
// Make the module, which holds all the code.
llvm::Module *module = new llvm::Module("my cool jit", context);
-
+
// Build engine with JIT
llvm::EngineBuilder factory(module);
factory.setEngineKind(llvm::EngineKind::JIT);
factory.setAllocateGVsWithCode(false);
factory.setTargetOptions(Opts);
llvm::ExecutionEngine *executionEngine = factory.create();
-
+
{
llvm::FunctionPassManager fpm(module);
-
- // Set up the optimizer pipeline.
+
+ // Set up the optimizer pipeline.
// Start with registering info about how the
// target lays out data structures.
fpm.add(new llvm::DataLayout(*executionEngine->getDataLayout()));
-
+
// Optimizations turned on
#ifdef ADD_OPT_PASSES
-
+
// Basic AliasAnalysis support for GVN.
fpm.add(llvm::createBasicAliasAnalysisPass());
-
+
// Promote allocas to registers.
fpm.add(llvm::createPromoteMemoryToRegisterPass());
-
+
// Do simple "peephole" optimizations and bit-twiddling optzns.
fpm.add(llvm::createInstructionCombiningPass());
-
+
// Reassociate expressions.
fpm.add(llvm::createReassociatePass());
-
+
// Eliminate Common SubExpressions.
fpm.add(llvm::createGVNPass());
-
- // Simplify the control flow graph (deleting unreachable
+
+ // Simplify the control flow graph (deleting unreachable
// blocks, etc).
fpm.add(llvm::createCFGSimplificationPass());
#endif // ADD_OPT_PASSES
-
+
fpm.doInitialization();
-
+
// Generate test code using function throwCppException(...) as
// the function which throws foreign exceptions.
- llvm::Function *toRun =
- createUnwindExceptionTest(*module,
- theBuilder,
+ llvm::Function *toRun =
+ createUnwindExceptionTest(*module,
+ theBuilder,
fpm,
"throwCppException");
-
+
fprintf(stderr, "\nBegin module dump:\n\n");
-
+
module->dump();
-
+
fprintf(stderr, "\nEnd module dump:\n");
-
+
fprintf(stderr, "\n\nBegin Test:\n");
-
+
for (int i = 1; i < argc; ++i) {
// Run test for each argument whose value is the exception
// type to throw.
- runExceptionThrow(executionEngine,
- toRun,
+ runExceptionThrow(executionEngine,
+ toRun,
(unsigned) strtoul(argv[i], NULL, 10));
}
-
+
fprintf(stderr, "\nEnd Test:\n\n");
- }
-
+ }
+
delete executionEngine;
-
+
return 0;
}
-
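
Once built, the demo is driven entirely by its arguments (an assumed invocation; the binary name follows the example's directory):

    // Throw generated types 2 and 3 (caught by the inner and outer
    // functions), type 7 (passes through to the harness), and -1
    // (a foreign C++ exception):
    //
    //   ./ExceptionDemo 2 3 7 -1
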
diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h
index 8cf03c268cd..620d0887be7 100644
--- a/include/llvm-c/Core.h
+++ b/include/llvm-c/Core.h
@@ -1803,7 +1803,7 @@ LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg);
* Set the alignment for a function parameter.
*
* @see llvm::Argument::addAttr()
- * @see llvm::Attributes::constructAlignmentFromInt()
+ * @see llvm::AttrBuilder::addAlignmentAttr()
*/
void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align);
diff --git a/include/llvm-c/Target.h b/include/llvm-c/Target.h
index 92228701e31..57abfa0207f 100644
--- a/include/llvm-c/Target.h
+++ b/include/llvm-c/Target.h
@@ -172,10 +172,20 @@ enum LLVMByteOrdering LLVMByteOrder(LLVMTargetDataRef);
See the method llvm::DataLayout::getPointerSize. */
unsigned LLVMPointerSize(LLVMTargetDataRef);
+/** Returns the pointer size in bytes for a target in a specified
+ address space.
+ See the method llvm::DataLayout::getPointerSize. */
+unsigned LLVMPointerSizeForAS(LLVMTargetDataRef, unsigned AS);
+
/** Returns the integer type that is the same size as a pointer on a target.
See the method llvm::DataLayout::getIntPtrType. */
LLVMTypeRef LLVMIntPtrType(LLVMTargetDataRef);
+/** Returns the integer type that is the same size as a pointer on a target.
+ This version allows the address space to be specified.
+ See the method llvm::DataLayout::getIntPtrType. */
+LLVMTypeRef LLVMIntPtrTypeForAS(LLVMTargetDataRef, unsigned AS);
+
/** Computes the size of a type in bytes for a target.
See the method llvm::DataLayout::getTypeSizeInBits. */
unsigned long long LLVMSizeOfTypeInBits(LLVMTargetDataRef, LLVMTypeRef);
diff --git a/include/llvm/ADT/BitVector.h b/include/llvm/ADT/BitVector.h
index 26ec346b182..9d6388f7ee6 100644
--- a/include/llvm/ADT/BitVector.h
+++ b/include/llvm/ADT/BitVector.h
@@ -237,6 +237,34 @@ public:
return *this;
}
+ /// set - Efficiently set a range of bits in [I, E)
+ BitVector &set(unsigned I, unsigned E) {
+ assert(I <= E && "Attempted to set backwards range!");
+ assert(E <= size() && "Attempted to set out-of-bounds range!");
+
+ if (I == E) return *this;
+
+ if (I / BITWORD_SIZE == E / BITWORD_SIZE) {
+ BitWord EMask = 1UL << (E % BITWORD_SIZE);
+ BitWord IMask = 1UL << (I % BITWORD_SIZE);
+ BitWord Mask = EMask - IMask;
+ Bits[I / BITWORD_SIZE] |= Mask;
+ return *this;
+ }
+
+ BitWord PrefixMask = ~0UL << (I % BITWORD_SIZE);
+ Bits[I / BITWORD_SIZE] |= PrefixMask;
+ I = RoundUpToAlignment(I, BITWORD_SIZE);
+
+ for (; I + BITWORD_SIZE <= E; I += BITWORD_SIZE)
+ Bits[I / BITWORD_SIZE] = ~0UL;
+
+    if (I < E) // skip the final partial word when E is word-aligned
+      Bits[I / BITWORD_SIZE] |= (1UL << (E % BITWORD_SIZE)) - 1;
+
+ return *this;
+ }
+
BitVector &reset() {
init_words(Bits, Capacity, false);
return *this;
@@ -247,6 +275,34 @@ public:
return *this;
}
+ /// reset - Efficiently reset a range of bits in [I, E)
+ BitVector &reset(unsigned I, unsigned E) {
+ assert(I <= E && "Attempted to reset backwards range!");
+ assert(E <= size() && "Attempted to reset out-of-bounds range!");
+
+ if (I == E) return *this;
+
+ if (I / BITWORD_SIZE == E / BITWORD_SIZE) {
+ BitWord EMask = 1UL << (E % BITWORD_SIZE);
+ BitWord IMask = 1UL << (I % BITWORD_SIZE);
+ BitWord Mask = EMask - IMask;
+ Bits[I / BITWORD_SIZE] &= ~Mask;
+ return *this;
+ }
+
+ BitWord PrefixMask = ~0UL << (I % BITWORD_SIZE);
+ Bits[I / BITWORD_SIZE] &= ~PrefixMask;
+ I = RoundUpToAlignment(I, BITWORD_SIZE);
+
+ for (; I + BITWORD_SIZE <= E; I += BITWORD_SIZE)
+ Bits[I / BITWORD_SIZE] = 0UL;
+
+    if (I < E) // skip the final partial word when E is word-aligned
+      Bits[I / BITWORD_SIZE] &= ~((1UL << (E % BITWORD_SIZE)) - 1);
+
+ return *this;
+ }
+
BitVector &flip() {
for (unsigned i = 0; i < NumBitWords(size()); ++i)
Bits[i] = ~Bits[i];
diff --git a/include/llvm/ADT/ImmutableSet.h b/include/llvm/ADT/ImmutableSet.h
index 261d0494e2d..3900f96be16 100644
--- a/include/llvm/ADT/ImmutableSet.h
+++ b/include/llvm/ADT/ImmutableSet.h
@@ -89,7 +89,7 @@ public:
ImutAVLTree* getMaxElement() {
ImutAVLTree *T = this;
ImutAVLTree *Right = T->getRight();
- while (Right) { T = right; right = T->getRight(); }
+ while (Right) { T = Right; Right = T->getRight(); }
return T;
}
diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h
index 7a645e0c724..fba1d12542a 100644
--- a/include/llvm/ADT/SmallBitVector.h
+++ b/include/llvm/ADT/SmallBitVector.h
@@ -300,6 +300,21 @@ public:
return *this;
}
+ /// set - Efficiently set a range of bits in [I, E)
+ SmallBitVector &set(unsigned I, unsigned E) {
+ assert(I <= E && "Attempted to set backwards range!");
+ assert(E <= size() && "Attempted to set out-of-bounds range!");
+ if (I == E) return *this;
+ if (isSmall()) {
+      uintptr_t EMask = ((uintptr_t)1) << E; // widen before shifting
+      uintptr_t IMask = ((uintptr_t)1) << I;
+ uintptr_t Mask = EMask - IMask;
+ setSmallBits(getSmallBits() | Mask);
+ } else
+ getPointer()->set(I, E);
+ return *this;
+ }
+
SmallBitVector &reset() {
if (isSmall())
setSmallBits(0);
@@ -316,6 +331,21 @@ public:
return *this;
}
+ /// reset - Efficiently reset a range of bits in [I, E)
+ SmallBitVector &reset(unsigned I, unsigned E) {
+ assert(I <= E && "Attempted to reset backwards range!");
+ assert(E <= size() && "Attempted to reset out-of-bounds range!");
+ if (I == E) return *this;
+ if (isSmall()) {
+      uintptr_t EMask = ((uintptr_t)1) << E; // widen before shifting
+      uintptr_t IMask = ((uintptr_t)1) << I;
+ uintptr_t Mask = EMask - IMask;
+ setSmallBits(getSmallBits() & ~Mask);
+ } else
+ getPointer()->reset(I, E);
+ return *this;
+ }
+
SmallBitVector &flip() {
if (isSmall())
setSmallBits(~getSmallBits());
diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index a0527683f67..15fe55fbe3b 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -76,7 +76,8 @@ public:
SCEI,
BGP,
BGQ,
- Freescale
+ Freescale,
+ IBM
};
enum OSType {
UnknownOS,
@@ -101,7 +102,8 @@ public:
RTEMS,
NativeClient,
CNK, // BG/P Compute-Node Kernel
- Bitrig
+ Bitrig,
+ AIX
};
enum EnvironmentType {
UnknownEnvironment,
diff --git a/include/llvm/Analysis/DependenceAnalysis.h b/include/llvm/Analysis/DependenceAnalysis.h
new file mode 100644
index 00000000000..9b6a6bbd3ed
--- /dev/null
+++ b/include/llvm/Analysis/DependenceAnalysis.h
@@ -0,0 +1,891 @@
+//===-- llvm/Analysis/DependenceAnalysis.h -------------------- -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// DependenceAnalysis is an LLVM pass that analyses dependences between memory
+// accesses. Currently, it is an implementation of the approach described in
+//
+// Practical Dependence Testing
+// Goff, Kennedy, Tseng
+// PLDI 1991
+//
+// There's a single entry point that analyzes the dependence between a pair
+// of memory references in a function, returning either NULL, for no dependence,
+// or a more-or-less detailed description of the dependence between them.
+//
+// Please note that this is work in progress and the interface is subject to
+// change.
+//
+// Plausible changes:
+// Return a set of more precise dependences instead of just one dependence
+// summarizing all.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DEPENDENCEANALYSIS_H
+#define LLVM_ANALYSIS_DEPENDENCEANALYSIS_H
+
+#include "llvm/BasicBlock.h"
+#include "llvm/Function.h"
+#include "llvm/Instruction.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+
+namespace llvm {
+ class AliasAnalysis;
+ class ScalarEvolution;
+ class SCEV;
+ class Value;
+ class raw_ostream;
+
+ /// Dependence - This class represents a dependence between two memory
+  /// references in a function. It contains minimal information and
+ /// is used in the very common situation where the compiler is unable to
+ /// determine anything beyond the existence of a dependence; that is, it
+ /// represents a confused dependence (see also FullDependence). In most
+ /// cases (for output, flow, and anti dependences), the dependence implies
+  /// an ordering, where the source must precede the destination; in contrast,
+ /// input dependences are unordered.
+ class Dependence {
+ public:
+ Dependence(const Instruction *Source,
+ const Instruction *Destination) :
+ Src(Source), Dst(Destination) {}
+ virtual ~Dependence() {}
+
+ /// Dependence::DVEntry - Each level in the distance/direction vector
+ /// has a direction (or perhaps a union of several directions), and
+ /// perhaps a distance.
+ struct DVEntry {
+ enum { NONE = 0,
+ LT = 1,
+ EQ = 2,
+ LE = 3,
+ GT = 4,
+ NE = 5,
+ GE = 6,
+ ALL = 7 };
+ unsigned char Direction : 3; // Init to ALL, then refine.
+ bool Scalar : 1; // Init to true.
+ bool PeelFirst : 1; // Peeling the first iteration will break dependence.
+ bool PeelLast : 1; // Peeling the last iteration will break the dependence.
+ bool Splitable : 1; // Splitting the loop will break dependence.
+ const SCEV *Distance; // NULL implies no distance available.
+ DVEntry() : Direction(ALL), Scalar(true), PeelFirst(false),
+ PeelLast(false), Splitable(false), Distance(NULL) { }
+ };
+
+ /// getSrc - Returns the source instruction for this dependence.
+ ///
+ const Instruction *getSrc() const { return Src; }
+
+ /// getDst - Returns the destination instruction for this dependence.
+ ///
+ const Instruction *getDst() const { return Dst; }
+
+ /// isInput - Returns true if this is an input dependence.
+ ///
+ bool isInput() const;
+
+ /// isOutput - Returns true if this is an output dependence.
+ ///
+ bool isOutput() const;
+
+ /// isFlow - Returns true if this is a flow (aka true) dependence.
+ ///
+ bool isFlow() const;
+
+ /// isAnti - Returns true if this is an anti dependence.
+ ///
+ bool isAnti() const;
+
+ /// isOrdered - Returns true if dependence is Output, Flow, or Anti
+ ///
+ bool isOrdered() const { return isOutput() || isFlow() || isAnti(); }
+
+ /// isUnordered - Returns true if dependence is Input
+ ///
+ bool isUnordered() const { return isInput(); }
+
+ /// isLoopIndependent - Returns true if this is a loop-independent
+ /// dependence.
+ virtual bool isLoopIndependent() const { return true; }
+
+ /// isConfused - Returns true if this dependence is confused
+ /// (the compiler understands nothing and makes worst-case
+ /// assumptions).
+ virtual bool isConfused() const { return true; }
+
+ /// isConsistent - Returns true if this dependence is consistent
+ /// (occurs every time the source and destination are executed).
+ virtual bool isConsistent() const { return false; }
+
+ /// getLevels - Returns the number of common loops surrounding the
+    /// source and destination of the dependence.
+ virtual unsigned getLevels() const { return 0; }
+
+ /// getDirection - Returns the direction associated with a particular
+ /// level.
+ virtual unsigned getDirection(unsigned Level) const { return DVEntry::ALL; }
+
+ /// getDistance - Returns the distance (or NULL) associated with a
+ /// particular level.
+ virtual const SCEV *getDistance(unsigned Level) const { return NULL; }
+
+ /// isPeelFirst - Returns true if peeling the first iteration from
+ /// this loop will break this dependence.
+ virtual bool isPeelFirst(unsigned Level) const { return false; }
+
+ /// isPeelLast - Returns true if peeling the last iteration from
+ /// this loop will break this dependence.
+ virtual bool isPeelLast(unsigned Level) const { return false; }
+
+ /// isSplitable - Returns true if splitting this loop will break
+ /// the dependence.
+ virtual bool isSplitable(unsigned Level) const { return false; }
+
+ /// isScalar - Returns true if a particular level is scalar; that is,
+    /// if no subscript in the source or destination mentions the induction
+ /// variable associated with the loop at this level.
+ virtual bool isScalar(unsigned Level) const;
+
+ /// dump - For debugging purposes, dumps a dependence to OS.
+ ///
+ void dump(raw_ostream &OS) const;
+ private:
+ const Instruction *Src, *Dst;
+ friend class DependenceAnalysis;
+ };
+
+
+ /// FullDependence - This class represents a dependence between two memory
+ /// references in a function. It contains detailed information about the
+ /// dependence (direction vectors, etc) and is used when the compiler is
+ /// able to accurately analyze the interaction of the references; that is,
+ /// it is not a confused dependence (see Dependence). In most cases
+ /// (for output, flow, and anti dependences), the dependence implies an
+  /// ordering, where the source must precede the destination; in contrast,
+ /// input dependences are unordered.
+ class FullDependence : public Dependence {
+ public:
+ FullDependence(const Instruction *Src,
+ const Instruction *Dst,
+ bool LoopIndependent,
+ unsigned Levels);
+ ~FullDependence() {
+      delete [] DV; // DV is the per-level direction-vector array
+ }
+
+ /// isLoopIndependent - Returns true if this is a loop-independent
+ /// dependence.
+ bool isLoopIndependent() const { return LoopIndependent; }
+
+ /// isConfused - Returns true if this dependence is confused
+ /// (the compiler understands nothing and makes worst-case
+ /// assumptions).
+ bool isConfused() const { return false; }
+
+ /// isConsistent - Returns true if this dependence is consistent
+ /// (occurs every time the source and destination are executed).
+ bool isConsistent() const { return Consistent; }
+
+ /// getLevels - Returns the number of common loops surrounding the
+    /// source and destination of the dependence.
+ unsigned getLevels() const { return Levels; }
+
+ /// getDirection - Returns the direction associated with a particular
+ /// level.
+ unsigned getDirection(unsigned Level) const;
+
+ /// getDistance - Returns the distance (or NULL) associated with a
+ /// particular level.
+ const SCEV *getDistance(unsigned Level) const;
+
+ /// isPeelFirst - Returns true if peeling the first iteration from
+ /// this loop will break this dependence.
+ bool isPeelFirst(unsigned Level) const;
+
+ /// isPeelLast - Returns true if peeling the last iteration from
+ /// this loop will break this dependence.
+ bool isPeelLast(unsigned Level) const;
+
+ /// isSplitable - Returns true if splitting the loop will break
+ /// the dependence.
+ bool isSplitable(unsigned Level) const;
+
+ /// isScalar - Returns true if a particular level is scalar; that is,
+    /// if no subscript in the source or destination mentions the induction
+ /// variable associated with the loop at this level.
+ bool isScalar(unsigned Level) const;
+ private:
+ unsigned short Levels;
+ bool LoopIndependent;
+ bool Consistent; // Init to true, then refine.
+ DVEntry *DV;
+ friend class DependenceAnalysis;
+ };
+
+
+ /// DependenceAnalysis - This class is the main dependence-analysis driver.
+ ///
+ class DependenceAnalysis : public FunctionPass {
+ void operator=(const DependenceAnalysis &); // do not implement
+ DependenceAnalysis(const DependenceAnalysis &); // do not implement
+ public:
+ /// depends - Tests for a dependence between the Src and Dst instructions.
+ /// Returns NULL if no dependence; otherwise, returns a Dependence (or a
+ /// FullDependence) with as much information as can be gleaned.
+ /// The flag PossiblyLoopIndependent should be set by the caller
+ /// if it appears that control flow can reach from Src to Dst
+ /// without traversing a loop back edge.
+ Dependence *depends(const Instruction *Src,
+ const Instruction *Dst,
+ bool PossiblyLoopIndependent);
+
+    /// getSplitIteration - Given a dependence that's splitable at some
+ /// particular level, return the iteration that should be used to split
+ /// the loop.
+ ///
+ /// Generally, the dependence analyzer will be used to build
+ /// a dependence graph for a function (basically a map from instructions
+ /// to dependences). Looking for cycles in the graph shows us loops
+ /// that cannot be trivially vectorized/parallelized.
+ ///
+ /// We can try to improve the situation by examining all the dependences
+ /// that make up the cycle, looking for ones we can break.
+ /// Sometimes, peeling the first or last iteration of a loop will break
+ /// dependences, and there are flags for those possibilities.
+ /// Sometimes, splitting a loop at some other iteration will do the trick,
+ /// and we've got a flag for that case. Rather than waste the space to
+ /// record the exact iteration (since we rarely know), we provide
+ /// a method that calculates the iteration. It's a drag that it must work
+ /// from scratch, but wonderful in that it's possible.
+ ///
+ /// Here's an example:
+ ///
+ /// for (i = 0; i < 10; i++)
+ /// A[i] = ...
+ /// ... = A[11 - i]
+ ///
+ /// There's a loop-carried flow dependence from the store to the load,
+ /// found by the weak-crossing SIV test. The dependence will have a flag,
+ /// indicating that the dependence can be broken by splitting the loop.
+ /// Calling getSplitIteration will return 5.
+ /// Splitting the loop breaks the dependence, like so:
+ ///
+ /// for (i = 0; i <= 5; i++)
+ /// A[i] = ...
+ /// ... = A[11 - i]
+ /// for (i = 6; i < 10; i++)
+ /// A[i] = ...
+ /// ... = A[11 - i]
+ ///
+ /// breaks the dependence and allows us to vectorize/parallelize
+ /// both loops.
+ const SCEV *getSplitIteration(const Dependence *Dep, unsigned Level);
+
+ private:
+ AliasAnalysis *AA;
+ ScalarEvolution *SE;
+ LoopInfo *LI;
+ Function *F;
+
+ /// Subscript - This private struct represents a pair of subscripts from
+ /// a pair of potentially multi-dimensional array references. We use a
+ /// vector of them to guide subscript partitioning.
+ struct Subscript {
+ const SCEV *Src;
+ const SCEV *Dst;
+ enum ClassificationKind { ZIV, SIV, RDIV, MIV, NonLinear } Classification;
+ SmallBitVector Loops;
+ SmallBitVector GroupLoops;
+ SmallBitVector Group;
+ };
+
+ struct CoefficientInfo {
+ const SCEV *Coeff;
+ const SCEV *PosPart;
+ const SCEV *NegPart;
+ const SCEV *Iterations;
+ };
+
+ struct BoundInfo {
+ const SCEV *Iterations;
+ const SCEV *Upper[8];
+ const SCEV *Lower[8];
+ unsigned char Direction;
+ unsigned char DirSet;
+ };
+
+ /// Constraint - This private class represents a constraint, as defined
+ /// in the paper
+ ///
+ /// Practical Dependence Testing
+ /// Goff, Kennedy, Tseng
+ /// PLDI 1991
+ ///
+ /// There are 5 kinds of constraint, in a hierarchy.
+ /// 1) Any - indicates no constraint, any dependence is possible.
+ /// 2) Line - A line ax + by = c, where a, b, and c are parameters,
+ /// representing the dependence equation.
+    ///     3) Distance - The value d of the dependence distance.
+ /// 4) Point - A point <x, y> representing the dependence from
+ /// iteration x to iteration y.
+ /// 5) Empty - No dependence is possible.
+ class Constraint {
+ private:
+ enum ConstraintKind { Empty, Point, Distance, Line, Any } Kind;
+ ScalarEvolution *SE;
+ const SCEV *A;
+ const SCEV *B;
+ const SCEV *C;
+ const Loop *AssociatedLoop;
+ public:
+ /// isEmpty - Return true if the constraint is of kind Empty.
+ bool isEmpty() const { return Kind == Empty; }
+
+ /// isPoint - Return true if the constraint is of kind Point.
+ bool isPoint() const { return Kind == Point; }
+
+ /// isDistance - Return true if the constraint is of kind Distance.
+ bool isDistance() const { return Kind == Distance; }
+
+ /// isLine - Return true if the constraint is of kind Line.
+      /// Since Distances can also be represented as Lines, we also return
+ /// true if the constraint is of kind Distance.
+ bool isLine() const { return Kind == Line || Kind == Distance; }
+
+      /// isAny - Return true if the constraint is of kind Any.
+ bool isAny() const { return Kind == Any; }
+
+ /// getX - If constraint is a point <X, Y>, returns X.
+ /// Otherwise assert.
+ const SCEV *getX() const;
+
+ /// getY - If constraint is a point <X, Y>, returns Y.
+ /// Otherwise assert.
+ const SCEV *getY() const;
+
+ /// getA - If constraint is a line AX + BY = C, returns A.
+ /// Otherwise assert.
+ const SCEV *getA() const;
+
+ /// getB - If constraint is a line AX + BY = C, returns B.
+ /// Otherwise assert.
+ const SCEV *getB() const;
+
+ /// getC - If constraint is a line AX + BY = C, returns C.
+ /// Otherwise assert.
+ const SCEV *getC() const;
+
+ /// getD - If constraint is a distance, returns D.
+ /// Otherwise assert.
+ const SCEV *getD() const;
+
+ /// getAssociatedLoop - Returns the loop associated with this constraint.
+ const Loop *getAssociatedLoop() const;
+
+ /// setPoint - Change a constraint to Point.
+ void setPoint(const SCEV *X, const SCEV *Y, const Loop *CurrentLoop);
+
+ /// setLine - Change a constraint to Line.
+ void setLine(const SCEV *A, const SCEV *B,
+ const SCEV *C, const Loop *CurrentLoop);
+
+ /// setDistance - Change a constraint to Distance.
+ void setDistance(const SCEV *D, const Loop *CurrentLoop);
+
+ /// setEmpty - Change a constraint to Empty.
+ void setEmpty();
+
+ /// setAny - Change a constraint to Any.
+ void setAny(ScalarEvolution *SE);
+
+ /// dump - For debugging purposes. Dumps the constraint
+ /// out to OS.
+ void dump(raw_ostream &OS) const;
+ };
+
+
+ /// establishNestingLevels - Examines the loop nesting of the Src and Dst
+ /// instructions and establishes their shared loops. Sets the variables
+ /// CommonLevels, SrcLevels, and MaxLevels.
+ /// The source and destination instructions needn't be contained in the same
+    /// loop. The routine establishNestingLevels finds the level of the most deeply
+ /// nested loop that contains them both, CommonLevels. An instruction that's
+ /// not contained in a loop is at level = 0. MaxLevels is equal to the level
+ /// of the source plus the level of the destination, minus CommonLevels.
+ /// This lets us allocate vectors MaxLevels in length, with room for every
+ /// distinct loop referenced in both the source and destination subscripts.
+ /// The variable SrcLevels is the nesting depth of the source instruction.
+ /// It's used to help calculate distinct loops referenced by the destination.
+ /// Here's the map from loops to levels:
+ /// 0 - unused
+ /// 1 - outermost common loop
+ /// ... - other common loops
+ /// CommonLevels - innermost common loop
+ /// ... - loops containing Src but not Dst
+ /// SrcLevels - innermost loop containing Src but not Dst
+ /// ... - loops containing Dst but not Src
+ /// MaxLevels - innermost loop containing Dst but not Src
+    /// Consider the following code fragment:
+ /// for (a = ...) {
+ /// for (b = ...) {
+ /// for (c = ...) {
+ /// for (d = ...) {
+ /// A[] = ...;
+ /// }
+ /// }
+ /// for (e = ...) {
+ /// for (f = ...) {
+ /// for (g = ...) {
+ /// ... = A[];
+ /// }
+ /// }
+ /// }
+ /// }
+ /// }
+ /// If we're looking at the possibility of a dependence between the store
+ /// to A (the Src) and the load from A (the Dst), we'll note that they
+ /// have 2 loops in common, so CommonLevels will equal 2 and the direction
+ /// vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7.
+ /// A map from loop names to level indices would look like
+ /// a - 1
+ /// b - 2 = CommonLevels
+ /// c - 3
+ /// d - 4 = SrcLevels
+ /// e - 5
+ /// f - 6
+ /// g - 7 = MaxLevels
+ void establishNestingLevels(const Instruction *Src,
+ const Instruction *Dst);
+
+ unsigned CommonLevels, SrcLevels, MaxLevels;
+
+ /// mapSrcLoop - Given one of the loops containing the source, return
+ /// its level index in our numbering scheme.
+ unsigned mapSrcLoop(const Loop *SrcLoop) const;
+
+ /// mapDstLoop - Given one of the loops containing the destination,
+ /// return its level index in our numbering scheme.
+ unsigned mapDstLoop(const Loop *DstLoop) const;
+
+ /// isLoopInvariant - Returns true if Expression is loop invariant
+ /// in LoopNest.
+ bool isLoopInvariant(const SCEV *Expression, const Loop *LoopNest) const;
+
+ /// removeMatchingExtensions - Examines a subscript pair.
+ /// If the source and destination are identically sign (or zero)
+ /// extended, it strips off the extension in an effort to
+ /// simplify the actual analysis.
+ void removeMatchingExtensions(Subscript *Pair);
+
+ /// collectCommonLoops - Finds the set of loops from the LoopNest that
+ /// have a level <= CommonLevels and are referred to by the SCEV Expression.
+ void collectCommonLoops(const SCEV *Expression,
+ const Loop *LoopNest,
+ SmallBitVector &Loops) const;
+
+ /// checkSrcSubscript - Examines the SCEV Src, returning true iff it's
+ /// linear. Collect the set of loops mentioned by Src.
+ bool checkSrcSubscript(const SCEV *Src,
+ const Loop *LoopNest,
+ SmallBitVector &Loops);
+
+ /// checkDstSubscript - Examines the SCEV Dst, returning true iff it's
+ /// linear. Collect the set of loops mentioned by Dst.
+ bool checkDstSubscript(const SCEV *Dst,
+ const Loop *LoopNest,
+ SmallBitVector &Loops);
+
+ /// isKnownPredicate - Compare X and Y using the predicate Pred.
+ /// Basically a wrapper for SCEV::isKnownPredicate,
+    /// but tries harder, especially in the presence of sign and zero
+ /// extensions and symbolics.
+ bool isKnownPredicate(ICmpInst::Predicate Pred,
+ const SCEV *X,
+ const SCEV *Y) const;
+
+ /// collectUpperBound - All subscripts are the same type (on my machine,
+ /// an i64). The loop bound may be a smaller type. collectUpperBound
+    /// finds the bound, if available, and zero extends it to the type T.
+ /// (I zero extend since the bound should always be >= 0.)
+ /// If no upper bound is available, return NULL.
+ const SCEV *collectUpperBound(const Loop *l, Type *T) const;
+
+ /// collectConstantUpperBound - Calls collectUpperBound(), then
+ /// attempts to cast it to SCEVConstant. If the cast fails,
+ /// returns NULL.
+ const SCEVConstant *collectConstantUpperBound(const Loop *l, Type *T) const;
+
+ /// classifyPair - Examines the subscript pair (the Src and Dst SCEVs)
+    /// and classifies it as either ZIV, SIV, RDIV, MIV, or NonLinear.
+ /// Collects the associated loops in a set.
+ Subscript::ClassificationKind classifyPair(const SCEV *Src,
+ const Loop *SrcLoopNest,
+ const SCEV *Dst,
+ const Loop *DstLoopNest,
+ SmallBitVector &Loops);
+
+ /// testZIV - Tests the ZIV subscript pair (Src and Dst) for dependence.
+ /// Returns true if any possible dependence is disproved.
+ /// If there might be a dependence, returns false.
+ /// If the dependence isn't proven to exist,
+ /// marks the Result as inconsistent.
+ bool testZIV(const SCEV *Src,
+ const SCEV *Dst,
+ FullDependence &Result) const;
+
+ /// testSIV - Tests the SIV subscript pair (Src and Dst) for dependence.
+ /// Things of the form [c1 + a1*i] and [c2 + a2*j], where
+ /// i and j are induction variables, c1 and c2 are loop invariant,
+ /// and a1 and a2 are constant.
+ /// Returns true if any possible dependence is disproved.
+ /// If there might be a dependence, returns false.
+ /// Sets appropriate direction vector entry and, when possible,
+ /// the distance vector entry.
+ /// If the dependence isn't proven to exist,
+ /// marks the Result as inconsistent.
+ bool testSIV(const SCEV *Src,
+ const SCEV *Dst,
+ unsigned &Level,
+ FullDependence &Result,
+ Constraint &NewConstraint,
+ const SCEV *&SplitIter) const;
+
+ /// testRDIV - Tests the RDIV subscript pair (Src and Dst) for dependence.
+ /// Things of the form [c1 + a1*i] and [c2 + a2*j]
+ /// where i and j are induction variables, c1 and c2 are loop invariant,
+ /// and a1 and a2 are constant.
+ /// With minor algebra, this test can also be used for things like
+ /// [c1 + a1*i + a2*j][c2].
+ /// Returns true if any possible dependence is disproved.
+ /// If there might be a dependence, returns false.
+ /// Marks the Result as inconsistent.
+ bool testRDIV(const SCEV *Src,
+ const SCEV *Dst,
+ FullDependence &Result) const;
+
+ /// testMIV - Tests the MIV subscript pair (Src and Dst) for dependence.
+ /// Returns true if dependence disproved.
+ /// Can sometimes refine direction vectors.
+ bool testMIV(const SCEV *Src,
+ const SCEV *Dst,
+ const SmallBitVector &Loops,
+ FullDependence &Result) const;
+
+ /// strongSIVtest - Tests the strong SIV subscript pair (Src and Dst)
+ /// for dependence.
+ /// Things of the form [c1 + a*i] and [c2 + a*i],
+ /// where i is an induction variable, c1 and c2 are loop invariant,
+ /// and a is a constant
+ /// Returns true if any possible dependence is disproved.
+ /// If there might be a dependence, returns false.
+ /// Sets appropriate direction and distance.
+ bool strongSIVtest(const SCEV *Coeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurrentLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint) const;
+
+ /// weakCrossingSIVtest - Tests the weak-crossing SIV subscript pair
+ /// (Src and Dst) for dependence.
+ /// Things of the form [c1 + a*i] and [c2 - a*i],
+ /// where i is an induction variable, c1 and c2 are loop invariant,
+ /// and a is a constant.
+ /// Returns true if any possible dependence is disproved.
+ /// If there might be a dependence, returns false.
+ /// Sets appropriate direction entry.
+    /// Sets consistent to false.
+ /// Marks the dependence as splitable.
+ bool weakCrossingSIVtest(const SCEV *SrcCoeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurrentLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint,
+ const SCEV *&SplitIter) const;
+
+ /// ExactSIVtest - Tests the SIV subscript pair
+ /// (Src and Dst) for dependence.
+ /// Things of the form [c1 + a1*i] and [c2 + a2*i],
+ /// where i is an induction variable, c1 and c2 are loop invariant,
+ /// and a1 and a2 are constant.
+ /// Returns true if any possible dependence is disproved.
+ /// If there might be a dependence, returns false.
+ /// Sets appropriate direction entry.
+    /// Sets consistent to false.
+ bool exactSIVtest(const SCEV *SrcCoeff,
+ const SCEV *DstCoeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurrentLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint) const;
+
+ /// weakZeroSrcSIVtest - Tests the weak-zero SIV subscript pair
+ /// (Src and Dst) for dependence.
+ /// Things of the form [c1] and [c2 + a*i],
+ /// where i is an induction variable, c1 and c2 are loop invariant,
+ /// and a is a constant. See also weakZeroDstSIVtest.
+ /// Returns true if any possible dependence is disproved.
+ /// If there might be a dependence, returns false.
+ /// Sets appropriate direction entry.
+    /// Sets consistent to false.
+ /// If loop peeling will break the dependence, mark appropriately.
+ bool weakZeroSrcSIVtest(const SCEV *DstCoeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurrentLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint) const;
+
+ /// weakZeroDstSIVtest - Tests the weak-zero SIV subscript pair
+ /// (Src and Dst) for dependence.
+ /// Things of the form [c1 + a*i] and [c2],
+ /// where i is an induction variable, c1 and c2 are loop invariant,
+ /// and a is a constant. See also weakZeroSrcSIVtest.
+ /// Returns true if any possible dependence is disproved.
+ /// If there might be a dependence, returns false.
+ /// Sets appropriate direction entry.
+    /// Sets consistent to false.
+ /// If loop peeling will break the dependence, mark appropriately.
+ bool weakZeroDstSIVtest(const SCEV *SrcCoeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurrentLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint) const;
+
+ /// exactRDIVtest - Tests the RDIV subscript pair for dependence.
+ /// Things of the form [c1 + a*i] and [c2 + b*j],
+    /// where i and j are induction variables, c1 and c2 are loop invariant,
+ /// and a and b are constants.
+ /// Returns true if any possible dependence is disproved.
+    /// Marks the result as inconsistent.
+ /// Works in some cases that symbolicRDIVtest doesn't,
+ /// and vice versa.
+ bool exactRDIVtest(const SCEV *SrcCoeff,
+ const SCEV *DstCoeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *SrcLoop,
+ const Loop *DstLoop,
+ FullDependence &Result) const;
+
+ /// symbolicRDIVtest - Tests the RDIV subscript pair for dependence.
+ /// Things of the form [c1 + a*i] and [c2 + b*j],
+    /// where i and j are induction variables, c1 and c2 are loop invariant,
+ /// and a and b are constants.
+ /// Returns true if any possible dependence is disproved.
+    /// Marks the result as inconsistent.
+ /// Works in some cases that exactRDIVtest doesn't,
+ /// and vice versa. Can also be used as a backup for
+ /// ordinary SIV tests.
+ bool symbolicRDIVtest(const SCEV *SrcCoeff,
+ const SCEV *DstCoeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *SrcLoop,
+ const Loop *DstLoop) const;
+
+ /// gcdMIVtest - Tests an MIV subscript pair for dependence.
+ /// Returns true if any possible dependence is disproved.
+    /// Marks the result as inconsistent.
+ /// Can sometimes disprove the equal direction for 1 or more loops.
+    /// Can handle some symbolics that even the SIV tests don't get,
+ /// so we use it as a backup for everything.
+ bool gcdMIVtest(const SCEV *Src,
+ const SCEV *Dst,
+ FullDependence &Result) const;
+
+ /// banerjeeMIVtest - Tests an MIV subscript pair for dependence.
+ /// Returns true if any possible dependence is disproved.
+    /// Marks the result as inconsistent.
+ /// Computes directions.
+ bool banerjeeMIVtest(const SCEV *Src,
+ const SCEV *Dst,
+ const SmallBitVector &Loops,
+ FullDependence &Result) const;
+
+    /// collectCoeffInfo - Walks through the subscript,
+ /// collecting each coefficient, the associated loop bounds,
+ /// and recording its positive and negative parts for later use.
+ CoefficientInfo *collectCoeffInfo(const SCEV *Subscript,
+ bool SrcFlag,
+ const SCEV *&Constant) const;
+
+ /// getPositivePart - X^+ = max(X, 0).
+ ///
+ const SCEV *getPositivePart(const SCEV *X) const;
+
+ /// getNegativePart - X^- = min(X, 0).
+ ///
+ const SCEV *getNegativePart(const SCEV *X) const;
+
+ /// getLowerBound - Looks through all the bounds info and
+ /// computes the lower bound given the current direction settings
+ /// at each level.
+ const SCEV *getLowerBound(BoundInfo *Bound) const;
+
+ /// getUpperBound - Looks through all the bounds info and
+ /// computes the upper bound given the current direction settings
+ /// at each level.
+ const SCEV *getUpperBound(BoundInfo *Bound) const;
+
+ /// exploreDirections - Hierarchically expands the direction vector
+ /// search space, combining the directions of discovered dependences
+ /// in the DirSet field of Bound. Returns the number of distinct
+ /// dependences discovered. If the dependence is disproved,
+ /// it will return 0.
+ unsigned exploreDirections(unsigned Level,
+ CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ const SmallBitVector &Loops,
+ unsigned &DepthExpanded,
+ const SCEV *Delta) const;
+
+ /// testBounds - Returns true iff the current bounds are plausible.
+ ///
+ bool testBounds(unsigned char DirKind,
+ unsigned Level,
+ BoundInfo *Bound,
+ const SCEV *Delta) const;
+
+ /// findBoundsALL - Computes the upper and lower bounds for level K
+ /// using the * direction. Records them in Bound.
+ void findBoundsALL(CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ unsigned K) const;
+
+ /// findBoundsLT - Computes the upper and lower bounds for level K
+ /// using the < direction. Records them in Bound.
+ void findBoundsLT(CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ unsigned K) const;
+
+ /// findBoundsGT - Computes the upper and lower bounds for level K
+ /// using the > direction. Records them in Bound.
+ void findBoundsGT(CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ unsigned K) const;
+
+ /// findBoundsEQ - Computes the upper and lower bounds for level K
+ /// using the = direction. Records them in Bound.
+ void findBoundsEQ(CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ unsigned K) const;
+
+ /// intersectConstraints - Updates X with the intersection
+ /// of the Constraints X and Y. Returns true if X has changed.
+ bool intersectConstraints(Constraint *X,
+ const Constraint *Y);
+
+ /// propagate - Review the constraints, looking for opportunities
+ /// to simplify a subscript pair (Src and Dst).
+ /// Return true if some simplification occurs.
+ /// If the simplification isn't exact (that is, if it is conservative
+ /// in terms of dependence), set consistent to false.
+ bool propagate(const SCEV *&Src,
+ const SCEV *&Dst,
+ SmallBitVector &Loops,
+ SmallVector<Constraint, 4> &Constraints,
+ bool &Consistent);
+
+ /// propagateDistance - Attempt to propagate a distance
+ /// constraint into a subscript pair (Src and Dst).
+ /// Return true if some simplification occurs.
+ /// If the simplification isn't exact (that is, if it is conservative
+ /// in terms of dependence), set consistent to false.
+ bool propagateDistance(const SCEV *&Src,
+ const SCEV *&Dst,
+ Constraint &CurConstraint,
+ bool &Consistent);
+
+ /// propagatePoint - Attempt to propagate a point
+ /// constraint into a subscript pair (Src and Dst).
+ /// Return true if some simplification occurs.
+ bool propagatePoint(const SCEV *&Src,
+ const SCEV *&Dst,
+ Constraint &CurConstraint);
+
+ /// propagateLine - Attempt to propagate a line
+ /// constraint into a subscript pair (Src and Dst).
+ /// Return true if some simplification occurs.
+ /// If the simplification isn't exact (that is, if it is conservative
+ /// in terms of dependence), set consistent to false.
+ bool propagateLine(const SCEV *&Src,
+ const SCEV *&Dst,
+ Constraint &CurConstraint,
+ bool &Consistent);
+
+ /// findCoefficient - Given a linear SCEV,
+    /// return the coefficient corresponding to the specified loop.
+ /// If there isn't one, return the SCEV constant 0.
+ /// For example, given a*i + b*j + c*k, returning the coefficient
+ /// corresponding to the j loop would yield b.
+ const SCEV *findCoefficient(const SCEV *Expr,
+ const Loop *TargetLoop) const;
+
+ /// zeroCoefficient - Given a linear SCEV,
+ /// return the SCEV given by zeroing out the coefficient
+ /// corresponding to the specified loop.
+ /// For example, given a*i + b*j + c*k, zeroing the coefficient
+ /// corresponding to the j loop would yield a*i + c*k.
+ const SCEV *zeroCoefficient(const SCEV *Expr,
+ const Loop *TargetLoop) const;
+
+ /// addToCoefficient - Given a linear SCEV Expr,
+ /// return the SCEV given by adding some Value to the
+ /// coefficient corresponding to the specified TargetLoop.
+ /// For example, given a*i + b*j + c*k, adding 1 to the coefficient
+ /// corresponding to the j loop would yield a*i + (b+1)*j + c*k.
+ const SCEV *addToCoefficient(const SCEV *Expr,
+ const Loop *TargetLoop,
+ const SCEV *Value) const;
+
+ /// updateDirection - Update direction vector entry
+ /// based on the current constraint.
+ void updateDirection(Dependence::DVEntry &Level,
+ const Constraint &CurConstraint) const;
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ DependenceAnalysis() : FunctionPass(ID) {
+ initializeDependenceAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+ void releaseMemory();
+ void getAnalysisUsage(AnalysisUsage &) const;
+ void print(raw_ostream &, const Module * = 0) const;
+ }; // class DependenceAnalysis
+
+ /// createDependenceAnalysisPass - This creates an instance of the
+ /// DependenceAnalysis pass.
+ FunctionPass *createDependenceAnalysisPass();
+
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h
index c52f846b5ca..c127830e0e5 100644
--- a/include/llvm/Analysis/Passes.h
+++ b/include/llvm/Analysis/Passes.h
@@ -180,11 +180,20 @@ namespace llvm {
//===--------------------------------------------------------------------===//
//
+ // createDependenceAnalysisPass - This creates an instance of the
+ // DependenceAnalysis pass.
+ //
+ FunctionPass *createDependenceAnalysisPass();
+
+ //===--------------------------------------------------------------------===//
+ //
// createLoopDependenceAnalysisPass - This creates an instance of the
// LoopDependenceAnalysis pass.
//
LoopPass *createLoopDependenceAnalysisPass();
+ //===--------------------------------------------------------------------===//
+ //
// Minor pass prototypes, allowing us to expose them through bugpoint and
// analyze.
FunctionPass *createInstCountPass();
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index a055195352a..67c9a4d14fe 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -162,7 +162,6 @@ namespace llvm {
SCEVCouldNotCompute();
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEVCouldNotCompute *S) { return true; }
static bool classof(const SCEV *S);
};
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 3f8f149cb42..3ab9c8256bb 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -22,7 +22,7 @@
#include <set>
namespace llvm {
- class TargetLowering;
+ class ScalarTargetTransformInfo;
/// Return true if the given expression is safe to expand in the sense that
/// all materialized values are safe to speculate.
@@ -129,7 +129,7 @@ namespace llvm {
/// representative. Return the number of phis eliminated.
unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT,
SmallVectorImpl<WeakVH> &DeadInsts,
- const TargetLowering *TLI = NULL);
+ const ScalarTargetTransformInfo *STTI = NULL);
/// expandCodeFor - Insert code to directly compute the specified SCEV
/// expression into the program. The inserted code is inserted into the
diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h
index ded12974fac..54db7d6bcf0 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -46,7 +46,6 @@ namespace llvm {
Type *getType() const { return V->getType(); }
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEVConstant *S) { return true; }
static inline bool classof(const SCEV *S) {
return S->getSCEVType() == scConstant;
}
@@ -68,7 +67,6 @@ namespace llvm {
Type *getType() const { return Ty; }
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEVCastExpr *S) { return true; }
static inline bool classof(const SCEV *S) {
return S->getSCEVType() == scTruncate ||
S->getSCEVType() == scZeroExtend ||
@@ -88,7 +86,6 @@ namespace llvm {
public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEVTruncateExpr *S) { return true; }
static inline bool classof(const SCEV *S) {
return S->getSCEVType() == scTruncate;
}
@@ -106,7 +103,6 @@ namespace llvm {
public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEVZeroExtendExpr *S) { return true; }
static inline bool classof(const SCEV *S) {
return S->getSCEVType() == scZeroExtend;
}
@@ -124,7 +120,6 @@ namespace llvm {
public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEVSignExtendExpr *S) { return true; }
static inline bool classof(const SCEV *S) {
return S->getSCEVType() == scSignExtend;
}
@@ -166,7 +161,6 @@ namespace llvm {
}
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEVNAryExpr *S) { return true; }
static inline bool classof(const SCEV *S) {
return S->getSCEVType() == scAddExpr ||
S->getSCEVType() == scMulExpr ||
@@ -188,7 +182,6 @@ namespace llvm {
public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEVCommutativeExpr *S) { return true; }
static inline bool classof(const SCEV *S) {
return S->getSCEVType() == scAddExpr ||
S->getSCEVType() == scMulExpr ||
@@ -223,7 +216,6 @@ namespace llvm {
}
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEVAddExpr *S) { return true; }
static inline bool classof(const SCEV *S) {
return S->getSCEVType() == scAddExpr;
}
@@ -242,7 +234,6 @@ namespace llvm {
public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEVMulExpr *S) { return true; }
static inline bool classof(const SCEV *S) {
return S->getSCEVType() == scMulExpr;
}
@@ -274,7 +265,6 @@ namespace llvm {
}
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEVUDivExpr *S) { return true; }
static inline bool classof(const SCEV *S) {
return S->getSCEVType() == scUDivExpr;
}
@@ -358,7 +348,6 @@ namespace llvm {
}
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEVAddRecExpr *S) { return true; }
static inline bool classof(const SCEV *S) {
return S->getSCEVType() == scAddRecExpr;
}
@@ -380,7 +369,6 @@ namespace llvm {
public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEVSMaxExpr *S) { return true; }
static inline bool classof(const SCEV *S) {
return S->getSCEVType() == scSMaxExpr;
}
@@ -402,7 +390,6 @@ namespace llvm {
public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEVUMaxExpr *S) { return true; }
static inline bool classof(const SCEV *S) {
return S->getSCEVType() == scUMaxExpr;
}
@@ -449,7 +436,6 @@ namespace llvm {
Type *getType() const { return getValPtr()->getType(); }
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEVUnknown *S) { return true; }
static inline bool classof(const SCEV *S) {
return S->getSCEVType() == scUnknown;
}
diff --git a/include/llvm/Argument.h b/include/llvm/Argument.h
index 1155b974ecd..b1c22185191 100644
--- a/include/llvm/Argument.h
+++ b/include/llvm/Argument.h
@@ -81,7 +81,6 @@ public:
/// classof - Methods for support type inquiry through isa, cast, and
/// dyn_cast:
///
- static inline bool classof(const Argument *) { return true; }
static inline bool classof(const Value *V) {
return V->getValueID() == ArgumentVal;
}
diff --git a/include/llvm/Attributes.h b/include/llvm/Attributes.h
index d20a47a286e..a28aa183473 100644
--- a/include/llvm/Attributes.h
+++ b/include/llvm/Attributes.h
@@ -15,7 +15,6 @@
#ifndef LLVM_ATTRIBUTES_H
#define LLVM_ATTRIBUTES_H
-#include "llvm/AttributesImpl.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/ADT/ArrayRef.h"
#include <cassert>
@@ -23,13 +22,11 @@
namespace llvm {
+class AttrBuilder;
+class AttributesImpl;
class LLVMContext;
class Type;
-/// AttributeImpl - The internal representation of the Attributes class. This is
-/// uniquified.
-class AttributesImpl;
-
/// Attributes - A bitset of attributes.
class Attributes {
public:
@@ -37,15 +34,15 @@ public:
/// should be treated by optimizations and code generation. This enumeration
/// lists the attributes that can be associated with parameters, function
/// results or the function itself.
- ///
+ ///
/// Note that uwtable is about the ABI or the user mandating an entry in the
/// unwind table. The nounwind attribute is about an exception passing by the
/// function.
- ///
+ ///
/// In a theoretical system that uses tables for profiling and sjlj for
/// exceptions, they would be fully independent. In a normal system that uses
/// tables for both, the semantics are:
- ///
+ ///
/// nil = Needs an entry because an exception might pass by.
/// nounwind = No need for an entry
/// uwtable = Needs an entry because the ABI says so and because
@@ -53,6 +50,7 @@ public:
/// uwtable + nounwind = Needs an entry because the ABI says so.
enum AttrVal {
+ // IR-Level Attributes
None = 0, ///< No attributes have been set
AddressSafety = 1, ///< Address safety checking is on.
Alignment = 2, ///< Alignment of parameter (5 bits)
@@ -89,74 +87,26 @@ public:
ZExt = 27 ///< Zero extended before/after call
};
private:
- AttributesImpl Attrs;
-
- explicit Attributes(AttributesImpl *A);
+ AttributesImpl *Attrs;
+ Attributes(AttributesImpl *A) : Attrs(A) {}
public:
Attributes() : Attrs(0) {}
- explicit Attributes(uint64_t Val);
- Attributes(const Attributes &A);
-
- class Builder {
- friend class Attributes;
- uint64_t Bits;
- public:
- Builder() : Bits(0) {}
- Builder(const Attributes &A) : Bits(A.Raw()) {}
-
- void clear() { Bits = 0; }
-
- bool hasAttribute(Attributes::AttrVal A) const;
- bool hasAttributes() const;
- bool hasAttributes(const Attributes &A) const;
- bool hasAlignmentAttr() const;
-
- uint64_t getAlignment() const;
- uint64_t getStackAlignment() const;
-
- Builder &addAttribute(Attributes::AttrVal Val);
- Builder &removeAttribute(Attributes::AttrVal Val);
-
- void addAlignmentAttr(unsigned Align);
- void addStackAlignmentAttr(unsigned Align);
-
- void removeAttributes(const Attributes &A);
-
- /// @brief Remove attributes that are used on functions only.
- void removeFunctionOnlyAttrs() {
- removeAttribute(Attributes::NoReturn)
- .removeAttribute(Attributes::NoUnwind)
- .removeAttribute(Attributes::ReadNone)
- .removeAttribute(Attributes::ReadOnly)
- .removeAttribute(Attributes::NoInline)
- .removeAttribute(Attributes::AlwaysInline)
- .removeAttribute(Attributes::OptimizeForSize)
- .removeAttribute(Attributes::StackProtect)
- .removeAttribute(Attributes::StackProtectReq)
- .removeAttribute(Attributes::NoRedZone)
- .removeAttribute(Attributes::NoImplicitFloat)
- .removeAttribute(Attributes::Naked)
- .removeAttribute(Attributes::InlineHint)
- .removeAttribute(Attributes::StackAlignment)
- .removeAttribute(Attributes::UWTable)
- .removeAttribute(Attributes::NonLazyBind)
- .removeAttribute(Attributes::ReturnsTwice)
- .removeAttribute(Attributes::AddressSafety);
- }
- };
+ Attributes(const Attributes &A) : Attrs(A.Attrs) {}
+ Attributes &operator=(const Attributes &A) {
+ Attrs = A.Attrs;
+ return *this;
+ }
/// get - Return a uniquified Attributes object. This takes the uniquified
/// value from the Builder and wraps it in the Attributes class.
- static Attributes get(Builder &B);
- static Attributes get(LLVMContext &Context, Builder &B);
+ static Attributes get(LLVMContext &Context, ArrayRef<AttrVal> Vals);
+ static Attributes get(LLVMContext &Context, AttrBuilder &B);
/// @brief Return true if the attribute is present.
bool hasAttribute(AttrVal Val) const;
/// @brief Return true if attributes exist
- bool hasAttributes() const {
- return Attrs.hasAttributes();
- }
+ bool hasAttributes() const;
/// @brief Return true if the attributes are a non-null intersection.
bool hasAttributes(const Attributes &A) const;
@@ -205,91 +155,29 @@ public:
hasAttribute(Attributes::AddressSafety);
}
- bool isEmptyOrSingleton() const;
-
- // This is a "safe bool() operator".
- operator const void *() const { return Attrs.Bits ? this : 0; }
- bool operator == (const Attributes &A) const {
- return Attrs.Bits == A.Attrs.Bits;
+ bool operator==(const Attributes &A) const {
+ return Attrs == A.Attrs;
}
- bool operator != (const Attributes &A) const {
- return Attrs.Bits != A.Attrs.Bits;
+ bool operator!=(const Attributes &A) const {
+ return Attrs != A.Attrs;
}
- Attributes operator | (const Attributes &A) const;
- Attributes operator & (const Attributes &A) const;
- Attributes operator ^ (const Attributes &A) const;
- Attributes &operator |= (const Attributes &A);
- Attributes &operator &= (const Attributes &A);
- Attributes operator ~ () const;
-
uint64_t Raw() const;
- /// constructAlignmentFromInt - This turns an int alignment (a power of 2,
- /// normally) into the form used internally in Attributes.
- static Attributes constructAlignmentFromInt(unsigned i) {
- // Default alignment, allow the target to define how to align it.
- if (i == 0)
- return Attributes();
-
- assert(isPowerOf2_32(i) && "Alignment must be a power of two.");
- assert(i <= 0x40000000 && "Alignment too large.");
- return Attributes((Log2_32(i)+1) << 16);
- }
-
- /// constructStackAlignmentFromInt - This turns an int stack alignment (which
- /// must be a power of 2) into the form used internally in Attributes.
- static Attributes constructStackAlignmentFromInt(unsigned i) {
- // Default alignment, allow the target to define how to align it.
- if (i == 0)
- return Attributes();
-
- assert(isPowerOf2_32(i) && "Alignment must be a power of two.");
- assert(i <= 0x100 && "Alignment too large.");
- return Attributes((Log2_32(i)+1) << 26);
- }
-
/// @brief Which attributes cannot be applied to a type.
static Attributes typeIncompatible(Type *Ty);
/// encodeLLVMAttributesForBitcode - This returns an integer containing an
/// encoding of all the LLVM attributes found in the given attribute bitset.
/// Any change to this encoding is a breaking change to bitcode compatibility.
- static uint64_t encodeLLVMAttributesForBitcode(Attributes Attrs) {
- // FIXME: It doesn't make sense to store the alignment information as an
- // expanded out value, we should store it as a log2 value. However, we
- // can't just change that here without breaking bitcode compatibility. If
- // this ever becomes a problem in practice, we should introduce new tag
- // numbers in the bitcode file and have those tags use a more efficiently
- // encoded alignment field.
-
- // Store the alignment in the bitcode as a 16-bit raw value instead of a
- // 5-bit log2 encoded value. Shift the bits above the alignment up by 11
- // bits.
- uint64_t EncodedAttrs = Attrs.Raw() & 0xffff;
- if (Attrs.hasAttribute(Attributes::Alignment))
- EncodedAttrs |= Attrs.getAlignment() << 16;
- EncodedAttrs |= (Attrs.Raw() & (0xfffULL << 21)) << 11;
- return EncodedAttrs;
- }
+ static uint64_t encodeLLVMAttributesForBitcode(Attributes Attrs);
/// decodeLLVMAttributesForBitcode - This returns an attribute bitset
/// containing the LLVM attributes that have been decoded from the given
/// integer. This function must stay in sync with
/// 'encodeLLVMAttributesForBitcode'.
- static Attributes decodeLLVMAttributesForBitcode(uint64_t EncodedAttrs) {
- // The alignment is stored as a 16-bit raw value from bits 31--16. We shift
- // the bits above 31 down by 11 bits.
- unsigned Alignment = (EncodedAttrs & (0xffffULL << 16)) >> 16;
- assert((!Alignment || isPowerOf2_32(Alignment)) &&
- "Alignment must be a power of two.");
-
- Attributes Attrs(EncodedAttrs & 0xffff);
- if (Alignment)
- Attrs |= Attributes::constructAlignmentFromInt(Alignment);
- Attrs |= Attributes((EncodedAttrs & (0xfffULL << 32)) >> 11);
- return Attrs;
- }
+ static Attributes decodeLLVMAttributesForBitcode(LLVMContext &C,
+ uint64_t EncodedAttrs);
/// getAsString - The set of Attributes set in Attributes is converted to a
/// string of equivalent mnemonics. This is, presumably, for writing out the
@@ -299,6 +187,96 @@ public:
};
//===----------------------------------------------------------------------===//
+/// AttrBuilder - This class is used in conjunction with the Attributes::get
+/// method to create an Attributes object. The object itself is uniquified. The
+/// Builder's value, however, is not. So this can be used as a quick way to test
+/// for equality, presence of attributes, etc.
+class AttrBuilder {
+ uint64_t Bits;
+public:
+ AttrBuilder() : Bits(0) {}
+ explicit AttrBuilder(uint64_t B) : Bits(B) {}
+ AttrBuilder(const Attributes &A) : Bits(A.Raw()) {}
+ AttrBuilder(const AttrBuilder &B) : Bits(B.Bits) {}
+
+ void clear() { Bits = 0; }
+
+ /// addAttribute - Add an attribute to the builder.
+ AttrBuilder &addAttribute(Attributes::AttrVal Val);
+
+ /// removeAttribute - Remove an attribute from the builder.
+ AttrBuilder &removeAttribute(Attributes::AttrVal Val);
+
+  /// addAttributes - Add the attributes from A to the builder.
+ AttrBuilder &addAttributes(const Attributes &A);
+
+  /// removeAttributes - Remove the attributes in A from the builder.
+ AttrBuilder &removeAttributes(const Attributes &A);
+
+ /// hasAttribute - Return true if the builder has the specified attribute.
+ bool hasAttribute(Attributes::AttrVal A) const;
+
+ /// hasAttributes - Return true if the builder has IR-level attributes.
+ bool hasAttributes() const;
+
+  /// hasAttributes - Return true if the builder has any of the attributes
+  /// in A.
+ bool hasAttributes(const Attributes &A) const;
+
+ /// hasAlignmentAttr - Return true if the builder has an alignment attribute.
+ bool hasAlignmentAttr() const;
+
+ /// getAlignment - Retrieve the alignment attribute, if it exists.
+ uint64_t getAlignment() const;
+
+ /// getStackAlignment - Retrieve the stack alignment attribute, if it exists.
+ uint64_t getStackAlignment() const;
+
+ /// addAlignmentAttr - This turns an int alignment (which must be a power of
+ /// 2) into the form used internally in Attributes.
+ AttrBuilder &addAlignmentAttr(unsigned Align);
+
+ /// addStackAlignmentAttr - This turns an int stack alignment (which must be a
+ /// power of 2) into the form used internally in Attributes.
+ AttrBuilder &addStackAlignmentAttr(unsigned Align);
+
+ /// addRawValue - Add the raw value to the internal representation.
+ /// N.B. This should be used ONLY for decoding LLVM bitcode!
+ AttrBuilder &addRawValue(uint64_t Val);
+
+ /// @brief Remove attributes that are used on functions only.
+ void removeFunctionOnlyAttrs() {
+ removeAttribute(Attributes::NoReturn)
+ .removeAttribute(Attributes::NoUnwind)
+ .removeAttribute(Attributes::ReadNone)
+ .removeAttribute(Attributes::ReadOnly)
+ .removeAttribute(Attributes::NoInline)
+ .removeAttribute(Attributes::AlwaysInline)
+ .removeAttribute(Attributes::OptimizeForSize)
+ .removeAttribute(Attributes::StackProtect)
+ .removeAttribute(Attributes::StackProtectReq)
+ .removeAttribute(Attributes::NoRedZone)
+ .removeAttribute(Attributes::NoImplicitFloat)
+ .removeAttribute(Attributes::Naked)
+ .removeAttribute(Attributes::InlineHint)
+ .removeAttribute(Attributes::StackAlignment)
+ .removeAttribute(Attributes::UWTable)
+ .removeAttribute(Attributes::NonLazyBind)
+ .removeAttribute(Attributes::ReturnsTwice)
+ .removeAttribute(Attributes::AddressSafety);
+ }
+
+ uint64_t Raw() const { return Bits; }
+
+  bool operator==(const AttrBuilder &B) const {
+    return Bits == B.Bits;
+  }
+  bool operator!=(const AttrBuilder &B) const {
+    return Bits != B.Bits;
+  }
+};
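A minimal usage sketch for the new builder interface (assuming an LLVMContext &C is in scope; Attributes::get(C, B) is the uniquifying entry point used elsewhere in this patch):

    AttrBuilder B;
    B.addAttribute(Attributes::NoUnwind)
     .addAttribute(Attributes::ReadOnly)
     .addAlignmentAttr(16);
    Attributes A = Attributes::get(C, B);
    assert(A.hasAttribute(Attributes::NoUnwind));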
+
+//===----------------------------------------------------------------------===//
// AttributeWithIndex
//===----------------------------------------------------------------------===//
@@ -310,9 +288,9 @@ struct AttributeWithIndex {
///< Index 0 is used for return value attributes.
///< Index ~0U is used for function attributes.
- static AttributeWithIndex get(unsigned Idx,
+ static AttributeWithIndex get(LLVMContext &C, unsigned Idx,
ArrayRef<Attributes::AttrVal> Attrs) {
- Attributes::Builder B;
+ AttrBuilder B;
for (ArrayRef<Attributes::AttrVal>::iterator I = Attrs.begin(),
E = Attrs.end(); I != E; ++I)
@@ -320,7 +298,7 @@ struct AttributeWithIndex {
AttributeWithIndex P;
P.Index = Idx;
- P.Attrs = Attributes::get(B);
+ P.Attrs = Attributes::get(C, B);
return P;
}
static AttributeWithIndex get(unsigned Idx, Attributes Attrs) {
@@ -340,6 +318,12 @@ class AttributeListImpl;
/// AttrListPtr - This class manages the ref count for the opaque
/// AttributeListImpl object and provides accessors for it.
class AttrListPtr {
+public:
+ enum AttrIndex {
+ ReturnIndex = 0U,
+ FunctionIndex = ~0U
+ };
+private:
/// AttrList - The attributes that we are managing. This can be null
/// to represent the empty attributes list.
AttributeListImpl *AttrList;
@@ -359,12 +343,12 @@ public:
/// addAttr - Add the specified attribute at the specified index to this
/// attribute list. Since attribute lists are immutable, this
/// returns the new list.
- AttrListPtr addAttr(unsigned Idx, Attributes Attrs) const;
+ AttrListPtr addAttr(LLVMContext &C, unsigned Idx, Attributes Attrs) const;
/// removeAttr - Remove the specified attribute at the specified index from
/// this attribute list. Since attribute lists are immutable, this
/// returns the new list.
- AttrListPtr removeAttr(unsigned Idx, Attributes Attrs) const;
+ AttrListPtr removeAttr(LLVMContext &C, unsigned Idx, Attributes Attrs) const;
//===--------------------------------------------------------------------===//
// Attribute List Accessors
@@ -378,12 +362,12 @@ public:
/// getRetAttributes - The attributes for the ret value are
/// returned.
Attributes getRetAttributes() const {
- return getAttributes(0);
+ return getAttributes(ReturnIndex);
}
/// getFnAttributes - The function attributes are returned.
Attributes getFnAttributes() const {
- return getAttributes(~0U);
+ return getAttributes(FunctionIndex);
}
/// paramHasAttr - Return true if the specified parameter index has the
@@ -411,8 +395,6 @@ public:
bool operator!=(const AttrListPtr &RHS) const
{ return AttrList != RHS.AttrList; }
- void dump() const;
-
//===--------------------------------------------------------------------===//
// Attribute List Introspection
//===--------------------------------------------------------------------===//
@@ -442,6 +424,8 @@ public:
/// holds a index number plus a set of attributes.
const AttributeWithIndex &getSlot(unsigned Slot) const;
+ void dump() const;
+
private:
explicit AttrListPtr(AttributeListImpl *L);
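A sketch of the AttrListPtr changes in use (hypothetical values; note that addAttr/removeAttr now thread the LLVMContext through, and the magic 0U/~0U indices are spelled via the new AttrIndex enum):

    AttrListPtr AL;
    AL = AL.addAttr(C, AttrListPtr::FunctionIndex,
                    Attributes::get(C, Attributes::NoUnwind));
    Attributes FnAttrs = AL.getFnAttributes();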
diff --git a/include/llvm/BasicBlock.h b/include/llvm/BasicBlock.h
index 9ab4a74e5a5..02c2a96b6c6 100644
--- a/include/llvm/BasicBlock.h
+++ b/include/llvm/BasicBlock.h
@@ -213,7 +213,6 @@ public:
ValueSymbolTable *getValueSymbolTable();
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const BasicBlock *) { return true; }
static inline bool classof(const Value *V) {
return V->getValueID() == Value::BasicBlockVal;
}
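For context on this recurring cleanup: isa<> and dyn_cast<> only ever invoke classof on the argument's static type, so the always-true self-overload was dead code, and removing it changes nothing for callers. A sketch of what still works unchanged:

    if (BasicBlock *BB = dyn_cast<BasicBlock>(V))  // calls classof(const Value*)
      (void)BB;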
diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h
index 3daef789d61..840f57e7526 100644
--- a/include/llvm/Bitcode/BitstreamReader.h
+++ b/include/llvm/Bitcode/BitstreamReader.h
@@ -409,7 +409,7 @@ public:
}
/// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter
- /// the block, and return true if the block is valid.
+ /// the block, and return true if the block has an error.
bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = 0) {
// Save the current block's state on BlockScope.
BlockScope.push_back(Block(CurCodeSize));
diff --git a/include/llvm/CodeGen/CommandFlags.h b/include/llvm/CodeGen/CommandFlags.h
new file mode 100644
index 00000000000..90ee2342449
--- /dev/null
+++ b/include/llvm/CodeGen/CommandFlags.h
@@ -0,0 +1,228 @@
+//===-- CommandFlags.h - Command Line Flags Interface -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains codegen-specific flags that are shared between different
+// command line tools. The tools "llc" and "opt" both use this file to prevent
+// flag duplication.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_COMMAND_LINE_FLAGS_H
+#define LLVM_CODEGEN_COMMAND_LINE_FLAGS_H
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <string>
+using namespace llvm;
+
+cl::opt<std::string>
+MArch("march", cl::desc("Architecture to generate code for (see --version)"));
+
+cl::opt<std::string>
+MCPU("mcpu",
+ cl::desc("Target a specific cpu type (-mcpu=help for details)"),
+ cl::value_desc("cpu-name"),
+ cl::init(""));
+
+cl::list<std::string>
+MAttrs("mattr",
+ cl::CommaSeparated,
+ cl::desc("Target specific attributes (-mattr=help for details)"),
+ cl::value_desc("a1,+a2,-a3,..."));
+
+cl::opt<Reloc::Model>
+RelocModel("relocation-model",
+ cl::desc("Choose relocation model"),
+ cl::init(Reloc::Default),
+ cl::values(
+ clEnumValN(Reloc::Default, "default",
+ "Target default relocation model"),
+ clEnumValN(Reloc::Static, "static",
+ "Non-relocatable code"),
+ clEnumValN(Reloc::PIC_, "pic",
+ "Fully relocatable, position independent code"),
+ clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic",
+ "Relocatable external references, non-relocatable code"),
+ clEnumValEnd));
+
+cl::opt<llvm::CodeModel::Model>
+CMModel("code-model",
+ cl::desc("Choose code model"),
+ cl::init(CodeModel::Default),
+ cl::values(clEnumValN(CodeModel::Default, "default",
+ "Target default code model"),
+ clEnumValN(CodeModel::Small, "small",
+ "Small code model"),
+ clEnumValN(CodeModel::Kernel, "kernel",
+ "Kernel code model"),
+ clEnumValN(CodeModel::Medium, "medium",
+ "Medium code model"),
+ clEnumValN(CodeModel::Large, "large",
+ "Large code model"),
+ clEnumValEnd));
+
+cl::opt<bool>
+RelaxAll("mc-relax-all",
+ cl::desc("When used with filetype=obj, "
+ "relax all fixups in the emitted object file"));
+
+cl::opt<TargetMachine::CodeGenFileType>
+FileType("filetype", cl::init(TargetMachine::CGFT_AssemblyFile),
+ cl::desc("Choose a file type (not all types are supported by all targets):"),
+ cl::values(
+ clEnumValN(TargetMachine::CGFT_AssemblyFile, "asm",
+ "Emit an assembly ('.s') file"),
+ clEnumValN(TargetMachine::CGFT_ObjectFile, "obj",
+ "Emit a native object ('.o') file"),
+ clEnumValN(TargetMachine::CGFT_Null, "null",
+ "Emit nothing, for performance testing"),
+ clEnumValEnd));
+
+cl::opt<bool> DisableDotLoc("disable-dot-loc", cl::Hidden,
+ cl::desc("Do not use .loc entries"));
+
+cl::opt<bool> DisableCFI("disable-cfi", cl::Hidden,
+ cl::desc("Do not use .cfi_* directives"));
+
+cl::opt<bool> EnableDwarfDirectory("enable-dwarf-directory", cl::Hidden,
+ cl::desc("Use .file directives with an explicit directory."));
+
+cl::opt<bool>
+DisableRedZone("disable-red-zone",
+ cl::desc("Do not emit code that uses the red zone."),
+ cl::init(false));
+
+cl::opt<bool>
+EnableFPMAD("enable-fp-mad",
+ cl::desc("Enable less precise MAD instructions to be generated"),
+ cl::init(false));
+
+cl::opt<bool>
+DisableFPElim("disable-fp-elim",
+ cl::desc("Disable frame pointer elimination optimization"),
+ cl::init(false));
+
+cl::opt<bool>
+DisableFPElimNonLeaf("disable-non-leaf-fp-elim",
+ cl::desc("Disable frame pointer elimination optimization for non-leaf funcs"),
+ cl::init(false));
+
+cl::opt<bool>
+EnableUnsafeFPMath("enable-unsafe-fp-math",
+ cl::desc("Enable optimizations that may decrease FP precision"),
+ cl::init(false));
+
+cl::opt<bool>
+EnableNoInfsFPMath("enable-no-infs-fp-math",
+ cl::desc("Enable FP math optimizations that assume no +-Infs"),
+ cl::init(false));
+
+cl::opt<bool>
+EnableNoNaNsFPMath("enable-no-nans-fp-math",
+ cl::desc("Enable FP math optimizations that assume no NaNs"),
+ cl::init(false));
+
+cl::opt<bool>
+EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math",
+ cl::Hidden,
+ cl::desc("Force codegen to assume rounding mode can change dynamically"),
+ cl::init(false));
+
+cl::opt<bool>
+GenerateSoftFloatCalls("soft-float",
+ cl::desc("Generate software floating point library calls"),
+ cl::init(false));
+
+cl::opt<llvm::FloatABI::ABIType>
+FloatABIForCalls("float-abi",
+ cl::desc("Choose float ABI type"),
+ cl::init(FloatABI::Default),
+ cl::values(
+ clEnumValN(FloatABI::Default, "default",
+ "Target default float ABI type"),
+ clEnumValN(FloatABI::Soft, "soft",
+ "Soft float ABI (implied by -soft-float)"),
+ clEnumValN(FloatABI::Hard, "hard",
+ "Hard float ABI (uses FP registers)"),
+ clEnumValEnd));
+
+cl::opt<llvm::FPOpFusion::FPOpFusionMode>
+FuseFPOps("fp-contract",
+           cl::desc("Enable aggressive formation of fused FP ops"),
+ cl::init(FPOpFusion::Standard),
+ cl::values(
+ clEnumValN(FPOpFusion::Fast, "fast",
+ "Fuse FP ops whenever profitable"),
+ clEnumValN(FPOpFusion::Standard, "on",
+ "Only fuse 'blessed' FP ops."),
+ clEnumValN(FPOpFusion::Strict, "off",
+                      "Only fuse FP ops when the result won't be affected."),
+ clEnumValEnd));
+
+cl::opt<bool>
+DontPlaceZerosInBSS("nozero-initialized-in-bss",
+ cl::desc("Don't place zero-initialized symbols into bss section"),
+ cl::init(false));
+
+cl::opt<bool>
+EnableGuaranteedTailCallOpt("tailcallopt",
+ cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."),
+ cl::init(false));
+
+cl::opt<bool>
+DisableTailCalls("disable-tail-calls",
+ cl::desc("Never emit tail calls"),
+ cl::init(false));
+
+cl::opt<unsigned>
+OverrideStackAlignment("stack-alignment",
+ cl::desc("Override default stack alignment"),
+ cl::init(0));
+
+cl::opt<bool>
+EnableRealignStack("realign-stack",
+ cl::desc("Realign stack if needed"),
+ cl::init(true));
+
+cl::opt<std::string>
+TrapFuncName("trap-func", cl::Hidden,
+ cl::desc("Emit a call to trap function rather than a trap instruction"),
+ cl::init(""));
+
+cl::opt<bool>
+EnablePIE("enable-pie",
+ cl::desc("Assume the creation of a position independent executable."),
+ cl::init(false));
+
+cl::opt<bool>
+SegmentedStacks("segmented-stacks",
+ cl::desc("Use segmented stacks if possible."),
+ cl::init(false));
+
+cl::opt<bool>
+UseInitArray("use-init-array",
+ cl::desc("Use .init_array instead of .ctors."),
+ cl::init(false));
+
+cl::opt<std::string> StopAfter("stop-after",
+ cl::desc("Stop compilation after a specific pass"),
+ cl::value_desc("pass-name"),
+ cl::init(""));
+cl::opt<std::string> StartAfter("start-after",
+ cl::desc("Resume compilation after a specific pass"),
+ cl::value_desc("pass-name"),
+ cl::init(""));
+
+cl::opt<unsigned>
+SSPBufferSize("stack-protector-buffer-size", cl::init(8),
+ cl::desc("Lower bound for a buffer to be considered for "
+ "stack protection"));
+#endif
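A sketch of how a tool consumes this header (the flags are file-scope cl::opt objects, so a single ParseCommandLineOptions call populates them directly):

    #include "llvm/CodeGen/CommandFlags.h"

    int main(int argc, char **argv) {
      cl::ParseCommandLineOptions(argc, argv, "example tool\n");
      if (!MCPU.empty() || RelocModel == Reloc::PIC_) {
        // Feed MCPU, MAttrs, CMModel, etc. into TargetMachine creation.
      }
      return 0;
    }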
diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h
index 1e8dde12553..b421753dd53 100644
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -65,12 +65,6 @@ namespace llvm {
/// Live interval pointers for all the virtual registers.
IndexedMap<LiveInterval*, VirtReg2IndexFunctor> VirtRegIntervals;
- /// AllocatableRegs - A bit vector of allocatable registers.
- BitVector AllocatableRegs;
-
- /// ReservedRegs - A bit vector of reserved registers.
- BitVector ReservedRegs;
-
/// RegMaskSlots - Sorted list of instructions with register mask operands.
/// Always use the 'r' slot, RegMasks are normal clobbers, not early
/// clobbers.
@@ -123,18 +117,6 @@ namespace llvm {
return VirtRegIntervals.inBounds(Reg) && VirtRegIntervals[Reg];
}
- /// isAllocatable - is the physical register reg allocatable in the current
- /// function?
- bool isAllocatable(unsigned reg) const {
- return AllocatableRegs.test(reg);
- }
-
- /// isReserved - is the physical register reg reserved in the current
- /// function
- bool isReserved(unsigned reg) const {
- return ReservedRegs.test(reg);
- }
-
// Interval creation.
LiveInterval &getOrCreateInterval(unsigned Reg) {
if (!hasInterval(Reg)) {
@@ -278,15 +260,20 @@ namespace llvm {
/// instruction 'mi' has been moved within a basic block. This will update
/// the live intervals for all operands of mi. Moves between basic blocks
/// are not supported.
- void handleMove(MachineInstr* MI);
+ ///
+ /// \param UpdateFlags Update live intervals for nonallocatable physregs.
+ void handleMove(MachineInstr* MI, bool UpdateFlags = false);
/// moveIntoBundle - Update intervals for operands of MI so that they
/// begin/end on the SlotIndex for BundleStart.
///
+ /// \param UpdateFlags Update live intervals for nonallocatable physregs.
+ ///
/// Requires MI and BundleStart to have SlotIndexes, and assumes
/// existing liveness is accurate. BundleStart should be the first
/// instruction in the Bundle.
- void handleMoveIntoBundle(MachineInstr* MI, MachineInstr* BundleStart);
+ void handleMoveIntoBundle(MachineInstr* MI, MachineInstr* BundleStart,
+ bool UpdateFlags = false);
// Register mask functions.
//
diff --git a/include/llvm/CodeGen/LiveVariables.h b/include/llvm/CodeGen/LiveVariables.h
index d4bb409e060..3bb134b8fb2 100644
--- a/include/llvm/CodeGen/LiveVariables.h
+++ b/include/llvm/CodeGen/LiveVariables.h
@@ -126,12 +126,6 @@ private:
/// building live intervals.
SparseBitVector<> PHIJoins;
- /// ReservedRegisters - This vector keeps track of which registers
- /// are reserved register which are not allocatable by the target machine.
- /// We can not track liveness for values that are in this set.
- ///
- BitVector ReservedRegisters;
-
private: // Intermediate data structures
MachineFunction *MF;
diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h
index 654361f9d42..770685358ab 100644
--- a/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -176,15 +176,24 @@ public:
}
// Add a displacement from an existing MachineOperand with an added offset.
- const MachineInstrBuilder &addDisp(const MachineOperand &Disp,
- int64_t off) const {
+ const MachineInstrBuilder &addDisp(const MachineOperand &Disp, int64_t off,
+ unsigned char TargetFlags = 0) const {
switch (Disp.getType()) {
default:
llvm_unreachable("Unhandled operand type in addDisp()");
case MachineOperand::MO_Immediate:
return addImm(Disp.getImm() + off);
- case MachineOperand::MO_GlobalAddress:
- return addGlobalAddress(Disp.getGlobal(), Disp.getOffset() + off);
+ case MachineOperand::MO_GlobalAddress: {
+ // If caller specifies new TargetFlags then use it, otherwise the
+ // default behavior is to copy the target flags from the existing
+ // MachineOperand. This means if the caller wants to clear the
+ // target flags it needs to do so explicitly.
+ if (TargetFlags)
+ return addGlobalAddress(Disp.getGlobal(), Disp.getOffset() + off,
+ TargetFlags);
+ return addGlobalAddress(Disp.getGlobal(), Disp.getOffset() + off,
+ Disp.getTargetFlags());
+ }
}
}
};
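The new TargetFlags parameter only takes effect when nonzero; a sketch, where X86II::MO_GOT stands in for any nonzero target-specific flag value:

    MIB.addDisp(Disp, 8);                 // copies Disp.getTargetFlags()
    MIB.addDisp(Disp, 8, X86II::MO_GOT);  // explicit flags override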
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index 91d24dd0fc0..a5bc7f7d391 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -95,9 +95,6 @@ class MachineRegisterInfo {
/// started.
BitVector ReservedRegs;
- /// AllocatableRegs - From TRI->getAllocatableSet.
- mutable BitVector AllocatableRegs;
-
/// LiveIns/LiveOuts - Keep track of the physical registers that are
/// livein/liveout of the function. Live in values are typically arguments in
/// registers, live out values are typically return values in registers.
@@ -427,6 +424,34 @@ public:
return !reservedRegsFrozen() || ReservedRegs.test(PhysReg);
}
+ /// getReservedRegs - Returns a reference to the frozen set of reserved
+ /// registers. This method should always be preferred to calling
+ /// TRI::getReservedRegs() when possible.
+ const BitVector &getReservedRegs() const {
+ assert(reservedRegsFrozen() &&
+ "Reserved registers haven't been frozen yet. "
+ "Use TRI::getReservedRegs().");
+ return ReservedRegs;
+ }
+
+ /// isReserved - Returns true when PhysReg is a reserved register.
+ ///
+ /// Reserved registers may belong to an allocatable register class, but the
+ /// target has explicitly requested that they are not used.
+ ///
+ bool isReserved(unsigned PhysReg) const {
+ return getReservedRegs().test(PhysReg);
+ }
+
+ /// isAllocatable - Returns true when PhysReg belongs to an allocatable
+ /// register class and it hasn't been reserved.
+ ///
+ /// Allocatable registers may show up in the allocation order of some virtual
+ /// register, so a register allocator needs to track its liveness and
+ /// availability.
+ bool isAllocatable(unsigned PhysReg) const {
+ return TRI->isInAllocatableClass(PhysReg) && !isReserved(PhysReg);
+ }
//===--------------------------------------------------------------------===//
// LiveIn/LiveOut Management
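The reserved/allocatable queries now live on MachineRegisterInfo; a usage sketch for a pass holding a MachineFunction &MF:

    MachineRegisterInfo &MRI = MF.getRegInfo();
    if (MRI.isAllocatable(PhysReg)) {
      // PhysReg can appear in an allocation order; track its liveness.
    } else if (MRI.isReserved(PhysReg)) {
      // Reserved: the target forbids its use even if its class is allocatable.
    }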
diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h
index 93990e164d1..2b96c7abe42 100644
--- a/include/llvm/CodeGen/MachineScheduler.h
+++ b/include/llvm/CodeGen/MachineScheduler.h
@@ -110,6 +110,10 @@ public:
/// Initialize the strategy after building the DAG for a new region.
virtual void initialize(ScheduleDAGMI *DAG) = 0;
+ /// Notify this strategy that all roots have been released (including those
+ /// that depend on EntrySU or ExitSU).
+ virtual void registerRoots() {}
+
/// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
/// schedule the node at the top of the unscheduled region. Otherwise it will
/// be scheduled at the bottom.
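A sketch of a strategy overriding the new hook (ShadowStrategy is a hypothetical name; the remaining MachineSchedStrategy virtuals are assumed to be implemented elsewhere in the class):

    struct ShadowStrategy : public MachineSchedStrategy {
      virtual void registerRoots() {
        // All roots, including those depending on EntrySU/ExitSU, have been
        // released; e.g. recompute priorities before the first pickNode().
      }
      // initialize(), pickNode(), etc. omitted for brevity.
    };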
diff --git a/include/llvm/CodeGen/PseudoSourceValue.h b/include/llvm/CodeGen/PseudoSourceValue.h
index 7dab4f94862..8f52d3bf47d 100644
--- a/include/llvm/CodeGen/PseudoSourceValue.h
+++ b/include/llvm/CodeGen/PseudoSourceValue.h
@@ -50,7 +50,6 @@ namespace llvm {
/// classof - Methods for support type inquiry through isa, cast, and
/// dyn_cast:
///
- static inline bool classof(const PseudoSourceValue *) { return true; }
static inline bool classof(const Value *V) {
return V->getValueID() == PseudoSourceValueVal ||
V->getValueID() == FixedStackPseudoSourceValueVal;
@@ -90,9 +89,6 @@ namespace llvm {
/// classof - Methods for support type inquiry through isa, cast, and
/// dyn_cast:
///
- static inline bool classof(const FixedStackPseudoSourceValue *) {
- return true;
- }
static inline bool classof(const Value *V) {
return V->getValueID() == FixedStackPseudoSourceValueVal;
}
diff --git a/include/llvm/CodeGen/RegisterClassInfo.h b/include/llvm/CodeGen/RegisterClassInfo.h
index 400e1f48ce5..4467b62f237 100644
--- a/include/llvm/CodeGen/RegisterClassInfo.h
+++ b/include/llvm/CodeGen/RegisterClassInfo.h
@@ -106,25 +106,6 @@ public:
return CalleeSaved[N-1];
return 0;
}
-
- /// isReserved - Returns true when PhysReg is a reserved register.
- ///
- /// Reserved registers may belong to an allocatable register class, but the
- /// target has explicitly requested that they are not used.
- ///
- bool isReserved(unsigned PhysReg) const {
- return Reserved.test(PhysReg);
- }
-
- /// isAllocatable - Returns true when PhysReg belongs to an allocatable
- /// register class and it hasn't been reserved.
- ///
- /// Allocatable registers may show up in the allocation order of some virtual
- /// register, so a register allocator needs to track its liveness and
- /// availability.
- bool isAllocatable(unsigned PhysReg) const {
- return TRI->isInAllocatableClass(PhysReg) && !isReserved(PhysReg);
- }
};
} // end namespace llvm
diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h
index 3986a8dd7da..08d316992ec 100644
--- a/include/llvm/CodeGen/RegisterScavenging.h
+++ b/include/llvm/CodeGen/RegisterScavenging.h
@@ -18,6 +18,7 @@
#define LLVM_CODEGEN_REGISTER_SCAVENGING_H
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/ADT/BitVector.h"
namespace llvm {
@@ -59,10 +60,6 @@ class RegScavenger {
///
BitVector CalleeSavedRegs;
- /// ReservedRegs - A bitvector of reserved registers.
- ///
- BitVector ReservedRegs;
-
/// RegsAvailable - The current state of all the physical registers immediately
/// before MBBI. One bit per physical register. If bit is set that means it's
/// available, unset means the register is currently being used.
@@ -130,12 +127,12 @@ public:
void setUsed(unsigned Reg);
private:
/// isReserved - Returns true if a register is reserved. It is never "unused".
- bool isReserved(unsigned Reg) const { return ReservedRegs.test(Reg); }
+ bool isReserved(unsigned Reg) const { return MRI->isReserved(Reg); }
/// isUsed / isUnused - Test if a register is currently being used.
///
bool isUsed(unsigned Reg) const {
- return !RegsAvailable.test(Reg) || ReservedRegs.test(Reg);
+ return !RegsAvailable.test(Reg) || isReserved(Reg);
}
/// isAliasUsed - Is Reg or an alias currently in use?
diff --git a/include/llvm/CodeGen/ScheduleDAGILP.h b/include/llvm/CodeGen/ScheduleDAGILP.h
new file mode 100644
index 00000000000..1aa40584217
--- /dev/null
+++ b/include/llvm/CodeGen/ScheduleDAGILP.h
@@ -0,0 +1,86 @@
+//===- ScheduleDAGILP.h - ILP metric for ScheduleDAGInstrs ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Definition of an ILP metric for machine level instruction scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SCHEDULEDAGILP_H
+#define LLVM_CODEGEN_SCHEDULEDAGILP_H
+
+#include "llvm/Support/DataTypes.h"
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+class ScheduleDAGInstrs;
+class SUnit;
+
+/// \brief Represent the ILP of the subDAG rooted at a DAG node.
+struct ILPValue {
+ unsigned InstrCount;
+ unsigned Cycles;
+
+ ILPValue(): InstrCount(0), Cycles(0) {}
+
+ ILPValue(unsigned count, unsigned cycles):
+ InstrCount(count), Cycles(cycles) {}
+
+ bool isValid() const { return Cycles > 0; }
+
+ // Order by the ILP metric's value.
+ bool operator<(ILPValue RHS) const {
+ return (uint64_t)InstrCount * RHS.Cycles
+ < (uint64_t)Cycles * RHS.InstrCount;
+ }
+ bool operator>(ILPValue RHS) const {
+ return RHS < *this;
+ }
+ bool operator<=(ILPValue RHS) const {
+ return (uint64_t)InstrCount * RHS.Cycles
+ <= (uint64_t)Cycles * RHS.InstrCount;
+ }
+ bool operator>=(ILPValue RHS) const {
+ return RHS <= *this;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void print(raw_ostream &OS) const;
+
+ void dump() const;
+#endif
+};
+
+/// \brief Compute the values of each DAG node for an ILP metric.
+///
+/// This metric assumes that the DAG is a forest of trees with roots at the
+/// bottom of the schedule.
+class ScheduleDAGILP {
+ bool IsBottomUp;
+ std::vector<ILPValue> ILPValues;
+
+public:
+ ScheduleDAGILP(bool IsBU): IsBottomUp(IsBU) {}
+
+ /// \brief Initialize the result data with the size of the DAG.
+ void resize(unsigned NumSUnits);
+
+ /// \brief Compute the ILP metric for the subDAG at this root.
+ void computeILP(const SUnit *Root);
+
+ /// \brief Get the ILP value for a DAG node.
+ ILPValue getILP(const SUnit *SU);
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val);
+
+} // namespace llvm
+
+#endif
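A worked example of the cross-multiplied comparison (it avoids FP division: 5 instrs / 2 cycles vs. 9 instrs / 4 cycles compares 5*4 = 20 against 2*9 = 18, so the first subDAG has the higher ILP):

    ILPValue A(5, 2), B(9, 4);
    assert(B < A && A >= B && A.isValid());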
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 63d47592650..2c828751080 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -662,9 +662,6 @@ public:
///
void dumprWithDepth(const SelectionDAG *G = 0, unsigned depth = 100) const;
-
- static bool classof(const SDNode *) { return true; }
-
/// Profile - Gather unique data for the node.
///
void Profile(FoldingSetNodeID &ID) const;
@@ -976,7 +973,6 @@ public:
}
// Methods to support isa and dyn_cast
- static bool classof(const MemSDNode *) { return true; }
static bool classof(const SDNode *N) {
// For some targets, we lower some target intrinsics to a MemIntrinsicNode
// with either an intrinsic or a target opcode.
@@ -1061,7 +1057,6 @@ public:
}
// Methods to support isa and dyn_cast
- static bool classof(const AtomicSDNode *) { return true; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
N->getOpcode() == ISD::ATOMIC_SWAP ||
@@ -1093,7 +1088,6 @@ public:
}
// Methods to support isa and dyn_cast
- static bool classof(const MemIntrinsicSDNode *) { return true; }
static bool classof(const SDNode *N) {
// We lower some target intrinsics to their target opcode
// early a node with a target opcode can be of this class
@@ -1148,7 +1142,6 @@ public:
}
static bool isSplatMask(const int *Mask, EVT VT);
- static bool classof(const ShuffleVectorSDNode *) { return true; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::VECTOR_SHUFFLE;
}
@@ -1172,7 +1165,6 @@ public:
bool isNullValue() const { return Value->isNullValue(); }
bool isAllOnesValue() const { return Value->isAllOnesValue(); }
- static bool classof(const ConstantSDNode *) { return true; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::Constant ||
N->getOpcode() == ISD::TargetConstant;
@@ -1219,7 +1211,6 @@ public:
static bool isValueValidForType(EVT VT, const APFloat& Val);
- static bool classof(const ConstantFPSDNode *) { return true; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::ConstantFP ||
N->getOpcode() == ISD::TargetConstantFP;
@@ -1241,7 +1232,6 @@ public:
// Return the address space this GlobalAddress belongs to.
unsigned getAddressSpace() const;
- static bool classof(const GlobalAddressSDNode *) { return true; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::GlobalAddress ||
N->getOpcode() == ISD::TargetGlobalAddress ||
@@ -1261,7 +1251,6 @@ public:
int getIndex() const { return FI; }
- static bool classof(const FrameIndexSDNode *) { return true; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::FrameIndex ||
N->getOpcode() == ISD::TargetFrameIndex;
@@ -1281,7 +1270,6 @@ public:
int getIndex() const { return JTI; }
unsigned char getTargetFlags() const { return TargetFlags; }
- static bool classof(const JumpTableSDNode *) { return true; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::JumpTable ||
N->getOpcode() == ISD::TargetJumpTable;
@@ -1342,7 +1330,6 @@ public:
Type *getType() const;
- static bool classof(const ConstantPoolSDNode *) { return true; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::ConstantPool ||
N->getOpcode() == ISD::TargetConstantPool;
@@ -1366,7 +1353,6 @@ public:
int getIndex() const { return Index; }
int64_t getOffset() const { return Offset; }
- static bool classof(const TargetIndexSDNode*) { return true; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::TargetIndex;
}
@@ -1385,7 +1371,6 @@ public:
MachineBasicBlock *getBasicBlock() const { return MBB; }
- static bool classof(const BasicBlockSDNode *) { return true; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::BasicBlock;
}
@@ -1410,7 +1395,6 @@ public:
unsigned &SplatBitSize, bool &HasAnyUndefs,
unsigned MinSplatBits = 0, bool isBigEndian = false);
- static inline bool classof(const BuildVectorSDNode *) { return true; }
static inline bool classof(const SDNode *N) {
return N->getOpcode() == ISD::BUILD_VECTOR;
}
@@ -1431,7 +1415,6 @@ public:
/// getValue - return the contained Value.
const Value *getValue() const { return V; }
- static bool classof(const SrcValueSDNode *) { return true; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::SRCVALUE;
}
@@ -1446,7 +1429,6 @@ public:
const MDNode *getMD() const { return MD; }
- static bool classof(const MDNodeSDNode *) { return true; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MDNODE_SDNODE;
}
@@ -1463,7 +1445,6 @@ public:
unsigned getReg() const { return Reg; }
- static bool classof(const RegisterSDNode *) { return true; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::Register;
}
@@ -1480,7 +1461,6 @@ public:
const uint32_t *getRegMask() const { return RegMask; }
- static bool classof(const RegisterMaskSDNode *) { return true; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::RegisterMask;
}
@@ -1501,7 +1481,6 @@ public:
int64_t getOffset() const { return Offset; }
unsigned char getTargetFlags() const { return TargetFlags; }
- static bool classof(const BlockAddressSDNode *) { return true; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::BlockAddress ||
N->getOpcode() == ISD::TargetBlockAddress;
@@ -1519,7 +1498,6 @@ class EHLabelSDNode : public SDNode {
public:
MCSymbol *getLabel() const { return Label; }
- static bool classof(const EHLabelSDNode *) { return true; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::EH_LABEL;
}
@@ -1539,7 +1517,6 @@ public:
const char *getSymbol() const { return Symbol; }
unsigned char getTargetFlags() const { return TargetFlags; }
- static bool classof(const ExternalSymbolSDNode *) { return true; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::ExternalSymbol ||
N->getOpcode() == ISD::TargetExternalSymbol;
@@ -1557,7 +1534,6 @@ public:
ISD::CondCode get() const { return Condition; }
- static bool classof(const CondCodeSDNode *) { return true; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::CONDCODE;
}
@@ -1577,7 +1553,6 @@ class CvtRndSatSDNode : public SDNode {
public:
ISD::CvtCode getCvtCode() const { return CvtCode; }
- static bool classof(const CvtRndSatSDNode *) { return true; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::CONVERT_RNDSAT;
}
@@ -1596,7 +1571,6 @@ public:
EVT getVT() const { return ValueType; }
- static bool classof(const VTSDNode *) { return true; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::VALUETYPE;
}
@@ -1640,7 +1614,6 @@ public:
/// isUnindexed - Return true if this is NOT a pre/post inc/dec load/store.
bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
- static bool classof(const LSBaseSDNode *) { return true; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::LOAD ||
N->getOpcode() == ISD::STORE;
@@ -1672,7 +1645,6 @@ public:
const SDValue &getBasePtr() const { return getOperand(1); }
const SDValue &getOffset() const { return getOperand(2); }
- static bool classof(const LoadSDNode *) { return true; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::LOAD;
}
@@ -1703,7 +1675,6 @@ public:
const SDValue &getBasePtr() const { return getOperand(2); }
const SDValue &getOffset() const { return getOperand(3); }
- static bool classof(const StoreSDNode *) { return true; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::STORE;
}
@@ -1744,7 +1715,6 @@ public:
MemRefsEnd = NewMemRefsEnd;
}
- static bool classof(const MachineSDNode *) { return true; }
static bool classof(const SDNode *N) {
return N->isMachineOpcode();
}
diff --git a/include/llvm/Constant.h b/include/llvm/Constant.h
index 7464dce3303..7fecf4c7b45 100644
--- a/include/llvm/Constant.h
+++ b/include/llvm/Constant.h
@@ -108,8 +108,6 @@ public:
virtual void destroyConstant() { llvm_unreachable("Not reached!"); }
//// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Constant *) { return true; }
- static inline bool classof(const GlobalValue *) { return true; }
static inline bool classof(const Value *V) {
return V->getValueID() >= ConstantFirstVal &&
V->getValueID() <= ConstantLastVal;
diff --git a/include/llvm/Constants.h b/include/llvm/Constants.h
index 85fed4259d3..b56b9cad117 100644
--- a/include/llvm/Constants.h
+++ b/include/llvm/Constants.h
@@ -221,7 +221,6 @@ public:
}
/// @brief Methods to support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const ConstantInt *) { return true; }
static bool classof(const Value *V) {
return V->getValueID() == ConstantIntVal;
}
@@ -291,7 +290,6 @@ public:
return isExactlyValue(FV);
}
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const ConstantFP *) { return true; }
static bool classof(const Value *V) {
return V->getValueID() == ConstantFPVal;
}
@@ -334,7 +332,6 @@ public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
///
- static bool classof(const ConstantAggregateZero *) { return true; }
static bool classof(const Value *V) {
return V->getValueID() == ConstantAggregateZeroVal;
}
@@ -367,7 +364,6 @@ public:
virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const ConstantArray *) { return true; }
static bool classof(const Value *V) {
return V->getValueID() == ConstantArrayVal;
}
@@ -426,7 +422,6 @@ public:
virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const ConstantStruct *) { return true; }
static bool classof(const Value *V) {
return V->getValueID() == ConstantStructVal;
}
@@ -474,7 +469,6 @@ public:
virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const ConstantVector *) { return true; }
static bool classof(const Value *V) {
return V->getValueID() == ConstantVectorVal;
}
@@ -517,7 +511,6 @@ public:
}
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const ConstantPointerNull *) { return true; }
static bool classof(const Value *V) {
return V->getValueID() == ConstantPointerNullVal;
}
@@ -639,7 +632,6 @@ public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
///
- static bool classof(const ConstantDataSequential *) { return true; }
static bool classof(const Value *V) {
return V->getValueID() == ConstantDataArrayVal ||
V->getValueID() == ConstantDataVectorVal;
@@ -695,7 +687,6 @@ public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
///
- static bool classof(const ConstantDataArray *) { return true; }
static bool classof(const Value *V) {
return V->getValueID() == ConstantDataArrayVal;
}
@@ -749,7 +740,6 @@ public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
///
- static bool classof(const ConstantDataVector *) { return true; }
static bool classof(const Value *V) {
return V->getValueID() == ConstantDataVectorVal;
}
@@ -781,7 +771,6 @@ public:
virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const BlockAddress *) { return true; }
static inline bool classof(const Value *V) {
return V->getValueID() == BlockAddressVal;
}
@@ -1094,7 +1083,6 @@ public:
virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const ConstantExpr *) { return true; }
static inline bool classof(const Value *V) {
return V->getValueID() == ConstantExprVal;
}
@@ -1159,7 +1147,6 @@ public:
virtual void destroyConstant();
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const UndefValue *) { return true; }
static bool classof(const Value *V) {
return V->getValueID() == UndefValueVal;
}
diff --git a/include/llvm/DataLayout.h b/include/llvm/DataLayout.h
index a24737e842b..c9ac0b7feaa 100644
--- a/include/llvm/DataLayout.h
+++ b/include/llvm/DataLayout.h
@@ -231,9 +231,7 @@ public:
}
/// Layout pointer alignment
- /// FIXME: The defaults need to be removed once all of
- /// the backends/clients are updated.
- unsigned getPointerABIAlignment(unsigned AS = 0) const {
+ unsigned getPointerABIAlignment(unsigned AS) const {
DenseMap<unsigned, PointerAlignElem>::const_iterator val = Pointers.find(AS);
if (val == Pointers.end()) {
val = Pointers.find(0);
@@ -241,9 +239,7 @@ public:
return val->second.ABIAlign;
}
/// Return target's alignment for stack-based pointers
- /// FIXME: The defaults need to be removed once all of
- /// the backends/clients are updated.
- unsigned getPointerPrefAlignment(unsigned AS = 0) const {
+ unsigned getPointerPrefAlignment(unsigned AS) const {
DenseMap<unsigned, PointerAlignElem>::const_iterator val = Pointers.find(AS);
if (val == Pointers.end()) {
val = Pointers.find(0);
@@ -251,9 +247,7 @@ public:
return val->second.PrefAlign;
}
/// Layout pointer size
- /// FIXME: The defaults need to be removed once all of
- /// the backends/clients are updated.
- unsigned getPointerSize(unsigned AS = 0) const {
+ unsigned getPointerSize(unsigned AS) const {
DenseMap<unsigned, PointerAlignElem>::const_iterator val = Pointers.find(AS);
if (val == Pointers.end()) {
val = Pointers.find(0);
@@ -261,9 +255,7 @@ public:
return val->second.TypeBitWidth;
}
/// Layout pointer size, in bits
- /// FIXME: The defaults need to be removed once all of
- /// the backends/clients are updated.
- unsigned getPointerSizeInBits(unsigned AS = 0) const {
+ unsigned getPointerSizeInBits(unsigned AS) const {
DenseMap<unsigned, PointerAlignElem>::const_iterator val = Pointers.find(AS);
if (val == Pointers.end()) {
val = Pointers.find(0);
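Call sites must now name the address space explicitly; a sketch using the default address space 0, with DL being any DataLayout reference:

    unsigned PtrBits  = DL.getPointerSizeInBits(0);
    unsigned ABIAlign = DL.getPointerABIAlignment(0);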
diff --git a/include/llvm/DerivedTypes.h b/include/llvm/DerivedTypes.h
index da1e62dc043..c862c2c8bb2 100644
--- a/include/llvm/DerivedTypes.h
+++ b/include/llvm/DerivedTypes.h
@@ -85,7 +85,6 @@ public:
bool isPowerOf2ByteWidth() const;
// Methods for support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const IntegerType *) { return true; }
static inline bool classof(const Type *T) {
return T->getTypeID() == IntegerTyID;
}
@@ -134,7 +133,6 @@ public:
unsigned getNumParams() const { return NumContainedTys - 1; }
// Methods for support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const FunctionType *) { return true; }
static inline bool classof(const Type *T) {
return T->getTypeID() == FunctionTyID;
}
@@ -157,7 +155,6 @@ public:
bool indexValid(unsigned Idx) const;
// Methods for support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const CompositeType *) { return true; }
static inline bool classof(const Type *T) {
return T->getTypeID() == ArrayTyID ||
T->getTypeID() == StructTyID ||
@@ -293,7 +290,6 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const StructType *) { return true; }
static inline bool classof(const Type *T) {
return T->getTypeID() == StructTyID;
}
@@ -323,7 +319,6 @@ public:
Type *getElementType() const { return ContainedTys[0]; }
// Methods for support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const SequentialType *) { return true; }
static inline bool classof(const Type *T) {
return T->getTypeID() == ArrayTyID ||
T->getTypeID() == PointerTyID ||
@@ -353,7 +348,6 @@ public:
uint64_t getNumElements() const { return NumElements; }
// Methods for support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const ArrayType *) { return true; }
static inline bool classof(const Type *T) {
return T->getTypeID() == ArrayTyID;
}
@@ -420,7 +414,6 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const VectorType *) { return true; }
static inline bool classof(const Type *T) {
return T->getTypeID() == VectorTyID;
}
@@ -452,7 +445,6 @@ public:
inline unsigned getAddressSpace() const { return getSubclassData(); }
// Implement support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const PointerType *) { return true; }
static inline bool classof(const Type *T) {
return T->getTypeID() == PointerTyID;
}
diff --git a/include/llvm/Function.h b/include/llvm/Function.h
index 855c926bf5c..e211e9ab52a 100644
--- a/include/llvm/Function.h
+++ b/include/llvm/Function.h
@@ -178,9 +178,7 @@ public:
///
void addFnAttr(Attributes::AttrVal N) {
// Function Attributes are stored at ~0 index
- Attributes::Builder B;
- B.addAttribute(N);
- addAttribute(~0U, Attributes::get(B));
+ addAttribute(AttrListPtr::FunctionIndex, Attributes::get(getContext(), N));
}
/// removeFnAttr - Remove function attributes from this function.
@@ -278,9 +276,7 @@ public:
return getParamAttributes(n).hasAttribute(Attributes::NoAlias);
}
void setDoesNotAlias(unsigned n) {
- Attributes::Builder B;
- B.addAttribute(Attributes::NoAlias);
- addAttribute(n, Attributes::get(B));
+ addAttribute(n, Attributes::get(getContext(), Attributes::NoAlias));
}
/// @brief Determine if the parameter can be captured.
@@ -289,9 +285,7 @@ public:
return getParamAttributes(n).hasAttribute(Attributes::NoCapture);
}
void setDoesNotCapture(unsigned n) {
- Attributes::Builder B;
- B.addAttribute(Attributes::NoCapture);
- addAttribute(n, Attributes::get(B));
+ addAttribute(n, Attributes::get(getContext(), Attributes::NoCapture));
}
/// copyAttributesFrom - copy all additional attributes (those not needed to
@@ -404,7 +398,6 @@ public:
void viewCFGOnly() const;
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Function *) { return true; }
static inline bool classof(const Value *V) {
return V->getValueID() == Value::FunctionVal;
}
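The Function helpers now route through Attributes::get(getContext(), ...) and the named FunctionIndex, so caller-side usage is unchanged, e.g.:

    F->addFnAttr(Attributes::NoUnwind);
    F->setDoesNotCapture(1);  // index 0 is the return value; params start at 1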
diff --git a/include/llvm/GlobalAlias.h b/include/llvm/GlobalAlias.h
index a97ecd30c9d..d0f014733fc 100644
--- a/include/llvm/GlobalAlias.h
+++ b/include/llvm/GlobalAlias.h
@@ -76,7 +76,6 @@ public:
const GlobalValue *resolveAliasedGlobal(bool stopOnWeak = true) const;
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const GlobalAlias *) { return true; }
static inline bool classof(const Value *V) {
return V->getValueID() == Value::GlobalAliasVal;
}
diff --git a/include/llvm/GlobalValue.h b/include/llvm/GlobalValue.h
index 58d02576c17..7f7f74b1e2d 100644
--- a/include/llvm/GlobalValue.h
+++ b/include/llvm/GlobalValue.h
@@ -287,7 +287,6 @@ public:
inline const Module *getParent() const { return Parent; }
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const GlobalValue *) { return true; }
static inline bool classof(const Value *V) {
return V->getValueID() == Value::FunctionVal ||
V->getValueID() == Value::GlobalVariableVal ||
diff --git a/include/llvm/GlobalVariable.h b/include/llvm/GlobalVariable.h
index 27a2ea7fb9f..b9d3f68642f 100644
--- a/include/llvm/GlobalVariable.h
+++ b/include/llvm/GlobalVariable.h
@@ -174,7 +174,6 @@ public:
virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const GlobalVariable *) { return true; }
static inline bool classof(const Value *V) {
return V->getValueID() == Value::GlobalVariableVal;
}
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index 409246cf148..ee9b1c5852e 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -94,6 +94,7 @@ void initializeDCEPass(PassRegistry&);
void initializeDSEPass(PassRegistry&);
void initializeDeadInstEliminationPass(PassRegistry&);
void initializeDeadMachineInstructionElimPass(PassRegistry&);
+void initializeDependenceAnalysisPass(PassRegistry&);
void initializeDomOnlyPrinterPass(PassRegistry&);
void initializeDomOnlyViewerPass(PassRegistry&);
void initializeDomPrinterPass(PassRegistry&);
@@ -247,6 +248,7 @@ void initializeTailCallElimPass(PassRegistry&);
void initializeTailDuplicatePassPass(PassRegistry&);
void initializeTargetPassConfigPass(PassRegistry&);
void initializeDataLayoutPass(PassRegistry&);
+void initializeTargetTransformInfoPass(PassRegistry&);
void initializeTargetLibraryInfoPass(PassRegistry&);
void initializeTwoAddressInstructionPassPass(PassRegistry&);
void initializeTypeBasedAliasAnalysisPass(PassRegistry&);
diff --git a/include/llvm/InlineAsm.h b/include/llvm/InlineAsm.h
index 58c1e84e53f..c6e0aab05e7 100644
--- a/include/llvm/InlineAsm.h
+++ b/include/llvm/InlineAsm.h
@@ -189,7 +189,6 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const InlineAsm *) { return true; }
static inline bool classof(const Value *V) {
return V->getValueID() == Value::InlineAsmVal;
}
diff --git a/include/llvm/InstrTypes.h b/include/llvm/InstrTypes.h
index bda9d79da75..cfc79394b22 100644
--- a/include/llvm/InstrTypes.h
+++ b/include/llvm/InstrTypes.h
@@ -73,7 +73,6 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const TerminatorInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->isTerminator();
}
@@ -113,7 +112,6 @@ public:
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const UnaryInstruction *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Alloca ||
I->getOpcode() == Instruction::Load ||
@@ -361,7 +359,6 @@ public:
bool isExact() const;
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const BinaryOperator *) { return true; }
static inline bool classof(const Instruction *I) {
return I->isBinaryOp();
}
@@ -611,7 +608,6 @@ public:
static bool castIsValid(Instruction::CastOps op, Value *S, Type *DstTy);
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const CastInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->isCast();
}
@@ -816,7 +812,6 @@ public:
static bool isFalseWhenEqual(unsigned short predicate);
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const CmpInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::ICmp ||
I->getOpcode() == Instruction::FCmp;
diff --git a/include/llvm/Instruction.h b/include/llvm/Instruction.h
index c85eda28f42..8aa8a56bf82 100644
--- a/include/llvm/Instruction.h
+++ b/include/llvm/Instruction.h
@@ -310,7 +310,6 @@ public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *) { return true; }
static inline bool classof(const Value *V) {
return V->getValueID() >= Value::InstructionVal;
}
diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h
index a1a2bd53c82..40dbbaabe69 100644
--- a/include/llvm/Instructions.h
+++ b/include/llvm/Instructions.h
@@ -112,7 +112,6 @@ public:
bool isStaticAlloca() const;
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const AllocaInst *) { return true; }
static inline bool classof(const Instruction *I) {
return (I->getOpcode() == Instruction::Alloca);
}
@@ -232,7 +231,6 @@ public:
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const LoadInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Load;
}
@@ -350,11 +348,19 @@ public:
static unsigned getPointerOperandIndex() { return 1U; }
unsigned getPointerAddressSpace() const {
- return cast<PointerType>(getPointerOperand()->getType())->getAddressSpace();
+ if (getPointerOperand()->getType()->isPointerTy())
+ return cast<PointerType>(getPointerOperand()->getType())
+ ->getAddressSpace();
+    if (getPointerOperand()->getType()->isVectorTy()
+        && cast<VectorType>(getPointerOperand()->getType())
+               ->getElementType()->isPointerTy())
+      return cast<PointerType>(cast<VectorType>(
+          getPointerOperand()->getType())->getElementType())
+          ->getAddressSpace();
+    llvm_unreachable("Only a pointer or a vector of pointers can be used!");
+ return 0;
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const StoreInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Store;
}
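With this change the query tolerates vector-of-pointer operands; a sketch, with SI being a StoreInst whose pointer operand may be e.g. i8* or <4 x i8*>:

    unsigned AS = SI->getPointerAddressSpace();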
@@ -426,7 +432,6 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const FenceInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Fence;
}
@@ -526,7 +531,6 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const AtomicCmpXchgInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::AtomicCmpXchg;
}
@@ -670,7 +674,6 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const AtomicRMWInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::AtomicRMW;
}
@@ -849,7 +852,6 @@ public:
bool isInBounds() const;
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const GetElementPtrInst *) { return true; }
static inline bool classof(const Instruction *I) {
return (I->getOpcode() == Instruction::GetElementPtr);
}
@@ -1031,7 +1033,6 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const ICmpInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::ICmp;
}
@@ -1141,7 +1142,6 @@ public:
}
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const FCmpInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::FCmp;
}
@@ -1281,9 +1281,8 @@ public:
/// @brief Return true if the call should not be inlined.
bool isNoInline() const { return hasFnAttr(Attributes::NoInline); }
void setIsNoInline() {
- Attributes::Builder B;
- B.addAttribute(Attributes::NoInline);
- addAttribute(~0, Attributes::get(B));
+ addAttribute(AttrListPtr::FunctionIndex,
+ Attributes::get(getContext(), Attributes::NoInline));
}
/// @brief Return true if the call can return twice
@@ -1291,9 +1290,8 @@ public:
return hasFnAttr(Attributes::ReturnsTwice);
}
void setCanReturnTwice() {
- Attributes::Builder B;
- B.addAttribute(Attributes::ReturnsTwice);
- addAttribute(~0U, Attributes::get(B));
+ addAttribute(AttrListPtr::FunctionIndex,
+ Attributes::get(getContext(), Attributes::ReturnsTwice));
}
/// @brief Determine if the call does not access memory.
@@ -1301,9 +1299,8 @@ public:
return hasFnAttr(Attributes::ReadNone);
}
void setDoesNotAccessMemory() {
- Attributes::Builder B;
- B.addAttribute(Attributes::ReadNone);
- addAttribute(~0U, Attributes::get(B));
+ addAttribute(AttrListPtr::FunctionIndex,
+ Attributes::get(getContext(), Attributes::ReadNone));
}
/// @brief Determine if the call does not access or only reads memory.
@@ -1311,25 +1308,22 @@ public:
return doesNotAccessMemory() || hasFnAttr(Attributes::ReadOnly);
}
void setOnlyReadsMemory() {
- Attributes::Builder B;
- B.addAttribute(Attributes::ReadOnly);
- addAttribute(~0, Attributes::get(B));
+ addAttribute(AttrListPtr::FunctionIndex,
+ Attributes::get(getContext(), Attributes::ReadOnly));
}
/// @brief Determine if the call cannot return.
bool doesNotReturn() const { return hasFnAttr(Attributes::NoReturn); }
void setDoesNotReturn() {
- Attributes::Builder B;
- B.addAttribute(Attributes::NoReturn);
- addAttribute(~0, Attributes::get(B));
+ addAttribute(AttrListPtr::FunctionIndex,
+ Attributes::get(getContext(), Attributes::NoReturn));
}
/// @brief Determine if the call cannot unwind.
bool doesNotThrow() const { return hasFnAttr(Attributes::NoUnwind); }
- void setDoesNotThrow(bool DoesNotThrow = true) {
- Attributes::Builder B;
- B.addAttribute(Attributes::NoUnwind);
- addAttribute(~0, Attributes::get(B));
+ void setDoesNotThrow() {
+ addAttribute(AttrListPtr::FunctionIndex,
+ Attributes::get(getContext(), Attributes::NoUnwind));
}
/// @brief Determine if the call returns a structure through first
@@ -1370,7 +1364,6 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const CallInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Call;
}
@@ -1476,7 +1469,6 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SelectInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Select;
}
@@ -1519,7 +1511,6 @@ public:
static unsigned getPointerOperandIndex() { return 0U; }
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const VAArgInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == VAArg;
}
@@ -1573,7 +1564,6 @@ public:
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const ExtractElementInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::ExtractElement;
}
@@ -1632,7 +1622,6 @@ public:
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const InsertElementInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::InsertElement;
}
@@ -1713,7 +1702,6 @@ public:
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const ShuffleVectorInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::ShuffleVector;
}
@@ -1809,7 +1797,6 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const ExtractValueInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::ExtractValue;
}
@@ -1931,7 +1918,6 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const InsertValueInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::InsertValue;
}
@@ -2148,7 +2134,6 @@ public:
Value *hasConstantValue() const;
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const PHINode *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::PHI;
}
@@ -2256,7 +2241,6 @@ public:
void reserveClauses(unsigned Size) { growOperands(Size); }
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const LandingPadInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::LandingPad;
}
@@ -2325,7 +2309,6 @@ public:
unsigned getNumSuccessors() const { return 0; }
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const ReturnInst *) { return true; }
static inline bool classof(const Instruction *I) {
return (I->getOpcode() == Instruction::Ret);
}
@@ -2425,7 +2408,6 @@ public:
void swapSuccessors();
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const BranchInst *) { return true; }
static inline bool classof(const Instruction *I) {
return (I->getOpcode() == Instruction::Br);
}
@@ -2836,7 +2818,6 @@ public:
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SwitchInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Switch;
}
@@ -2935,7 +2916,6 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IndirectBrInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::IndirectBr;
}
@@ -3050,9 +3030,8 @@ public:
/// @brief Return true if the call should not be inlined.
bool isNoInline() const { return hasFnAttr(Attributes::NoInline); }
void setIsNoInline() {
- Attributes::Builder B;
- B.addAttribute(Attributes::NoInline);
- addAttribute(~0, Attributes::get(B));
+ addAttribute(AttrListPtr::FunctionIndex,
+ Attributes::get(getContext(), Attributes::NoInline));
}
/// @brief Determine if the call does not access memory.
@@ -3060,9 +3039,8 @@ public:
return hasFnAttr(Attributes::ReadNone);
}
void setDoesNotAccessMemory() {
- Attributes::Builder B;
- B.addAttribute(Attributes::ReadNone);
- addAttribute(~0, Attributes::get(B));
+ addAttribute(AttrListPtr::FunctionIndex,
+ Attributes::get(getContext(), Attributes::ReadNone));
}
/// @brief Determine if the call does not access or only reads memory.
@@ -3070,25 +3048,22 @@ public:
return doesNotAccessMemory() || hasFnAttr(Attributes::ReadOnly);
}
void setOnlyReadsMemory() {
- Attributes::Builder B;
- B.addAttribute(Attributes::ReadOnly);
- addAttribute(~0, Attributes::get(B));
+ addAttribute(AttrListPtr::FunctionIndex,
+ Attributes::get(getContext(), Attributes::ReadOnly));
}
/// @brief Determine if the call cannot return.
bool doesNotReturn() const { return hasFnAttr(Attributes::NoReturn); }
void setDoesNotReturn() {
- Attributes::Builder B;
- B.addAttribute(Attributes::NoReturn);
- addAttribute(~0, Attributes::get(B));
+ addAttribute(AttrListPtr::FunctionIndex,
+ Attributes::get(getContext(), Attributes::NoReturn));
}
/// @brief Determine if the call cannot unwind.
bool doesNotThrow() const { return hasFnAttr(Attributes::NoUnwind); }
void setDoesNotThrow() {
- Attributes::Builder B;
- B.addAttribute(Attributes::NoUnwind);
- addAttribute(~0, Attributes::get(B));
+ addAttribute(AttrListPtr::FunctionIndex,
+ Attributes::get(getContext(), Attributes::NoUnwind));
}
/// @brief Determine if the call returns a structure through first
@@ -3154,7 +3129,6 @@ public:
unsigned getNumSuccessors() const { return 2; }
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const InvokeInst *) { return true; }
static inline bool classof(const Instruction *I) {
return (I->getOpcode() == Instruction::Invoke);
}
@@ -3234,7 +3208,6 @@ public:
unsigned getNumSuccessors() const { return 0; }
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const ResumeInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Resume;
}
@@ -3279,7 +3252,6 @@ public:
unsigned getNumSuccessors() const { return 0; }
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const UnreachableInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Unreachable;
}
@@ -3320,7 +3292,6 @@ public:
);
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const TruncInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Trunc;
}
@@ -3357,7 +3328,6 @@ public:
);
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const ZExtInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == ZExt;
}
@@ -3394,7 +3364,6 @@ public:
);
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SExtInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == SExt;
}
@@ -3431,7 +3400,6 @@ public:
);
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const FPTruncInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == FPTrunc;
}
@@ -3468,7 +3436,6 @@ public:
);
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const FPExtInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == FPExt;
}
@@ -3505,7 +3472,6 @@ public:
);
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const UIToFPInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == UIToFP;
}
@@ -3542,7 +3508,6 @@ public:
);
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SIToFPInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == SIToFP;
}
@@ -3579,7 +3544,6 @@ public:
);
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const FPToUIInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == FPToUI;
}
@@ -3616,7 +3580,6 @@ public:
);
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const FPToSIInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == FPToSI;
}
@@ -3653,11 +3616,18 @@ public:
/// @brief return the address space of the pointer.
unsigned getAddressSpace() const {
- return cast<PointerType>(getType())->getAddressSpace();
+ if (getType()->isPointerTy())
+ return cast<PointerType>(getType())->getAddressSpace();
+ if (getType()->isVectorTy() &&
+ cast<VectorType>(getType())->getElementType()->isPointerTy())
+ return cast<PointerType>(
+ cast<VectorType>(getType())->getElementType())
+ ->getAddressSpace();
+ llvm_unreachable("Must be a pointer or a vector of pointers.");
+ return 0;
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntToPtrInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == IntToPtr;
}
@@ -3695,11 +3665,19 @@ public:
/// @brief return the address space of the pointer.
unsigned getPointerAddressSpace() const {
- return cast<PointerType>(getOperand(0)->getType())->getAddressSpace();
+ Type *Ty = getOperand(0)->getType();
+ if (Ty->isPointerTy())
+ return cast<PointerType>(Ty)->getAddressSpace();
+ if (Ty->isVectorTy()
+ && cast<VectorType>(Ty)->getElementType()->isPointerTy())
+ return cast<PointerType>(
+ cast<VectorType>(Ty)->getElementType())
+ ->getAddressSpace();
+ llvm_unreachable("Must be a pointer or a vector of pointers.");
+ return 0;
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const PtrToIntInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == PtrToInt;
}
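
Both conversion casts now accept vectors of pointers, not just scalar pointers. A sketch of the shared logic as a free function, using only the Type/PointerType/VectorType API visible in the hunks; the helper name is made up, and it assumes llvm/DerivedTypes.h plus llvm/Support/ErrorHandling.h for llvm_unreachable:

static unsigned pointerLikeAddressSpace(llvm::Type *Ty) {
  using namespace llvm;
  if (Ty->isPointerTy())
    return cast<PointerType>(Ty)->getAddressSpace();
  // <4 x i8*> and friends: every lane shares one address space.
  if (Ty->isVectorTy() &&
      cast<VectorType>(Ty)->getElementType()->isPointerTy())
    return cast<PointerType>(cast<VectorType>(Ty)->getElementType())
        ->getAddressSpace();
  llvm_unreachable("Must be a pointer or a vector of pointers.");
  return 0; // not reached; placates compilers lacking noreturn support
}
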
@@ -3736,7 +3714,6 @@ public:
);
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const BitCastInst *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == BitCast;
}
diff --git a/include/llvm/IntrinsicInst.h b/include/llvm/IntrinsicInst.h
index e9bf0f6759b..a31220355f6 100644
--- a/include/llvm/IntrinsicInst.h
+++ b/include/llvm/IntrinsicInst.h
@@ -45,7 +45,6 @@ namespace llvm {
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *) { return true; }
static inline bool classof(const CallInst *I) {
if (const Function *CF = I->getCalledFunction())
return CF->getIntrinsicID() != 0;
@@ -62,7 +61,6 @@ namespace llvm {
public:
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const DbgInfoIntrinsic *) { return true; }
static inline bool classof(const IntrinsicInst *I) {
switch (I->getIntrinsicID()) {
case Intrinsic::dbg_declare:
@@ -86,7 +84,6 @@ namespace llvm {
MDNode *getVariable() const { return cast<MDNode>(getArgOperand(1)); }
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const DbgDeclareInst *) { return true; }
static inline bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::dbg_declare;
}
@@ -108,7 +105,6 @@ namespace llvm {
MDNode *getVariable() const { return cast<MDNode>(getArgOperand(2)); }
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const DbgValueInst *) { return true; }
static inline bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::dbg_value;
}
@@ -175,7 +171,6 @@ namespace llvm {
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const MemIntrinsic *) { return true; }
static inline bool classof(const IntrinsicInst *I) {
switch (I->getIntrinsicID()) {
case Intrinsic::memcpy:
@@ -205,7 +200,6 @@ namespace llvm {
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const MemSetInst *) { return true; }
static inline bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::memset;
}
@@ -238,7 +232,6 @@ namespace llvm {
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const MemTransferInst *) { return true; }
static inline bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::memcpy ||
I->getIntrinsicID() == Intrinsic::memmove;
@@ -254,7 +247,6 @@ namespace llvm {
class MemCpyInst : public MemTransferInst {
public:
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const MemCpyInst *) { return true; }
static inline bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::memcpy;
}
@@ -268,7 +260,6 @@ namespace llvm {
class MemMoveInst : public MemTransferInst {
public:
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const MemMoveInst *) { return true; }
static inline bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::memmove;
}
diff --git a/include/llvm/Intrinsics.h b/include/llvm/Intrinsics.h
index c3503889e70..3108a8e5251 100644
--- a/include/llvm/Intrinsics.h
+++ b/include/llvm/Intrinsics.h
@@ -50,7 +50,7 @@ namespace Intrinsic {
/// Intrinsic::getType(ID) - Return the function type for an intrinsic.
///
FunctionType *getType(LLVMContext &Context, ID id,
- ArrayRef<Type*> Tys = ArrayRef<Type*>());
+ ArrayRef<Type*> Tys = ArrayRef<Type*>());
/// Intrinsic::isOverloaded(ID) - Returns true if the intrinsic can be
/// overloaded.
@@ -58,7 +58,7 @@ namespace Intrinsic {
/// Intrinsic::getAttributes(ID) - Return the attributes for an intrinsic.
///
- AttrListPtr getAttributes(ID id);
+ AttrListPtr getAttributes(LLVMContext &C, ID id);
/// Intrinsic::getDeclaration(M, ID) - Create or insert an LLVM Function
/// declaration for an intrinsic, and return it.
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index c01e4710248..4b10d0e5415 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -64,6 +64,7 @@ namespace {
(void) llvm::createDeadCodeEliminationPass();
(void) llvm::createDeadInstEliminationPass();
(void) llvm::createDeadStoreEliminationPass();
+ (void) llvm::createDependenceAnalysisPass();
(void) llvm::createDomOnlyPrinterPass();
(void) llvm::createDomPrinterPass();
(void) llvm::createDomOnlyViewerPass();
diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h
index 83c01ec5b98..5771415c81c 100644
--- a/include/llvm/MC/MCAssembler.h
+++ b/include/llvm/MC/MCAssembler.h
@@ -99,8 +99,6 @@ public:
unsigned getLayoutOrder() const { return LayoutOrder; }
void setLayoutOrder(unsigned Value) { LayoutOrder = Value; }
- static bool classof(const MCFragment *O) { return true; }
-
void dump();
};
@@ -151,7 +149,6 @@ public:
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_Data;
}
- static bool classof(const MCDataFragment *) { return true; }
};
// FIXME: This current incarnation of MCInstFragment doesn't make much sense, as
@@ -213,7 +210,6 @@ public:
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_Inst;
}
- static bool classof(const MCInstFragment *) { return true; }
};
class MCAlignFragment : public MCFragment {
@@ -263,7 +259,6 @@ public:
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_Align;
}
- static bool classof(const MCAlignFragment *) { return true; }
};
class MCFillFragment : public MCFragment {
@@ -302,7 +297,6 @@ public:
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_Fill;
}
- static bool classof(const MCFillFragment *) { return true; }
};
class MCOrgFragment : public MCFragment {
@@ -331,7 +325,6 @@ public:
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_Org;
}
- static bool classof(const MCOrgFragment *) { return true; }
};
class MCLEBFragment : public MCFragment {
@@ -364,7 +357,6 @@ public:
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_LEB;
}
- static bool classof(const MCLEBFragment *) { return true; }
};
class MCDwarfLineAddrFragment : public MCFragment {
@@ -401,7 +393,6 @@ public:
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_Dwarf;
}
- static bool classof(const MCDwarfLineAddrFragment *) { return true; }
};
class MCDwarfCallFrameFragment : public MCFragment {
@@ -431,7 +422,6 @@ public:
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_DwarfFrame;
}
- static bool classof(const MCDwarfCallFrameFragment *) { return true; }
};
// FIXME: Should this be a separate class, or just merged into MCSection? Since
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h
index 50328872485..4c10e5114a3 100644
--- a/include/llvm/MC/MCExpr.h
+++ b/include/llvm/MC/MCExpr.h
@@ -99,8 +99,6 @@ public:
const MCSection *FindAssociatedSection() const;
/// @}
-
- static bool classof(const MCExpr *) { return true; }
};
inline raw_ostream &operator<<(raw_ostream &OS, const MCExpr &E) {
@@ -132,7 +130,6 @@ public:
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Constant;
}
- static bool classof(const MCConstantExpr *) { return true; }
};
/// MCSymbolRefExpr - Represent a reference to a symbol from inside an
@@ -248,7 +245,6 @@ public:
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::SymbolRef;
}
- static bool classof(const MCSymbolRefExpr *) { return true; }
};
/// MCUnaryExpr - Unary assembler expressions.
@@ -302,7 +298,6 @@ public:
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Unary;
}
- static bool classof(const MCUnaryExpr *) { return true; }
};
/// MCBinaryExpr - Binary assembler expressions.
@@ -437,7 +432,6 @@ public:
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Binary;
}
- static bool classof(const MCBinaryExpr *) { return true; }
};
/// MCTargetExpr - This is an extension point for target-specific MCExpr
@@ -461,7 +455,6 @@ public:
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
}
- static bool classof(const MCTargetExpr *) { return true; }
};
} // end namespace llvm
diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h
index adc960d27e0..08758cda226 100644
--- a/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/include/llvm/MC/MCParser/MCAsmParser.h
@@ -20,6 +20,7 @@ class MCAsmLexer;
class MCAsmParserExtension;
class MCContext;
class MCExpr;
+class MCParsedAsmOperand;
class MCStreamer;
class MCTargetAsmParser;
class SMLoc;
@@ -73,6 +74,27 @@ public:
/// Run - Run the parser on the input source buffer.
virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false) = 0;
+ virtual void setParsingInlineAsm(bool V) = 0;
+
+ /// ParseStatement - Parse the next statement.
+ virtual bool ParseStatement() = 0;
+
+  /// getNumParsedOperands - Returns the number of MCParsedAsmOperands from the
+ /// previously parsed statement.
+ virtual unsigned getNumParsedOperands() = 0;
+
+  /// getParsedOperand - Get an MCParsedAsmOperand.
+ virtual MCParsedAsmOperand &getParsedOperand(unsigned OpNum) = 0;
+
+  /// freeParsedOperands - Free the MCParsedAsmOperands.
+ virtual void freeParsedOperands() = 0;
+
+ /// isInstruction - Was the previously parsed statement an instruction?
+ virtual bool isInstruction() = 0;
+
+ /// getOpcode - Get the opcode from the previously parsed instruction.
+ virtual unsigned getOpcode() = 0;
+
/// Warning - Emit a warning at the location \p L, with the message \p Msg.
///
/// \return The return value is true, if warnings are fatal.
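
The new pure virtuals turn MCAsmParser into a statement-at-a-time engine that a client such as the MS-style inline-asm rewriter can drive and interrogate. A self-contained model of that driver shape, with stand-in types rather than the real LLVM classes:

#include <cstdio>

// Stand-in mirroring the interface added above; only the shape matters.
struct StatementParser {
  virtual ~StatementParser() {}
  virtual bool ParseStatement() = 0;        // true signals a parse error
  virtual bool isInstruction() = 0;
  virtual unsigned getOpcode() = 0;
  virtual unsigned getNumParsedOperands() = 0;
  virtual void freeParsedOperands() = 0;
};

// Parse statements one by one; per the freeParsedOperands() contract,
// operands from one statement are released before parsing the next.
bool driveParser(StatementParser &P, unsigned NumStatements) {
  for (unsigned i = 0; i != NumStatements; ++i) {
    if (P.ParseStatement())
      return true; // propagate failure
    if (P.isInstruction())
      std::printf("opcode %u, %u operands\n",
                  P.getOpcode(), P.getNumParsedOperands());
    P.freeParsedOperands();
  }
  return false;
}
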
diff --git a/include/llvm/MC/MCParser/MCParsedAsmOperand.h b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
index 0ce32d617ef..280145bfbc8 100644
--- a/include/llvm/MC/MCParser/MCParsedAsmOperand.h
+++ b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
@@ -19,10 +19,34 @@ class raw_ostream;
/// base class is used by target-independent clients and is the interface
/// between parsing an asm instruction and recognizing it.
class MCParsedAsmOperand {
+ /// MCOperandNum - The corresponding MCInst operand number. Only valid when
+ /// parsing MS-style inline assembly.
+ unsigned MCOperandNum;
+
+ /// Constraint - The constraint on this operand. Only valid when parsing
+ /// MS-style inline assembly.
+ std::string Constraint;
+
public:
MCParsedAsmOperand() {}
virtual ~MCParsedAsmOperand() {}
+ void setConstraint(StringRef C) { Constraint = C.str(); }
+ StringRef getConstraint() { return Constraint; }
+
+  void setMCOperandNum(unsigned OpNum) { MCOperandNum = OpNum; }
+ unsigned getMCOperandNum() { return MCOperandNum; }
+
+ unsigned getNameLen() {
    assert(getStartLoc().isValid() && "Invalid StartLoc!");
    assert(getEndLoc().isValid() && "Invalid EndLoc!");
+ return getEndLoc().getPointer() - getStartLoc().getPointer();
+ }
+
+ StringRef getName() {
+ return StringRef(getStartLoc().getPointer(), getNameLen());
+ }
+
/// isToken - Is this a token operand?
virtual bool isToken() const = 0;
/// isImm - Is this an immediate operand?
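
getName() above leans on the fact that SMLoc start and end locations are raw pointers into one contiguous source buffer, so an operand's spelling is a pointer difference away and never needs copying. A standalone sketch of the same trick; TokenRange is a made-up stand-in for the SMLoc pair:

#include <cassert>
#include <cstdio>

struct TokenRange {
  const char *Start, *End; // both point into one source buffer
  unsigned length() const {
    assert(Start && End && End >= Start && "Invalid range!");
    return static_cast<unsigned>(End - Start);
  }
};

int main() {
  const char *Src = "mov eax, dword ptr [ebx]";
  TokenRange Reg = { Src + 4, Src + 7 };                // bounds a lexer would record
  std::printf("%.*s\n", (int)Reg.length(), Reg.Start);  // prints "eax"
  return 0;
}
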
diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h
index a92fc379e19..21fdb6bd39b 100644
--- a/include/llvm/MC/MCSection.h
+++ b/include/llvm/MC/MCSection.h
@@ -64,8 +64,6 @@ namespace llvm {
/// isVirtualSection - Check whether this section is "virtual", that is
/// has no actual object file contents.
virtual bool isVirtualSection() const = 0;
-
- static bool classof(const MCSection *) { return true; }
};
} // end namespace llvm
diff --git a/include/llvm/MC/MCSectionCOFF.h b/include/llvm/MC/MCSectionCOFF.h
index 7eacde57f48..b050c0f442b 100644
--- a/include/llvm/MC/MCSectionCOFF.h
+++ b/include/llvm/MC/MCSectionCOFF.h
@@ -61,7 +61,6 @@ namespace llvm {
static bool classof(const MCSection *S) {
return S->getVariant() == SV_COFF;
}
- static bool classof(const MCSectionCOFF *) { return true; }
};
} // end namespace llvm
diff --git a/include/llvm/MC/MCSectionELF.h b/include/llvm/MC/MCSectionELF.h
index 7321ca83e89..4d54465760d 100644
--- a/include/llvm/MC/MCSectionELF.h
+++ b/include/llvm/MC/MCSectionELF.h
@@ -76,7 +76,6 @@ public:
static bool classof(const MCSection *S) {
return S->getVariant() == SV_ELF;
}
- static bool classof(const MCSectionELF *) { return true; }
// Return the entry size for sections with fixed-width data.
static unsigned DetermineEntrySize(SectionKind Kind);
diff --git a/include/llvm/MC/MCSectionMachO.h b/include/llvm/MC/MCSectionMachO.h
index 15eb4f4a768..71ea8f3e901 100644
--- a/include/llvm/MC/MCSectionMachO.h
+++ b/include/llvm/MC/MCSectionMachO.h
@@ -174,7 +174,6 @@ public:
static bool classof(const MCSection *S) {
return S->getVariant() == SV_MachO;
}
- static bool classof(const MCSectionMachO *) { return true; }
};
} // end namespace llvm
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index 44698989c17..230d27ef2ef 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -554,6 +554,11 @@ namespace llvm {
virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
bool isVector);
+ /// PPC-related methods.
+ /// FIXME: Eventually replace it with some "target MC streamer" and move
+ /// these methods there.
+ virtual void EmitTCEntry(const MCSymbol &S);
+
/// FinishImpl - Streamer specific finalization.
virtual void FinishImpl() = 0;
/// Finish - Finish emission of machine code.
diff --git a/include/llvm/MC/MCTargetAsmParser.h b/include/llvm/MC/MCTargetAsmParser.h
index a966a6b8b32..c9ea5ae4846 100644
--- a/include/llvm/MC/MCTargetAsmParser.h
+++ b/include/llvm/MC/MCTargetAsmParser.h
@@ -50,12 +50,6 @@ public:
virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) = 0;
- /// MapAndConstraints - Map inline assembly operands to MCInst operands
- /// and an associated constraint.
- typedef std::pair< unsigned, std::string > MapAndConstraint;
- typedef SmallVector<MapAndConstraint, 4> MatchInstMapAndConstraints;
- typedef SmallVectorImpl<MapAndConstraint> MatchInstMapAndConstraintsImpl;
-
/// ParseInstruction - Parse one assembly instruction.
///
/// The parser is positioned following the instruction name. The target
@@ -88,22 +82,6 @@ public:
/// otherwise.
virtual bool mnemonicIsValid(StringRef Mnemonic) = 0;
- /// MatchInstruction - Recognize a series of operands of a parsed instruction
- /// as an actual MCInst. This returns false on success and returns true on
- /// failure to match.
- ///
- /// On failure, the target parser is responsible for emitting a diagnostic
- /// explaining the match failure.
- virtual bool
- MatchInstruction(SMLoc IDLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &Kind, unsigned &Opcode,
- MatchInstMapAndConstraintsImpl &MapAndConstraints,
- unsigned &OrigErrorInfo, bool matchingInlineAsm = false) {
- OrigErrorInfo = ~0x0;
- return true;
- }
-
/// MatchAndEmitInstruction - Recognize a series of operands of a parsed
/// instruction as an actual MCInst and emit it to the specified MCStreamer.
/// This returns false on success and returns true on failure to match.
@@ -111,9 +89,10 @@ public:
/// On failure, the target parser is responsible for emitting a diagnostic
/// explaining the match failure.
virtual bool
- MatchAndEmitInstruction(SMLoc IDLoc,
+ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out) = 0;
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) = 0;
/// checkTargetMatchPredicate - Validate the instruction match against
/// any complex target predicates not expressible via match classes.
@@ -122,8 +101,7 @@ public:
}
virtual void convertToMapAndConstraints(unsigned Kind,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MatchInstMapAndConstraintsImpl &MapAndConstraints) = 0;
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) = 0;
};
} // End llvm namespace
diff --git a/include/llvm/Metadata.h b/include/llvm/Metadata.h
index d0e65246233..0fbbb959888 100644
--- a/include/llvm/Metadata.h
+++ b/include/llvm/Metadata.h
@@ -59,7 +59,6 @@ public:
iterator end() const { return getName().end(); }
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const MDString *) { return true; }
static bool classof(const Value *V) {
return V->getValueID() == MDStringVal;
}
@@ -161,7 +160,6 @@ public:
void Profile(FoldingSetNodeID &ID) const;
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const MDNode *) { return true; }
static bool classof(const Value *V) {
return V->getValueID() == MDNodeVal;
}
diff --git a/include/llvm/Object/Archive.h b/include/llvm/Object/Archive.h
index 358b27a416c..f3d824960c2 100644
--- a/include/llvm/Object/Archive.h
+++ b/include/llvm/Object/Archive.h
@@ -129,7 +129,6 @@ public:
symbol_iterator end_symbols() const;
// Cast methods.
- static inline bool classof(Archive const *v) { return true; }
static inline bool classof(Binary const *v) {
return v->isArchive();
}
diff --git a/include/llvm/Object/Binary.h b/include/llvm/Object/Binary.h
index baed81827d0..d555de3accc 100644
--- a/include/llvm/Object/Binary.h
+++ b/include/llvm/Object/Binary.h
@@ -64,7 +64,6 @@ public:
// Cast methods.
unsigned int getType() const { return TypeID; }
- static inline bool classof(const Binary *v) { return true; }
// Convenience methods
bool isObject() const {
diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h
index ba81058ae40..d6b92ed0213 100644
--- a/include/llvm/Object/COFF.h
+++ b/include/llvm/Object/COFF.h
@@ -198,7 +198,6 @@ public:
static inline bool classof(const Binary *v) {
return v->isCOFF();
}
- static inline bool classof(const COFFObjectFile *v) { return true; }
};
}
diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h
index 3ca69bcb153..204348c0c50 100644
--- a/include/llvm/Object/ELF.h
+++ b/include/llvm/Object/ELF.h
@@ -723,7 +723,6 @@ public:
return v->getType() == getELFType(target_endianness == support::little,
is64Bits);
}
- static inline bool classof(const ELFObjectFile *v) { return true; }
};
// Iterate through the version definitions, and place each Elf_Verdef
diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h
index ecb9e256050..97cd4191aa6 100644
--- a/include/llvm/Object/MachO.h
+++ b/include/llvm/Object/MachO.h
@@ -49,7 +49,6 @@ public:
static inline bool classof(const Binary *v) {
return v->isMachO();
}
- static inline bool classof(const MachOObjectFile *v) { return true; }
protected:
virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h
index 99ddb3b87b8..41959bd34e1 100644
--- a/include/llvm/Object/ObjectFile.h
+++ b/include/llvm/Object/ObjectFile.h
@@ -76,13 +76,13 @@ public:
}
};
-inline bool operator ==(const DataRefImpl &a, const DataRefImpl &b) {
+inline bool operator==(const DataRefImpl &a, const DataRefImpl &b) {
// Check bitwise identical. This is the only legal way to compare a union w/o
// knowing which member is in use.
return std::memcmp(&a, &b, sizeof(DataRefImpl)) == 0;
}
-inline bool operator <(const DataRefImpl &a, const DataRefImpl &b) {
+inline bool operator<(const DataRefImpl &a, const DataRefImpl &b) {
// Check bitwise identical. This is the only legal way to compare a union w/o
// knowing which member is in use.
return std::memcmp(&a, &b, sizeof(DataRefImpl)) < 0;
@@ -144,7 +144,7 @@ public:
SectionRef(DataRefImpl SectionP, const ObjectFile *Owner);
bool operator==(const SectionRef &Other) const;
- bool operator <(const SectionRef &Other) const;
+ bool operator<(const SectionRef &Other) const;
error_code getNext(SectionRef &Result) const;
@@ -208,7 +208,7 @@ public:
SymbolRef(DataRefImpl SymbolP, const ObjectFile *Owner);
bool operator==(const SymbolRef &Other) const;
- bool operator <(const SymbolRef &Other) const;
+ bool operator<(const SymbolRef &Other) const;
error_code getNext(SymbolRef &Result) const;
@@ -251,7 +251,7 @@ public:
LibraryRef(DataRefImpl LibraryP, const ObjectFile *Owner);
bool operator==(const LibraryRef &Other) const;
- bool operator <(const LibraryRef &Other) const;
+ bool operator<(const LibraryRef &Other) const;
error_code getNext(LibraryRef &Result) const;
@@ -290,8 +290,8 @@ protected:
friend class SymbolRef;
virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const = 0;
virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const = 0;
- virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const =0;
- virtual error_code getSymbolFileOffset(DataRefImpl Symb, uint64_t &Res) const =0;
+ virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const = 0;
+ virtual error_code getSymbolFileOffset(DataRefImpl Symb, uint64_t &Res)const=0;
virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const = 0;
virtual error_code getSymbolType(DataRefImpl Symb,
SymbolRef::Type &Res) const = 0;
@@ -317,7 +317,7 @@ protected:
// A section is 'virtual' if its contents aren't present in the object image.
virtual error_code isSectionVirtual(DataRefImpl Sec, bool &Res) const = 0;
virtual error_code isSectionZeroInit(DataRefImpl Sec, bool &Res) const = 0;
- virtual error_code isSectionReadOnlyData(DataRefImpl Sec, bool &Res) const = 0;
+ virtual error_code isSectionReadOnlyData(DataRefImpl Sec, bool &Res) const =0;
virtual error_code sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb,
bool &Result) const = 0;
virtual relocation_iterator getSectionRelBegin(DataRefImpl Sec) const = 0;
@@ -388,7 +388,6 @@ public:
static inline bool classof(const Binary *v) {
return v->isObject();
}
- static inline bool classof(const ObjectFile *v) { return true; }
public:
static ObjectFile *createCOFFObjectFile(MemoryBuffer *Object);
@@ -405,7 +404,7 @@ inline bool SymbolRef::operator==(const SymbolRef &Other) const {
return SymbolPimpl == Other.SymbolPimpl;
}
-inline bool SymbolRef::operator <(const SymbolRef &Other) const {
+inline bool SymbolRef::operator<(const SymbolRef &Other) const {
return SymbolPimpl < Other.SymbolPimpl;
}
@@ -460,7 +459,7 @@ inline bool SectionRef::operator==(const SectionRef &Other) const {
return SectionPimpl == Other.SectionPimpl;
}
-inline bool SectionRef::operator <(const SectionRef &Other) const {
+inline bool SectionRef::operator<(const SectionRef &Other) const {
return SectionPimpl < Other.SectionPimpl;
}
@@ -594,7 +593,7 @@ inline bool LibraryRef::operator==(const LibraryRef &Other) const {
return LibraryPimpl == Other.LibraryPimpl;
}
-inline bool LibraryRef::operator <(const LibraryRef &Other) const {
+inline bool LibraryRef::operator<(const LibraryRef &Other) const {
return LibraryPimpl < Other.LibraryPimpl;
}
diff --git a/include/llvm/Operator.h b/include/llvm/Operator.h
index 61ac163d4a1..bc5da8e8aa3 100644
--- a/include/llvm/Operator.h
+++ b/include/llvm/Operator.h
@@ -60,7 +60,6 @@ public:
return Instruction::UserOp1;
}
- static inline bool classof(const Operator *) { return true; }
static inline bool classof(const Instruction *) { return true; }
static inline bool classof(const ConstantExpr *) { return true; }
static inline bool classof(const Value *V) {
@@ -106,7 +105,6 @@ public:
return (SubclassOptionalData & NoSignedWrap) != 0;
}
- static inline bool classof(const OverflowingBinaryOperator *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Add ||
I->getOpcode() == Instruction::Sub ||
@@ -180,7 +178,6 @@ public:
/// default precision.
float getFPAccuracy() const;
- static inline bool classof(const FPMathOperator *) { return true; }
static inline bool classof(const Instruction *I) {
return I->getType()->isFPOrFPVectorTy();
}
@@ -196,9 +193,6 @@ template<typename SuperClass, unsigned Opc>
class ConcreteOperator : public SuperClass {
~ConcreteOperator() LLVM_DELETED_FUNCTION;
public:
- static inline bool classof(const ConcreteOperator<SuperClass, Opc> *) {
- return true;
- }
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Opc;
}
diff --git a/include/llvm/Support/Casting.h b/include/llvm/Support/Casting.h
index d35febbe6d8..0c71882a77b 100644
--- a/include/llvm/Support/Casting.h
+++ b/include/llvm/Support/Casting.h
@@ -15,6 +15,7 @@
#ifndef LLVM_SUPPORT_CASTING_H
#define LLVM_SUPPORT_CASTING_H
+#include "llvm/Support/type_traits.h"
#include <cassert>
namespace llvm {
@@ -44,13 +45,23 @@ template<typename From> struct simplify_type<const From> {
// The core of the implementation of isa<X> is here; To and From should be
// the names of classes. This template can be specialized to customize the
// implementation of isa<> without rewriting it from scratch.
-template <typename To, typename From>
+template <typename To, typename From, typename Enabler = void>
struct isa_impl {
static inline bool doit(const From &Val) {
return To::classof(&Val);
}
};
+/// \brief Always allow upcasts, and perform no dynamic check for them.
+template <typename To, typename From>
+struct isa_impl<To, From,
+ typename llvm::enable_if_c<
+ llvm::is_base_of<To, From>::value
+ >::type
+ > {
+ static inline bool doit(const From &) { return true; }
+};
+
template <typename To, typename From> struct isa_impl_cl {
static inline bool doit(const From &Val) {
return isa_impl<To, From>::doit(Val);
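
This specialization is the hinge for every classof(const Self *) deletion in this patch: when To is a base of From, isa<> is answered statically and no classof() is consulted, so each class keeps only the classof overload that does real discrimination. A standalone C++11 model of the mechanism, with std::enable_if and std::is_base_of standing in for LLVM's enable_if_c and is_base_of:

#include <cassert>
#include <type_traits>

// Primary template: defer to the target class's classof().
template <typename To, typename From, typename Enabler = void>
struct isa_impl {
  static bool doit(const From &Val) { return To::classof(&Val); }
};

// Upcasts (From derives from To) are resolved at compile time.
template <typename To, typename From>
struct isa_impl<To, From,
    typename std::enable_if<std::is_base_of<To, From>::value>::type> {
  static bool doit(const From &) { return true; }
};

struct Shape {
  enum Kind { K_Circle, K_Square } TheKind;
  explicit Shape(Kind K) : TheKind(K) {}
};
struct Circle : Shape {
  Circle() : Shape(K_Circle) {}
  static bool classof(const Shape *S) { return S->TheKind == K_Circle; }
  // No classof(const Circle *) overload is needed any more.
};

int main() {
  Circle C;
  assert((isa_impl<Circle, Shape>::doit(C))); // dynamic check via classof
  assert((isa_impl<Shape, Circle>::doit(C))); // static upcast, no classof
  return 0;
}
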
diff --git a/include/llvm/Support/Memory.h b/include/llvm/Support/Memory.h
index 8227c84bffa..025eee7f9f3 100644
--- a/include/llvm/Support/Memory.h
+++ b/include/llvm/Support/Memory.h
@@ -98,8 +98,8 @@ namespace sys {
   /// \p ErrMsg [out] returns a string describing any error that occurred.
///
/// If \p Flags is MF_WRITE, the actual behavior varies
- /// with the operating system (i.e. MF_READWRITE on Windows) and the
- /// target architecture (i.e. MF_WRITE -> MF_READWRITE on i386).
+ /// with the operating system (i.e. MF_READ | MF_WRITE on Windows) and the
+ /// target architecture (i.e. MF_WRITE -> MF_READ | MF_WRITE on i386).
///
/// \r error_success if the function was successful, or an error_code
/// describing the failure if an error occurred.
diff --git a/include/llvm/Support/YAMLParser.h b/include/llvm/Support/YAMLParser.h
index eacd651394c..12958fa173d 100644
--- a/include/llvm/Support/YAMLParser.h
+++ b/include/llvm/Support/YAMLParser.h
@@ -133,7 +133,6 @@ public:
virtual void skip() {}
unsigned int getType() const { return TypeID; }
- static inline bool classof(const Node *) { return true; }
void *operator new ( size_t Size
, BumpPtrAllocator &Alloc
@@ -166,7 +165,6 @@ class NullNode : public Node {
public:
NullNode(OwningPtr<Document> &D) : Node(NK_Null, D, StringRef()) {}
- static inline bool classof(const NullNode *) { return true; }
static inline bool classof(const Node *N) {
return N->getType() == NK_Null;
}
@@ -199,7 +197,6 @@ public:
/// This happens with escaped characters and multi-line literals.
StringRef getValue(SmallVectorImpl<char> &Storage) const;
- static inline bool classof(const ScalarNode *) { return true; }
static inline bool classof(const Node *N) {
return N->getType() == NK_Scalar;
}
@@ -246,7 +243,6 @@ public:
getValue()->skip();
}
- static inline bool classof(const KeyValueNode *) { return true; }
static inline bool classof(const Node *N) {
return N->getType() == NK_KeyValue;
}
@@ -362,7 +358,6 @@ public:
yaml::skip(*this);
}
- static inline bool classof(const MappingNode *) { return true; }
static inline bool classof(const Node *N) {
return N->getType() == NK_Mapping;
}
@@ -425,7 +420,6 @@ public:
yaml::skip(*this);
}
- static inline bool classof(const SequenceNode *) { return true; }
static inline bool classof(const Node *N) {
return N->getType() == NK_Sequence;
}
@@ -450,7 +444,6 @@ public:
StringRef getName() const { return Name; }
Node *getTarget();
- static inline bool classof(const ScalarNode *) { return true; }
static inline bool classof(const Node *N) {
return N->getType() == NK_Alias;
}
diff --git a/include/llvm/TableGen/Record.h b/include/llvm/TableGen/Record.h
index 079dc8ce8ed..319298c1325 100644
--- a/include/llvm/TableGen/Record.h
+++ b/include/llvm/TableGen/Record.h
@@ -85,7 +85,6 @@ private:
virtual void anchor();
public:
- static bool classof(const RecTy *) { return true; }
RecTyKind getRecTyKind() const { return Kind; }
RecTy(RecTyKind K) : Kind(K), ListTy(0) {}
@@ -153,7 +152,6 @@ class BitRecTy : public RecTy {
static BitRecTy Shared;
BitRecTy() : RecTy(BitRecTyKind) {}
public:
- static bool classof(const BitRecTy *) { return true; }
static bool classof(const RecTy *RT) {
return RT->getRecTyKind() == BitRecTyKind;
}
@@ -199,7 +197,6 @@ class BitsRecTy : public RecTy {
unsigned Size;
explicit BitsRecTy(unsigned Sz) : RecTy(BitsRecTyKind), Size(Sz) {}
public:
- static bool classof(const BitsRecTy *) { return true; }
static bool classof(const RecTy *RT) {
return RT->getRecTyKind() == BitsRecTyKind;
}
@@ -248,7 +245,6 @@ class IntRecTy : public RecTy {
static IntRecTy Shared;
IntRecTy() : RecTy(IntRecTyKind) {}
public:
- static bool classof(const IntRecTy *) { return true; }
static bool classof(const RecTy *RT) {
return RT->getRecTyKind() == IntRecTyKind;
}
@@ -293,7 +289,6 @@ class StringRecTy : public RecTy {
static StringRecTy Shared;
StringRecTy() : RecTy(StringRecTyKind) {}
public:
- static bool classof(const StringRecTy *) { return true; }
static bool classof(const RecTy *RT) {
return RT->getRecTyKind() == StringRecTyKind;
}
@@ -342,7 +337,6 @@ class ListRecTy : public RecTy {
explicit ListRecTy(RecTy *T) : RecTy(ListRecTyKind), Ty(T) {}
friend ListRecTy *RecTy::getListTy();
public:
- static bool classof(const ListRecTy *) { return true; }
static bool classof(const RecTy *RT) {
return RT->getRecTyKind() == ListRecTyKind;
}
@@ -389,7 +383,6 @@ class DagRecTy : public RecTy {
static DagRecTy Shared;
DagRecTy() : RecTy(DagRecTyKind) {}
public:
- static bool classof(const DagRecTy *) { return true; }
static bool classof(const RecTy *RT) {
return RT->getRecTyKind() == DagRecTyKind;
}
@@ -436,7 +429,6 @@ class RecordRecTy : public RecTy {
explicit RecordRecTy(Record *R) : RecTy(RecordRecTyKind), Rec(R) {}
friend class Record;
public:
- static bool classof(const RecordRecTy *) { return true; }
static bool classof(const RecTy *RT) {
return RT->getRecTyKind() == RecordRecTyKind;
}
@@ -485,12 +477,53 @@ RecTy *resolveTypes(RecTy *T1, RecTy *T2);
//===----------------------------------------------------------------------===//
class Init {
+protected:
+ /// \brief Discriminator enum (for isa<>, dyn_cast<>, et al.)
+ ///
+ /// This enum is laid out by a preorder traversal of the inheritance
+ /// hierarchy, and does not contain an entry for abstract classes, as per
+ /// the recommendation in docs/HowToSetUpLLVMStyleRTTI.rst.
+ ///
+ /// We also explicitly include "first" and "last" values for each
+ /// interior node of the inheritance tree, to make it easier to read the
+ /// corresponding classof().
+ ///
+ /// We could pack these a bit tighter by not having the IK_FirstXXXInit
+ /// and IK_LastXXXInit be their own values, but that would degrade
+ /// readability for really no benefit.
+ enum InitKind {
+ IK_BitInit,
+ IK_BitsInit,
+ IK_FirstTypedInit,
+ IK_DagInit,
+ IK_DefInit,
+ IK_FieldInit,
+ IK_IntInit,
+ IK_ListInit,
+ IK_FirstOpInit,
+ IK_BinOpInit,
+ IK_TernOpInit,
+ IK_UnOpInit,
+ IK_LastOpInit,
+ IK_StringInit,
+ IK_VarInit,
+ IK_VarListElementInit,
+ IK_LastTypedInit,
+ IK_UnsetInit,
+ IK_VarBitInit
+ };
+
+private:
+ const InitKind Kind;
Init(const Init &) LLVM_DELETED_FUNCTION;
Init &operator=(const Init &) LLVM_DELETED_FUNCTION;
virtual void anchor();
+public:
+ InitKind getKind() const { return Kind; }
+
protected:
- Init(void) {}
+ explicit Init(InitKind K) : Kind(K) {}
public:
virtual ~Init() {}
@@ -591,9 +624,13 @@ class TypedInit : public Init {
TypedInit &operator=(const TypedInit &Other) LLVM_DELETED_FUNCTION;
protected:
- explicit TypedInit(RecTy *T) : Ty(T) {}
+ explicit TypedInit(InitKind K, RecTy *T) : Init(K), Ty(T) {}
public:
+ static bool classof(const Init *I) {
+ return I->getKind() >= IK_FirstTypedInit &&
+ I->getKind() <= IK_LastTypedInit;
+ }
RecTy *getType() const { return Ty; }
virtual Init *
@@ -618,12 +655,15 @@ public:
/// UnsetInit - ? - Represents an uninitialized value
///
class UnsetInit : public Init {
- UnsetInit() : Init() {}
+ UnsetInit() : Init(IK_UnsetInit) {}
UnsetInit(const UnsetInit &) LLVM_DELETED_FUNCTION;
UnsetInit &operator=(const UnsetInit &Other) LLVM_DELETED_FUNCTION;
virtual void anchor();
public:
+ static bool classof(const Init *I) {
+ return I->getKind() == IK_UnsetInit;
+ }
static UnsetInit *get();
virtual Init *convertInitializerTo(RecTy *Ty) const {
@@ -644,12 +684,15 @@ public:
class BitInit : public Init {
bool Value;
- explicit BitInit(bool V) : Value(V) {}
+ explicit BitInit(bool V) : Init(IK_BitInit), Value(V) {}
BitInit(const BitInit &Other) LLVM_DELETED_FUNCTION;
BitInit &operator=(BitInit &Other) LLVM_DELETED_FUNCTION;
virtual void anchor();
public:
+ static bool classof(const Init *I) {
+ return I->getKind() == IK_BitInit;
+ }
static BitInit *get(bool V);
bool getValue() const { return Value; }
@@ -672,12 +715,16 @@ public:
class BitsInit : public Init, public FoldingSetNode {
std::vector<Init*> Bits;
- BitsInit(ArrayRef<Init *> Range) : Bits(Range.begin(), Range.end()) {}
+ BitsInit(ArrayRef<Init *> Range)
+ : Init(IK_BitsInit), Bits(Range.begin(), Range.end()) {}
BitsInit(const BitsInit &Other) LLVM_DELETED_FUNCTION;
BitsInit &operator=(const BitsInit &Other) LLVM_DELETED_FUNCTION;
public:
+ static bool classof(const Init *I) {
+ return I->getKind() == IK_BitsInit;
+ }
static BitsInit *get(ArrayRef<Init *> Range);
void Profile(FoldingSetNodeID &ID) const;
@@ -716,12 +763,16 @@ public:
class IntInit : public TypedInit {
int64_t Value;
- explicit IntInit(int64_t V) : TypedInit(IntRecTy::get()), Value(V) {}
+ explicit IntInit(int64_t V)
+ : TypedInit(IK_IntInit, IntRecTy::get()), Value(V) {}
IntInit(const IntInit &Other) LLVM_DELETED_FUNCTION;
IntInit &operator=(const IntInit &Other) LLVM_DELETED_FUNCTION;
public:
+ static bool classof(const Init *I) {
+ return I->getKind() == IK_IntInit;
+ }
static IntInit *get(int64_t V);
int64_t getValue() const { return Value; }
@@ -754,13 +805,16 @@ class StringInit : public TypedInit {
std::string Value;
explicit StringInit(const std::string &V)
- : TypedInit(StringRecTy::get()), Value(V) {}
+ : TypedInit(IK_StringInit, StringRecTy::get()), Value(V) {}
StringInit(const StringInit &Other) LLVM_DELETED_FUNCTION;
StringInit &operator=(const StringInit &Other) LLVM_DELETED_FUNCTION;
virtual void anchor();
public:
+ static bool classof(const Init *I) {
+ return I->getKind() == IK_StringInit;
+ }
static StringInit *get(StringRef);
const std::string &getValue() const { return Value; }
@@ -794,12 +848,16 @@ public:
private:
explicit ListInit(ArrayRef<Init *> Range, RecTy *EltTy)
- : TypedInit(ListRecTy::get(EltTy)), Values(Range.begin(), Range.end()) {}
+ : TypedInit(IK_ListInit, ListRecTy::get(EltTy)),
+ Values(Range.begin(), Range.end()) {}
ListInit(const ListInit &Other) LLVM_DELETED_FUNCTION;
ListInit &operator=(const ListInit &Other) LLVM_DELETED_FUNCTION;
public:
+ static bool classof(const Init *I) {
+ return I->getKind() == IK_ListInit;
+ }
static ListInit *get(ArrayRef<Init *> Range, RecTy *EltTy);
void Profile(FoldingSetNodeID &ID) const;
@@ -855,9 +913,13 @@ class OpInit : public TypedInit {
OpInit &operator=(OpInit &Other) LLVM_DELETED_FUNCTION;
protected:
- explicit OpInit(RecTy *Type) : TypedInit(Type) {}
+ explicit OpInit(InitKind K, RecTy *Type) : TypedInit(K, Type) {}
public:
+ static bool classof(const Init *I) {
+ return I->getKind() >= IK_FirstOpInit &&
+ I->getKind() <= IK_LastOpInit;
+ }
// Clone - Clone this operator, replacing arguments with the new list
virtual OpInit *clone(std::vector<Init *> &Operands) const = 0;
@@ -889,12 +951,15 @@ private:
Init *LHS;
UnOpInit(UnaryOp opc, Init *lhs, RecTy *Type)
- : OpInit(Type), Opc(opc), LHS(lhs) {}
+ : OpInit(IK_UnOpInit, Type), Opc(opc), LHS(lhs) {}
UnOpInit(const UnOpInit &Other) LLVM_DELETED_FUNCTION;
UnOpInit &operator=(const UnOpInit &Other) LLVM_DELETED_FUNCTION;
public:
+ static bool classof(const Init *I) {
+ return I->getKind() == IK_UnOpInit;
+ }
static UnOpInit *get(UnaryOp opc, Init *lhs, RecTy *Type);
// Clone - Clone this operator, replacing arguments with the new list
@@ -932,12 +997,15 @@ private:
Init *LHS, *RHS;
BinOpInit(BinaryOp opc, Init *lhs, Init *rhs, RecTy *Type) :
- OpInit(Type), Opc(opc), LHS(lhs), RHS(rhs) {}
+ OpInit(IK_BinOpInit, Type), Opc(opc), LHS(lhs), RHS(rhs) {}
BinOpInit(const BinOpInit &Other) LLVM_DELETED_FUNCTION;
BinOpInit &operator=(const BinOpInit &Other) LLVM_DELETED_FUNCTION;
public:
+ static bool classof(const Init *I) {
+ return I->getKind() == IK_BinOpInit;
+ }
static BinOpInit *get(BinaryOp opc, Init *lhs, Init *rhs,
RecTy *Type);
@@ -982,12 +1050,15 @@ private:
TernOpInit(TernaryOp opc, Init *lhs, Init *mhs, Init *rhs,
RecTy *Type) :
- OpInit(Type), Opc(opc), LHS(lhs), MHS(mhs), RHS(rhs) {}
+ OpInit(IK_TernOpInit, Type), Opc(opc), LHS(lhs), MHS(mhs), RHS(rhs) {}
TernOpInit(const TernOpInit &Other) LLVM_DELETED_FUNCTION;
TernOpInit &operator=(const TernOpInit &Other) LLVM_DELETED_FUNCTION;
public:
+ static bool classof(const Init *I) {
+ return I->getKind() == IK_TernOpInit;
+ }
static TernOpInit *get(TernaryOp opc, Init *lhs,
Init *mhs, Init *rhs,
RecTy *Type);
@@ -1036,14 +1107,17 @@ class VarInit : public TypedInit {
Init *VarName;
explicit VarInit(const std::string &VN, RecTy *T)
- : TypedInit(T), VarName(StringInit::get(VN)) {}
+ : TypedInit(IK_VarInit, T), VarName(StringInit::get(VN)) {}
explicit VarInit(Init *VN, RecTy *T)
- : TypedInit(T), VarName(VN) {}
+ : TypedInit(IK_VarInit, T), VarName(VN) {}
VarInit(const VarInit &Other) LLVM_DELETED_FUNCTION;
VarInit &operator=(const VarInit &Other) LLVM_DELETED_FUNCTION;
public:
+ static bool classof(const Init *I) {
+ return I->getKind() == IK_VarInit;
+ }
static VarInit *get(const std::string &VN, RecTy *T);
static VarInit *get(Init *VN, RecTy *T);
@@ -1083,7 +1157,7 @@ class VarBitInit : public Init {
TypedInit *TI;
unsigned Bit;
- VarBitInit(TypedInit *T, unsigned B) : TI(T), Bit(B) {
+ VarBitInit(TypedInit *T, unsigned B) : Init(IK_VarBitInit), TI(T), Bit(B) {
assert(T->getType() &&
(isa<IntRecTy>(T->getType()) ||
(isa<BitsRecTy>(T->getType()) &&
@@ -1095,6 +1169,9 @@ class VarBitInit : public Init {
VarBitInit &operator=(const VarBitInit &Other) LLVM_DELETED_FUNCTION;
public:
+ static bool classof(const Init *I) {
+ return I->getKind() == IK_VarBitInit;
+ }
static VarBitInit *get(TypedInit *T, unsigned B);
virtual Init *convertInitializerTo(RecTy *Ty) const {
@@ -1120,8 +1197,9 @@ class VarListElementInit : public TypedInit {
unsigned Element;
VarListElementInit(TypedInit *T, unsigned E)
- : TypedInit(cast<ListRecTy>(T->getType())->getElementType()),
- TI(T), Element(E) {
+ : TypedInit(IK_VarListElementInit,
+ cast<ListRecTy>(T->getType())->getElementType()),
+ TI(T), Element(E) {
assert(T->getType() && isa<ListRecTy>(T->getType()) &&
"Illegal VarBitInit expression!");
}
@@ -1130,6 +1208,9 @@ class VarListElementInit : public TypedInit {
void operator=(const VarListElementInit &Other) LLVM_DELETED_FUNCTION;
public:
+ static bool classof(const Init *I) {
+ return I->getKind() == IK_VarListElementInit;
+ }
static VarListElementInit *get(TypedInit *T, unsigned E);
virtual Init *convertInitializerTo(RecTy *Ty) const {
@@ -1157,13 +1238,16 @@ public:
class DefInit : public TypedInit {
Record *Def;
- DefInit(Record *D, RecordRecTy *T) : TypedInit(T), Def(D) {}
+ DefInit(Record *D, RecordRecTy *T) : TypedInit(IK_DefInit, T), Def(D) {}
friend class Record;
DefInit(const DefInit &Other) LLVM_DELETED_FUNCTION;
DefInit &operator=(const DefInit &Other) LLVM_DELETED_FUNCTION;
public:
+ static bool classof(const Init *I) {
+ return I->getKind() == IK_DefInit;
+ }
static DefInit *get(Record*);
virtual Init *convertInitializerTo(RecTy *Ty) const {
@@ -1201,7 +1285,7 @@ class FieldInit : public TypedInit {
std::string FieldName; // Field we are accessing
FieldInit(Init *R, const std::string &FN)
- : TypedInit(R->getFieldType(FN)), Rec(R), FieldName(FN) {
+ : TypedInit(IK_FieldInit, R->getFieldType(FN)), Rec(R), FieldName(FN) {
assert(getType() && "FieldInit with non-record type!");
}
@@ -1209,6 +1293,9 @@ class FieldInit : public TypedInit {
FieldInit &operator=(const FieldInit &Other) LLVM_DELETED_FUNCTION;
public:
+ static bool classof(const Init *I) {
+ return I->getKind() == IK_FieldInit;
+ }
static FieldInit *get(Init *R, const std::string &FN);
static FieldInit *get(Init *R, const Init *FN);
@@ -1242,7 +1329,7 @@ class DagInit : public TypedInit, public FoldingSetNode {
DagInit(Init *V, const std::string &VN,
ArrayRef<Init *> ArgRange,
ArrayRef<std::string> NameRange)
- : TypedInit(DagRecTy::get()), Val(V), ValName(VN),
+ : TypedInit(IK_DagInit, DagRecTy::get()), Val(V), ValName(VN),
Args(ArgRange.begin(), ArgRange.end()),
ArgNames(NameRange.begin(), NameRange.end()) {}
@@ -1250,6 +1337,9 @@ class DagInit : public TypedInit, public FoldingSetNode {
DagInit &operator=(const DagInit &Other) LLVM_DELETED_FUNCTION;
public:
+ static bool classof(const Init *I) {
+ return I->getKind() == IK_DagInit;
+ }
static DagInit *get(Init *V, const std::string &VN,
ArrayRef<Init *> ArgRange,
ArrayRef<std::string> NameRange);
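
Record.h adopts the scheme from docs/HowToSetUpLLVMStyleRTTI.rst wholesale: a preorder kind enum with explicit First/Last markers, so that abstract interior classes such as TypedInit and OpInit can implement classof as a range check. A condensed, self-contained model of that layout, with all names hypothetical:

struct Node {
  // Preorder kind enum; the First/Last markers are never the kind of a
  // real object, they only bracket the TypedNode subtree (as with
  // IK_FirstTypedInit/IK_LastTypedInit above).
  enum NodeKind {
    NK_Leaf,
    NK_FirstTyped,
    NK_TypedA,
    NK_TypedB,
    NK_LastTyped,
    NK_Other
  };
  const NodeKind Kind;
  explicit Node(NodeKind K) : Kind(K) {}
  NodeKind getKind() const { return Kind; }
};

// Abstract interior class: no kind of its own; classof is a range check.
struct TypedNode : Node {
  explicit TypedNode(NodeKind K) : Node(K) {}
  static bool classof(const Node *N) {
    return N->getKind() >= NK_FirstTyped && N->getKind() <= NK_LastTyped;
  }
};

// Concrete leaf: exact-kind classof, as with IntInit or StringInit above.
struct TypedA : TypedNode {
  TypedA() : TypedNode(NK_TypedA) {}
  static bool classof(const Node *N) { return N->getKind() == NK_TypedA; }
};
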
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index b3149e960a8..ad85c7e13ae 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -146,7 +146,7 @@ public:
// Return the pointer type for the given address space, defaults to
// the pointer type from the data layout.
// FIXME: The default needs to be removed once all the code is updated.
- virtual MVT getPointerTy(uint32_t addrspace = 0) const { return PointerTy; }
+ virtual MVT getPointerTy(uint32_t AS = 0) const { return PointerTy; }
virtual MVT getShiftAmountTy(EVT LHSTy) const;
/// isSelectExpensive - Return true if the select operation is expensive for
@@ -1366,7 +1366,7 @@ public:
}
/// HandleByVal - Target-specific cleanup for formal ByVal parameters.
- virtual void HandleByVal(CCState *, unsigned &) const {}
+ virtual void HandleByVal(CCState *, unsigned &, unsigned) const {}
/// CanLowerReturn - This hook should be implemented to check whether the
/// return values described by the Outs array can fit into the return
diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h
index 988916f9d95..18e589e2bc0 100644
--- a/include/llvm/Target/TargetMachine.h
+++ b/include/llvm/Target/TargetMachine.h
@@ -17,6 +17,8 @@
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetTransformInfo.h"
+#include "llvm/Target/TargetTransformImpl.h"
#include "llvm/ADT/StringRef.h"
#include <cassert>
#include <string>
@@ -107,6 +109,10 @@ public:
virtual const TargetLowering *getTargetLowering() const { return 0; }
virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const{ return 0; }
virtual const DataLayout *getDataLayout() const { return 0; }
+ virtual const ScalarTargetTransformInfo*
+ getScalarTargetTransformInfo() const { return 0; }
+ virtual const VectorTargetTransformInfo*
+ getVectorTargetTransformInfo() const { return 0; }
/// getMCAsmInfo - Return target specific asm information.
///
diff --git a/include/llvm/Target/TargetTransformImpl.h b/include/llvm/Target/TargetTransformImpl.h
new file mode 100644
index 00000000000..7648f4f935c
--- /dev/null
+++ b/include/llvm/Target/TargetTransformImpl.h
@@ -0,0 +1,54 @@
+//=- llvm/Target/TargetTransformImpl.h - Target Loop Trans Info----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the target-specific implementations of the
+// TargetTransform interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGET_TRANSFORMATION_IMPL_H
+#define LLVM_TARGET_TARGET_TRANSFORMATION_IMPL_H
+
+#include "llvm/TargetTransformInfo.h"
+
+namespace llvm {
+
+class TargetLowering;
+
+/// ScalarTargetTransformInfo - This is a default implementation for the
+/// ScalarTargetTransformInfo interface. Different targets can implement
+/// this interface differently.
+class ScalarTargetTransformImpl : public ScalarTargetTransformInfo {
+private:
+ const TargetLowering *TLI;
+
+public:
+ /// Ctor
+ explicit ScalarTargetTransformImpl(const TargetLowering *TL) : TLI(TL) {}
+
+ virtual bool isLegalAddImmediate(int64_t imm) const;
+
+ virtual bool isLegalICmpImmediate(int64_t imm) const;
+
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
+
+ virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
+
+ virtual bool isTypeLegal(Type *Ty) const;
+
+ virtual unsigned getJumpBufAlignment() const;
+
+ virtual unsigned getJumpBufSize() const;
+};
+
+class VectorTargetTransformImpl : public VectorTargetTransformInfo { };
+
+} // end llvm namespace
+
+#endif
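
The header declares the overrides without bodies. A plausible sketch of one of them, on the assumption (the .cpp is not part of this diff) that the implementation simply forwards to the wrapped TargetLowering, which already exposes an isLegalAddImmediate hook with the same meaning:

// Sketch only; the real definition lives in the (unshown) .cpp file.
bool llvm::ScalarTargetTransformImpl::isLegalAddImmediate(int64_t imm) const {
  return TLI->isLegalAddImmediate(imm); // delegate to the captured lowering
}
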
diff --git a/include/llvm/TargetTransformInfo.h b/include/llvm/TargetTransformInfo.h
new file mode 100644
index 00000000000..82fc14dbd74
--- /dev/null
+++ b/include/llvm/TargetTransformInfo.h
@@ -0,0 +1,128 @@
+//===- llvm/TargetTransformInfo.h -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass exposes codegen information to IR-level passes. Every
+// transformation that uses codegen information is broken into three parts:
+// 1. The IR-level analysis pass.
+// 2. The IR-level transformation interface which provides the needed
+// information.
+// 3. Codegen-level implementation which uses target-specific hooks.
+//
+// This file defines #2, which is the interface that IR-level transformations
+// use for querying the codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_TARGET_TRANSFORM_INTERFACE
+#define LLVM_TRANSFORMS_TARGET_TRANSFORM_INTERFACE
+
+#include "llvm/Pass.h"
+#include "llvm/AddressingMode.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Type.h"
+
+namespace llvm {
+
+class ScalarTargetTransformInfo;
+class VectorTargetTransformInfo;
+
+/// TargetTransformInfo - This pass provides access to the codegen
+/// interfaces that are needed for IR-level transformations.
+class TargetTransformInfo : public ImmutablePass {
+private:
+ const ScalarTargetTransformInfo *STTI;
+ const VectorTargetTransformInfo *VTTI;
+public:
+ /// Default ctor.
+ ///
+ /// @note This has to exist, because this is a pass, but it should never be
+ /// used.
+ TargetTransformInfo();
+
+ explicit TargetTransformInfo(const ScalarTargetTransformInfo* S,
+ const VectorTargetTransformInfo *V)
+ : ImmutablePass(ID), STTI(S), VTTI(V) {
+ initializeTargetTransformInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+ TargetTransformInfo(const TargetTransformInfo &T) :
+ ImmutablePass(ID), STTI(T.STTI), VTTI(T.VTTI) { }
+
+ const ScalarTargetTransformInfo* getScalarTargetTransformInfo() {
+ return STTI;
+ }
+ const VectorTargetTransformInfo* getVectorTargetTransformInfo() {
+ return VTTI;
+ }
+
+ /// Pass identification, replacement for typeid.
+ static char ID;
+};
+
+// ---------------------------------------------------------------------------//
+// The classes below are inherited and implemented by target-specific classes
+// in the codegen.
+// ---------------------------------------------------------------------------//
+
+/// ScalarTargetTransformInfo - This interface is used by IR-level passes
+/// that need target-dependent information for generic scalar transformations.
+/// LSR and LowerInvoke use this interface.
+class ScalarTargetTransformInfo {
+public:
+ virtual ~ScalarTargetTransformInfo() {}
+
+  /// isLegalAddImmediate - Return true if the specified immediate is a legal
+  /// add immediate, that is, the target has add instructions which can add
+ /// a register with the immediate without having to materialize the
+ /// immediate into a register.
+ virtual bool isLegalAddImmediate(int64_t) const {
+ return false;
+ }
+ /// isLegalICmpImmediate - Return true if the specified immediate is legal
+ /// icmp immediate, that is the target has icmp instructions which can compare
+ /// a register against the immediate without having to materialize the
+ /// immediate into a register.
+ virtual bool isLegalICmpImmediate(int64_t) const {
+ return false;
+ }
+ /// isLegalAddressingMode - Return true if the addressing mode represented by
+ /// AM is legal for this target, for a load/store of the specified type.
+ /// The type may be VoidTy, in which case only return true if the addressing
+ /// mode is legal for a load/store of any legal type.
+ /// TODO: Handle pre/postinc as well.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const {
+ return false;
+ }
+ /// isTruncateFree - Return true if it's free to truncate a value of
+ /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
+ /// register EAX to i16 by referencing its sub-register AX.
+ virtual bool isTruncateFree(Type * /*Ty1*/, Type * /*Ty2*/) const {
+ return false;
+ }
+ /// Is this type legal for the target?
+ virtual bool isTypeLegal(Type *Ty) const {
+ return false;
+ }
+ /// getJumpBufAlignment - returns the target's jmp_buf alignment in bytes
+ virtual unsigned getJumpBufAlignment() const {
+ return 0;
+ }
+ /// getJumpBufSize - returns the target's jmp_buf size in bytes.
+ virtual unsigned getJumpBufSize() const {
+ return 0;
+ }
+};
+
+class VectorTargetTransformInfo {
+ // TODO: define an interface for VectorTargetTransformInfo.
+};
+
+} // End llvm namespace
+
+#endif
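
A minimal sketch of how an IR-level pass might consume this analysis (the pass
name and the specific query below are illustrative, and pass-registration
boilerplate is omitted):

    // Hypothetical consumer of TargetTransformInfo -- illustrative only.
    #include "llvm/Function.h"
    #include "llvm/Pass.h"
    #include "llvm/TargetTransformInfo.h"
    using namespace llvm;

    namespace {
    struct ExamplePass : public FunctionPass {
      static char ID;
      ExamplePass() : FunctionPass(ID) {}

      virtual void getAnalysisUsage(AnalysisUsage &AU) const {
        AU.addRequired<TargetTransformInfo>();  // request the immutable pass
        AU.setPreservesAll();
      }

      virtual bool runOnFunction(Function &F) {
        TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
        if (const ScalarTargetTransformInfo *STTI =
              TTI.getScalarTargetTransformInfo()) {
          // E.g., only fold a constant into an add if the target can
          // encode it as an immediate.
          if (STTI->isLegalAddImmediate(42)) {
            // ... perform the transformation ...
          }
        }
        return false;  // no IR changes in this sketch
      }
    };
    char ExamplePass::ID = 0;
    }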
diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h
index 4b0c448acfc..8e63aaa4e87 100644
--- a/include/llvm/Transforms/Instrumentation.h
+++ b/include/llvm/Transforms/Instrumentation.h
@@ -34,7 +34,7 @@ ModulePass *createGCOVProfilerPass(bool EmitNotes = true, bool EmitData = true,
bool UseExtraChecksum = false);
// Insert AddressSanitizer (address sanity checking) instrumentation
-ModulePass *createAddressSanitizerPass();
+FunctionPass *createAddressSanitizerPass();
// Insert ThreadSanitizer (race detection) instrumentation
FunctionPass *createThreadSanitizerPass();
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index a5d8eed7462..3b665bf4b68 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -119,7 +119,7 @@ Pass *createLICMPass();
// optional parameter used to consult the target machine whether certain
// transformations are profitable.
//
-Pass *createLoopStrengthReducePass(const TargetLowering *TLI = 0);
+Pass *createLoopStrengthReducePass();
Pass *createGlobalMergePass(const TargetLowering *TLI = 0);
@@ -249,9 +249,8 @@ extern char &LowerSwitchID;
// purpose "my LLVM-to-LLVM pass doesn't support the invoke instruction yet"
// lowering pass.
//
-FunctionPass *createLowerInvokePass(const TargetLowering *TLI = 0);
-FunctionPass *createLowerInvokePass(const TargetLowering *TLI,
- bool useExpensiveEHSupport);
+FunctionPass *createLowerInvokePass();
+FunctionPass *createLowerInvokePass(bool useExpensiveEHSupport);
extern char &LowerInvokePassID;
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index 21dd3fbe110..fd1b5556ef2 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -186,7 +186,8 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &TD, User *GEP,
bool isInBounds = cast<GEPOperator>(GEP)->isInBounds() && !NoAssumptions;
// Build a mask for high order bits.
- unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ unsigned AS = cast<GEPOperator>(GEP)->getPointerAddressSpace();
+ unsigned IntPtrWidth = TD.getPointerSizeInBits(AS);
uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
diff --git a/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/include/llvm/Transforms/Utils/SimplifyLibCalls.h
new file mode 100644
index 00000000000..5db2d001814
--- /dev/null
+++ b/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -0,0 +1,43 @@
+//===- SimplifyLibCalls.h - Library call simplifier -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes an interface for simplifying calls to well-known
+// C library functions into more optimal forms.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_SIMPLIFYLIBCALLS_H
+#define LLVM_TRANSFORMS_UTILS_SIMPLIFYLIBCALLS_H
+
+namespace llvm {
+ class Value;
+ class CallInst;
+ class DataLayout;
+ class TargetLibraryInfo;
+ class LibCallSimplifierImpl;
+
+ /// LibCallSimplifier - This class implements a collection of optimizations
+ /// that replace well-formed calls to library functions with a more optimal
+ /// form. For example, replacing 'printf("Hello!")' with 'puts("Hello!")'.
+ class LibCallSimplifier {
+ /// Impl - A pointer to the actual implementation of the library call
+ /// simplifier.
+ LibCallSimplifierImpl *Impl;
+ public:
+ LibCallSimplifier(const DataLayout *TD, const TargetLibraryInfo *TLI);
+ virtual ~LibCallSimplifier();
+
+ /// optimizeCall - Take the given call instruction and return a more
+ /// optimal value to replace the instruction with, or 0 if a more
+ /// optimal form can't be found.
+ Value *optimizeCall(CallInst *CI);
+ };
+} // End llvm namespace
+
+#endif
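
A minimal usage sketch (the driver function and its arguments are assumptions
for illustration; this is not the actual InstCombine integration):

    // Illustrative driver for LibCallSimplifier.
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/DataLayout.h"
    #include "llvm/Function.h"
    #include "llvm/Instructions.h"
    #include "llvm/Support/InstIterator.h"
    #include "llvm/Target/TargetLibraryInfo.h"
    #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
    using namespace llvm;

    static void simplifyCalls(Function &F, const DataLayout *TD,
                              const TargetLibraryInfo *TLI) {
      LibCallSimplifier Simplifier(TD, TLI);
      // Collect the calls first so erasing doesn't invalidate iteration.
      SmallVector<CallInst *, 8> Calls;
      for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
        if (CallInst *CI = dyn_cast<CallInst>(&*I))
          Calls.push_back(CI);
      for (unsigned i = 0, e = Calls.size(); i != e; ++i)
        if (Value *V = Simplifier.optimizeCall(Calls[i])) {
          // A more optimal form was found; replace and erase the call.
          Calls[i]->replaceAllUsesWith(V);
          Calls[i]->eraseFromParent();
        }
    }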
diff --git a/include/llvm/Type.h b/include/llvm/Type.h
index 8e66ea86094..5a867045af8 100644
--- a/include/llvm/Type.h
+++ b/include/llvm/Type.h
@@ -389,9 +389,6 @@ public:
static PointerType *getInt32PtrTy(LLVMContext &C, unsigned AS = 0);
static PointerType *getInt64PtrTy(LLVMContext &C, unsigned AS = 0);
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Type *) { return true; }
-
/// getPointerTo - Return a pointer to the current type. This is equivalent
/// to PointerType::get(Foo, AddrSpace).
PointerType *getPointerTo(unsigned AddrSpace = 0);
diff --git a/include/llvm/User.h b/include/llvm/User.h
index ade3676260b..df303d0dd5f 100644
--- a/include/llvm/User.h
+++ b/include/llvm/User.h
@@ -176,7 +176,6 @@ public:
void replaceUsesOfWith(Value *From, Value *To);
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const User *) { return true; }
static inline bool classof(const Value *V) {
return isa<Instruction>(V) || isa<Constant>(V);
}
diff --git a/include/llvm/Value.h b/include/llvm/Value.h
index 6560a420bf6..5b19435ebaf 100644
--- a/include/llvm/Value.h
+++ b/include/llvm/Value.h
@@ -257,11 +257,6 @@ public:
/// hasValueHandle - Return true if there is a value handle associated with
/// this value.
bool hasValueHandle() const { return HasValueHandle; }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *) {
- return true; // Values are always values.
- }
/// stripPointerCasts - This method strips off any unneeded pointer casts and
/// all-zero GEPs from the specified value, returning the original uncasted
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 87a75fd3b11..588206e9159 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -31,6 +31,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeCFGOnlyViewerPass(Registry);
initializeCFGOnlyPrinterPass(Registry);
initializePrintDbgInfoPass(Registry);
+ initializeDependenceAnalysisPass(Registry);
initializeDominanceFrontierPass(Registry);
initializeDomViewerPass(Registry);
initializeDomPrinterPass(Registry);
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 263bfc031fc..36903f94e25 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -286,7 +286,8 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
V = GEPOp->getOperand(0);
continue;
}
-
+
+ unsigned AS = GEPOp->getPointerAddressSpace();
// Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
gep_type_iterator GTI = gep_type_begin(GEPOp);
for (User::const_op_iterator I = GEPOp->op_begin()+1,
@@ -315,7 +316,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// If the integer type is smaller than the pointer size, it is implicitly
// sign extended to pointer size.
unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth();
- if (TD->getPointerSizeInBits() > Width)
+ if (TD->getPointerSizeInBits(AS) > Width)
Extension = EK_SignExt;
// Use GetLinearExpression to decompose the index into a C1*V+C2 form.
@@ -344,7 +345,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// Make sure that we have a scale that makes sense for this target's
// pointer size.
- if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) {
+ if (unsigned ShiftBits = 64-TD->getPointerSizeInBits(AS)) {
Scale <<= ShiftBits;
Scale = (int64_t)Scale >> ShiftBits;
}
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index e461848e861..3ce888fefa4 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -13,6 +13,7 @@ add_llvm_library(LLVMAnalysis
CodeMetrics.cpp
ConstantFolding.cpp
DbgInfoPrinter.cpp
+ DependenceAnalysis.cpp
DomPrinter.cpp
DominanceFrontier.cpp
IVUsers.cpp
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
index 651a54be1b9..d6692684960 100644
--- a/lib/Analysis/CodeMetrics.cpp
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -91,14 +91,16 @@ bool llvm::isInstructionFree(const Instruction *I, const DataLayout *TD) {
// which doesn't contain values outside the range of a pointer.
if (isa<IntToPtrInst>(CI) && TD &&
TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) &&
- Op->getType()->getScalarSizeInBits() <= TD->getPointerSizeInBits())
+ Op->getType()->getScalarSizeInBits() <= TD->getPointerSizeInBits(
+ cast<IntToPtrInst>(CI)->getAddressSpace()))
return true;
// A ptrtoint cast is free so long as the result is large enough to store
// the pointer, and a legal integer type.
if (isa<PtrToIntInst>(CI) && TD &&
TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) &&
- Op->getType()->getScalarSizeInBits() >= TD->getPointerSizeInBits())
+ Op->getType()->getScalarSizeInBits() >= TD->getPointerSizeInBits(
+ cast<PtrToIntInst>(CI)->getPointerAddressSpace()))
return true;
// trunc to a native type is free (assuming the target has compare and
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index b7bf044a368..146897ad675 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -916,10 +916,11 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
if (TD && CE->getOpcode() == Instruction::IntToPtr) {
Constant *Input = CE->getOperand(0);
unsigned InWidth = Input->getType()->getScalarSizeInBits();
- if (TD->getPointerSizeInBits() < InWidth) {
+ unsigned AS = cast<PointerType>(CE->getType())->getAddressSpace();
+ if (TD->getPointerSizeInBits(AS) < InWidth) {
Constant *Mask =
ConstantInt::get(CE->getContext(), APInt::getLowBitsSet(InWidth,
- TD->getPointerSizeInBits()));
+ TD->getPointerSizeInBits(AS)));
Input = ConstantExpr::getAnd(Input, Mask);
}
// Do a zext or trunc to get to the dest size.
@@ -932,9 +933,10 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
// the int size is >= the ptr size. This requires knowing the width of a
// pointer, so it can't be done in ConstantExpr::getCast.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
- if (TD &&
- TD->getPointerSizeInBits() <= CE->getType()->getScalarSizeInBits() &&
- CE->getOpcode() == Instruction::PtrToInt)
+ if (TD && CE->getOpcode() == Instruction::PtrToInt &&
+ TD->getPointerSizeInBits(
+ cast<PointerType>(CE->getOperand(0)->getType())->getAddressSpace())
+ <= CE->getType()->getScalarSizeInBits())
return FoldBitCast(CE->getOperand(0), DestTy, *TD);
return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
new file mode 100644
index 00000000000..016fe396e7d
--- /dev/null
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -0,0 +1,3781 @@
+//===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// DependenceAnalysis is an LLVM pass that analyses dependences between memory
+// accesses. Currently, it is an (incomplete) implementation of the approach
+// described in
+//
+// Practical Dependence Testing
+// Goff, Kennedy, Tseng
+// PLDI 1991
+//
+// There's a single entry point that analyzes the dependence between a pair
+// of memory references in a function, returning either NULL, for no dependence,
+// or a more-or-less detailed description of the dependence between them.
+//
+// Currently, the implementation cannot propagate constraints between
+// coupled RDIV subscripts and lacks a multi-subscript MIV test.
+// Both of these are conservative weaknesses;
+// that is, not a source of correctness problems.
+//
+// The implementation depends on the GEP instruction to
+// differentiate subscripts. Since Clang linearizes subscripts
+// for most arrays, we give up some precision (though the existing MIV tests
+// will help). We trust that the GEP instruction will eventually be extended.
+// In the meantime, we should explore Maslov's ideas about delinearization.
+//
+// We should pay some careful attention to the possibility of integer overflow
+// in the implementation of the various tests. This could happen with Add,
+// Subtract, or Multiply, with both APInt's and SCEV's.
+//
+// Some non-linear subscript pairs can be handled by the GCD test
+// (and perhaps other tests).
+// Should explore how often these things occur.
+//
+// Finally, it seems like certain test cases expose weaknesses in the SCEV
+// simplification, especially in the handling of sign and zero extensions.
+// It could be useful to spend time exploring these.
+//
+// Please note that this is work in progress and the interface is subject to
+// change.
+//
+//===----------------------------------------------------------------------===//
+// //
+// In memory of Ken Kennedy, 1945 - 2007 //
+// //
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "da"
+
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Instructions.h"
+#include "llvm/Operator.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstIterator.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// statistics
+
+STATISTIC(TotalArrayPairs, "Array pairs tested");
+STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs");
+STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs");
+STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs");
+STATISTIC(ZIVapplications, "ZIV applications");
+STATISTIC(ZIVindependence, "ZIV independence");
+STATISTIC(StrongSIVapplications, "Strong SIV applications");
+STATISTIC(StrongSIVsuccesses, "Strong SIV successes");
+STATISTIC(StrongSIVindependence, "Strong SIV independence");
+STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications");
+STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes");
+STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence");
+STATISTIC(ExactSIVapplications, "Exact SIV applications");
+STATISTIC(ExactSIVsuccesses, "Exact SIV successes");
+STATISTIC(ExactSIVindependence, "Exact SIV independence");
+STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications");
+STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes");
+STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence");
+STATISTIC(ExactRDIVapplications, "Exact RDIV applications");
+STATISTIC(ExactRDIVindependence, "Exact RDIV independence");
+STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications");
+STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence");
+STATISTIC(DeltaApplications, "Delta applications");
+STATISTIC(DeltaSuccesses, "Delta successes");
+STATISTIC(DeltaIndependence, "Delta independence");
+STATISTIC(DeltaPropagations, "Delta propagations");
+STATISTIC(GCDapplications, "GCD applications");
+STATISTIC(GCDsuccesses, "GCD successes");
+STATISTIC(GCDindependence, "GCD independence");
+STATISTIC(BanerjeeApplications, "Banerjee applications");
+STATISTIC(BanerjeeIndependence, "Banerjee independence");
+STATISTIC(BanerjeeSuccesses, "Banerjee successes");
+
+//===----------------------------------------------------------------------===//
+// basics
+
+INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da",
+ "Dependence Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(DependenceAnalysis, "da",
+ "Dependence Analysis", true, true)
+
+char DependenceAnalysis::ID = 0;
+
+
+FunctionPass *llvm::createDependenceAnalysisPass() {
+ return new DependenceAnalysis();
+}
+
+
+bool DependenceAnalysis::runOnFunction(Function &F) {
+ this->F = &F;
+ AA = &getAnalysis<AliasAnalysis>();
+ SE = &getAnalysis<ScalarEvolution>();
+ LI = &getAnalysis<LoopInfo>();
+ return false;
+}
+
+
+void DependenceAnalysis::releaseMemory() {
+}
+
+
+void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<AliasAnalysis>();
+ AU.addRequiredTransitive<ScalarEvolution>();
+ AU.addRequiredTransitive<LoopInfo>();
+}
+
+
+// Used to test the dependence analyzer.
+// Looks through the function, noting the first store instruction
+// and the first load instruction
+// (which always follows the first store in our tests).
+// Calls depends() and prints out the result.
+// Ignores all other instructions.
+static
+void dumpExampleDependence(raw_ostream &OS, Function *F,
+ DependenceAnalysis *DA) {
+ for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F);
+ SrcI != SrcE; ++SrcI) {
+ if (const StoreInst *Src = dyn_cast<StoreInst>(&*SrcI)) {
+ for (inst_iterator DstI = SrcI, DstE = inst_end(F);
+ DstI != DstE; ++DstI) {
+ if (const LoadInst *Dst = dyn_cast<LoadInst>(&*DstI)) {
+ OS << "da analyze - ";
+ if (Dependence *D = DA->depends(Src, Dst, true)) {
+ D->dump(OS);
+ for (unsigned Level = 1; Level <= D->getLevels(); Level++) {
+ if (D->isSplitable(Level)) {
+ OS << "da analyze - split level = " << Level;
+ OS << ", iteration = " << *DA->getSplitIteration(D, Level);
+ OS << "!\n";
+ }
+ }
+ delete D;
+ }
+ else
+ OS << "none!\n";
+ return;
+ }
+ }
+ }
+ }
+}
+
+
+void DependenceAnalysis::print(raw_ostream &OS, const Module*) const {
+ dumpExampleDependence(OS, F, const_cast<DependenceAnalysis *>(this));
+}
+
+//===----------------------------------------------------------------------===//
+// Dependence methods
+
+// Returns true if this is an input dependence.
+bool Dependence::isInput() const {
+ return Src->mayReadFromMemory() && Dst->mayReadFromMemory();
+}
+
+
+// Returns true if this is an output dependence.
+bool Dependence::isOutput() const {
+ return Src->mayWriteToMemory() && Dst->mayWriteToMemory();
+}
+
+
+// Returns true if this is a flow (aka true) dependence.
+bool Dependence::isFlow() const {
+ return Src->mayWriteToMemory() && Dst->mayReadFromMemory();
+}
+
+
+// Returns true if this is an anti dependence.
+bool Dependence::isAnti() const {
+ return Src->mayReadFromMemory() && Dst->mayWriteToMemory();
+}
+
+
+// Returns true if a particular level is scalar; that is,
+// if no subscript in the source or destination mentions the induction
+// variable associated with the loop at this level.
+// Leave this out of line, so it will serve as a virtual method anchor.
+bool Dependence::isScalar(unsigned level) const {
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// FullDependence methods
+
+FullDependence::FullDependence(const Instruction *Source,
+ const Instruction *Destination,
+ bool PossiblyLoopIndependent,
+ unsigned CommonLevels) :
+ Dependence(Source, Destination),
+ Levels(CommonLevels),
+ LoopIndependent(PossiblyLoopIndependent) {
+ Consistent = true;
+ DV = CommonLevels ? new DVEntry[CommonLevels] : NULL;
+}
+
+// The rest are simple getters that hide the implementation.
+
+// getDirection - Returns the direction associated with a particular level.
+unsigned FullDependence::getDirection(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].Direction;
+}
+
+
+// Returns the distance (or NULL) associated with a particular level.
+const SCEV *FullDependence::getDistance(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].Distance;
+}
+
+
+// Returns true if a particular level is scalar; that is,
+// if no subscript in the source or destination mentions the induction
+// variable associated with the loop at this level.
+bool FullDependence::isScalar(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].Scalar;
+}
+
+
+// Returns true if peeling the first iteration from this loop
+// will break this dependence.
+bool FullDependence::isPeelFirst(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].PeelFirst;
+}
+
+
+// Returns true if peeling the last iteration from this loop
+// will break this dependence.
+bool FullDependence::isPeelLast(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].PeelLast;
+}
+
+
+// Returns true if splitting this loop will break the dependence.
+bool FullDependence::isSplitable(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].Splitable;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DependenceAnalysis::Constraint methods
+
+// If constraint is a point <X, Y>, returns X.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getX() const {
+ assert(Kind == Point && "Kind should be Point");
+ return A;
+}
+
+
+// If constraint is a point <X, Y>, returns Y.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getY() const {
+ assert(Kind == Point && "Kind should be Point");
+ return B;
+}
+
+
+// If constraint is a line AX + BY = C, returns A.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getA() const {
+ assert((Kind == Line || Kind == Distance) &&
+ "Kind should be Line (or Distance)");
+ return A;
+}
+
+
+// If constraint is a line AX + BY = C, returns B.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getB() const {
+ assert((Kind == Line || Kind == Distance) &&
+ "Kind should be Line (or Distance)");
+ return B;
+}
+
+
+// If constraint is a line AX + BY = C, returns C.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getC() const {
+ assert((Kind == Line || Kind == Distance) &&
+ "Kind should be Line (or Distance)");
+ return C;
+}
+
+
+// If constraint is a distance, returns D.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getD() const {
+ assert(Kind == Distance && "Kind should be Distance");
+ return SE->getNegativeSCEV(C);
+}
+
+
+// Returns the loop associated with this constraint.
+const Loop *DependenceAnalysis::Constraint::getAssociatedLoop() const {
+ assert((Kind == Distance || Kind == Line || Kind == Point) &&
+ "Kind should be Distance, Line, or Point");
+ return AssociatedLoop;
+}
+
+
+void DependenceAnalysis::Constraint::setPoint(const SCEV *X,
+ const SCEV *Y,
+ const Loop *CurLoop) {
+ Kind = Point;
+ A = X;
+ B = Y;
+ AssociatedLoop = CurLoop;
+}
+
+
+void DependenceAnalysis::Constraint::setLine(const SCEV *AA,
+ const SCEV *BB,
+ const SCEV *CC,
+ const Loop *CurLoop) {
+ Kind = Line;
+ A = AA;
+ B = BB;
+ C = CC;
+ AssociatedLoop = CurLoop;
+}
+
+
+void DependenceAnalysis::Constraint::setDistance(const SCEV *D,
+ const Loop *CurLoop) {
+ Kind = Distance;
+ A = SE->getConstant(D->getType(), 1);
+ B = SE->getNegativeSCEV(A);
+ C = SE->getNegativeSCEV(D);
+ AssociatedLoop = CurLoop;
+}
+
+
+void DependenceAnalysis::Constraint::setEmpty() {
+ Kind = Empty;
+}
+
+
+void DependenceAnalysis::Constraint::setAny(ScalarEvolution *NewSE) {
+ SE = NewSE;
+ Kind = Any;
+}
+
+
+// For debugging purposes. Dumps the constraint out to OS.
+void DependenceAnalysis::Constraint::dump(raw_ostream &OS) const {
+ if (isEmpty())
+ OS << " Empty\n";
+ else if (isAny())
+ OS << " Any\n";
+ else if (isPoint())
+ OS << " Point is <" << *getX() << ", " << *getY() << ">\n";
+ else if (isDistance())
+ OS << " Distance is " << *getD() <<
+ " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n";
+ else if (isLine())
+ OS << " Line is " << *getA() << "*X + " <<
+ *getB() << "*Y = " << *getC() << "\n";
+ else
+ llvm_unreachable("unknown constraint type in Constraint::dump");
+}
+
+
+// Updates X with the intersection
+// of the Constraints X and Y. Returns true if X has changed.
+// Corresponds to Figure 4 from the paper
+//
+// Practical Dependence Testing
+// Goff, Kennedy, Tseng
+// PLDI 1991
+bool DependenceAnalysis::intersectConstraints(Constraint *X,
+ const Constraint *Y) {
+ ++DeltaApplications;
+ DEBUG(dbgs() << "\tintersect constraints\n");
+ DEBUG(dbgs() << "\t X ="; X->dump(dbgs()));
+ DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs()));
+ assert(!Y->isPoint() && "Y must not be a Point");
+ if (X->isAny()) {
+ if (Y->isAny())
+ return false;
+ *X = *Y;
+ return true;
+ }
+ if (X->isEmpty())
+ return false;
+ if (Y->isEmpty()) {
+ X->setEmpty();
+ return true;
+ }
+
+ if (X->isDistance() && Y->isDistance()) {
+ DEBUG(dbgs() << "\t intersect 2 distances\n");
+ if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD()))
+ return false;
+ if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) {
+ X->setEmpty();
+ ++DeltaSuccesses;
+ return true;
+ }
+ // Hmmm, interesting situation.
+ // I guess if either is constant, keep it and ignore the other.
+ if (isa<SCEVConstant>(Y->getD())) {
+ *X = *Y;
+ return true;
+ }
+ return false;
+ }
+
+ // At this point, the pseudo-code in Figure 4 of the paper
+ // checks if (X->isPoint() && Y->isPoint()).
+ // This case can't occur in our implementation,
+ // since a Point can only arise as the result of intersecting
+ // two Line constraints, and the right-hand value, Y, is never
+ // the result of an intersection.
+ assert(!(X->isPoint() && Y->isPoint()) &&
+ "We shouldn't ever see X->isPoint() && Y->isPoint()");
+
+ if (X->isLine() && Y->isLine()) {
+ DEBUG(dbgs() << "\t intersect 2 lines\n");
+ const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB());
+ const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA());
+ if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) {
+ // slopes are equal, so lines are parallel
+ DEBUG(dbgs() << "\t\tsame slope\n");
+ Prod1 = SE->getMulExpr(X->getC(), Y->getB());
+ Prod2 = SE->getMulExpr(X->getB(), Y->getC());
+ if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2))
+ return false;
+ if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
+ X->setEmpty();
+ ++DeltaSuccesses;
+ return true;
+ }
+ return false;
+ }
+ if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
+ // slopes differ, so lines intersect
+ DEBUG(dbgs() << "\t\tdifferent slopes\n");
+ const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB());
+ const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA());
+ const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB());
+ const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA());
+ const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB());
+ const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB());
+ const SCEVConstant *C1A2_C2A1 =
+ dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1));
+ const SCEVConstant *C1B2_C2B1 =
+ dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1));
+ const SCEVConstant *A1B2_A2B1 =
+ dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1));
+ const SCEVConstant *A2B1_A1B2 =
+ dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2));
+ if (!C1B2_C2B1 || !C1A2_C2A1 ||
+ !A1B2_A2B1 || !A2B1_A1B2)
+ return false;
+ APInt Xtop = C1B2_C2B1->getValue()->getValue();
+ APInt Xbot = A1B2_A2B1->getValue()->getValue();
+ APInt Ytop = C1A2_C2A1->getValue()->getValue();
+ APInt Ybot = A2B1_A1B2->getValue()->getValue();
+ DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n");
+ DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n");
+ DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n");
+ DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n");
+ APInt Xq = Xtop; // these need to be initialized, even
+ APInt Xr = Xtop; // though they're just going to be overwritten
+ APInt::sdivrem(Xtop, Xbot, Xq, Xr);
+ APInt Yq = Ytop;
+ APInt Yr = Ytop;
+ APInt::sdivrem(Ytop, Ybot, Yq, Yr);
+ if (Xr != 0 || Yr != 0) {
+ X->setEmpty();
+ ++DeltaSuccesses;
+ return true;
+ }
+ DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n");
+ if (Xq.slt(0) || Yq.slt(0)) {
+ X->setEmpty();
+ ++DeltaSuccesses;
+ return true;
+ }
+ if (const SCEVConstant *CUB =
+ collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) {
+ APInt UpperBound = CUB->getValue()->getValue();
+ DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");
+ if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) {
+ X->setEmpty();
+ ++DeltaSuccesses;
+ return true;
+ }
+ }
+ X->setPoint(SE->getConstant(Xq),
+ SE->getConstant(Yq),
+ X->getAssociatedLoop());
+ ++DeltaSuccesses;
+ return true;
+ }
+ return false;
+ }
+
+ // if (X->isLine() && Y->isPoint()) This case can't occur.
+ assert(!(X->isLine() && Y->isPoint()) && "This case should never occur");
+
+ if (X->isPoint() && Y->isLine()) {
+ DEBUG(dbgs() << "\t intersect Point and Line\n");
+ const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX());
+ const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY());
+ const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1);
+ if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC()))
+ return false;
+ if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) {
+ X->setEmpty();
+ ++DeltaSuccesses;
+ return true;
+ }
+ return false;
+ }
+
+ llvm_unreachable("shouldn't reach the end of Constraint intersection");
+ return false;
+}
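
For reference, the Xtop/Xbot arithmetic above is Cramer's rule for the system
A_1 X + B_1 Y = C_1, A_2 X + B_2 Y = C_2, written with the same differences the
code computes:

    X = \frac{C_1 B_2 - C_2 B_1}{A_1 B_2 - A_2 B_1}, \qquad
    Y = \frac{C_1 A_2 - C_2 A_1}{A_2 B_1 - A_1 B_2}

The intersection rules out the dependence only when X or Y is non-integral,
negative, or greater than the loop's constant upper bound -- exactly the three
checks applied to Xq and Yq.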
+
+
+//===----------------------------------------------------------------------===//
+// DependenceAnalysis methods
+
+// For debugging purposes. Dumps a dependence to OS.
+void Dependence::dump(raw_ostream &OS) const {
+ bool Splitable = false;
+ if (isConfused())
+ OS << "confused";
+ else {
+ if (isConsistent())
+ OS << "consistent ";
+ if (isFlow())
+ OS << "flow";
+ else if (isOutput())
+ OS << "output";
+ else if (isAnti())
+ OS << "anti";
+ else if (isInput())
+ OS << "input";
+ unsigned Levels = getLevels();
+ if (Levels) {
+ OS << " [";
+ for (unsigned II = 1; II <= Levels; ++II) {
+ if (isSplitable(II))
+ Splitable = true;
+ if (isPeelFirst(II))
+ OS << 'p';
+ const SCEV *Distance = getDistance(II);
+ if (Distance)
+ OS << *Distance;
+ else if (isScalar(II))
+ OS << "S";
+ else {
+ unsigned Direction = getDirection(II);
+ if (Direction == DVEntry::ALL)
+ OS << "*";
+ else {
+ if (Direction & DVEntry::LT)
+ OS << "<";
+ if (Direction & DVEntry::EQ)
+ OS << "=";
+ if (Direction & DVEntry::GT)
+ OS << ">";
+ }
+ }
+ if (isPeelLast(II))
+ OS << 'p';
+ if (II < Levels)
+ OS << " ";
+ }
+ if (isLoopIndependent())
+ OS << "|<";
+ OS << "]";
+ if (Splitable)
+ OS << " splitable";
+ }
+ }
+ OS << "!\n";
+}
+
+
+
+static
+AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
+ const Value *A,
+ const Value *B) {
+ const Value *AObj = GetUnderlyingObject(A);
+ const Value *BObj = GetUnderlyingObject(B);
+ return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()),
+ BObj, AA->getTypeStoreSize(BObj->getType()));
+}
+
+
+// Returns true if the load or store can be analyzed. Atomic and volatile
+// operations have properties which this analysis does not understand.
+static
+bool isLoadOrStore(const Instruction *I) {
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->isUnordered();
+ else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->isUnordered();
+ return false;
+}
+
+
+static
+const Value *getPointerOperand(const Instruction *I) {
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->getPointerOperand();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->getPointerOperand();
+ llvm_unreachable("Value is not load or store instruction");
+ return 0;
+}
+
+
+// Examines the loop nesting of the Src and Dst
+// instructions and establishes their shared loops. Sets the variables
+// CommonLevels, SrcLevels, and MaxLevels.
+// The source and destination instructions needn't be contained in the same
+// loop. The routine establishNestingLevels finds the level of the most deeply
+// nested loop that contains them both, CommonLevels. An instruction that's
+// not contained in a loop is at level = 0. MaxLevels is equal to the level
+// of the source plus the level of the destination, minus CommonLevels.
+// This lets us allocate vectors MaxLevels in length, with room for every
+// distinct loop referenced in both the source and destination subscripts.
+// The variable SrcLevels is the nesting depth of the source instruction.
+// It's used to help calculate distinct loops referenced by the destination.
+// Here's the map from loops to levels:
+// 0 - unused
+// 1 - outermost common loop
+// ... - other common loops
+// CommonLevels - innermost common loop
+// ... - loops containing Src but not Dst
+// SrcLevels - innermost loop containing Src but not Dst
+// ... - loops containing Dst but not Src
+// MaxLevels - innermost loop containing Dst but not Src
+// Consider the following code fragment:
+// for (a = ...) {
+// for (b = ...) {
+// for (c = ...) {
+// for (d = ...) {
+// A[] = ...;
+// }
+// }
+// for (e = ...) {
+// for (f = ...) {
+// for (g = ...) {
+// ... = A[];
+// }
+// }
+// }
+// }
+// }
+// If we're looking at the possibility of a dependence between the store
+// to A (the Src) and the load from A (the Dst), we'll note that they
+// have 2 loops in common, so CommonLevels will equal 2 and the direction
+// vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7.
+// A map from loop names to loop numbers would look like
+// a - 1
+// b - 2 = CommonLevels
+// c - 3
+// d - 4 = SrcLevels
+// e - 5
+// f - 6
+// g - 7 = MaxLevels
+void DependenceAnalysis::establishNestingLevels(const Instruction *Src,
+ const Instruction *Dst) {
+ const BasicBlock *SrcBlock = Src->getParent();
+ const BasicBlock *DstBlock = Dst->getParent();
+ unsigned SrcLevel = LI->getLoopDepth(SrcBlock);
+ unsigned DstLevel = LI->getLoopDepth(DstBlock);
+ const Loop *SrcLoop = LI->getLoopFor(SrcBlock);
+ const Loop *DstLoop = LI->getLoopFor(DstBlock);
+ SrcLevels = SrcLevel;
+ MaxLevels = SrcLevel + DstLevel;
+ while (SrcLevel > DstLevel) {
+ SrcLoop = SrcLoop->getParentLoop();
+ SrcLevel--;
+ }
+ while (DstLevel > SrcLevel) {
+ DstLoop = DstLoop->getParentLoop();
+ DstLevel--;
+ }
+ while (SrcLoop != DstLoop) {
+ SrcLoop = SrcLoop->getParentLoop();
+ DstLoop = DstLoop->getParentLoop();
+ SrcLevel--;
+ }
+ CommonLevels = SrcLevel;
+ MaxLevels -= CommonLevels;
+}
+
+
+// Given one of the loops containing the source, return
+// its level index in our numbering scheme.
+unsigned DependenceAnalysis::mapSrcLoop(const Loop *SrcLoop) const {
+ return SrcLoop->getLoopDepth();
+}
+
+
+// Given one of the loops containing the destination,
+// return its level index in our numbering scheme.
+unsigned DependenceAnalysis::mapDstLoop(const Loop *DstLoop) const {
+ unsigned D = DstLoop->getLoopDepth();
+ if (D > CommonLevels)
+ return D - CommonLevels + SrcLevels;
+ else
+ return D;
+}
+
+
+// Returns true if Expression is loop invariant in LoopNest.
+bool DependenceAnalysis::isLoopInvariant(const SCEV *Expression,
+ const Loop *LoopNest) const {
+ if (!LoopNest)
+ return true;
+ return SE->isLoopInvariant(Expression, LoopNest) &&
+ isLoopInvariant(Expression, LoopNest->getParentLoop());
+}
+
+
+
+// Finds the set of loops from the LoopNest that
+// have a level <= CommonLevels and are referred to by the SCEV Expression.
+void DependenceAnalysis::collectCommonLoops(const SCEV *Expression,
+ const Loop *LoopNest,
+ SmallBitVector &Loops) const {
+ while (LoopNest) {
+ unsigned Level = LoopNest->getLoopDepth();
+ if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest))
+ Loops.set(Level);
+ LoopNest = LoopNest->getParentLoop();
+ }
+}
+
+
+// removeMatchingExtensions - Examines a subscript pair.
+// If the source and destination are identically sign (or zero)
+// extended, it strips off the extension in an effort to simplify
+// the actual analysis.
+void DependenceAnalysis::removeMatchingExtensions(Subscript *Pair) {
+ const SCEV *Src = Pair->Src;
+ const SCEV *Dst = Pair->Dst;
+ if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) ||
+ (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) {
+ const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src);
+ const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst);
+ if (SrcCast->getType() == DstCast->getType()) {
+ Pair->Src = SrcCast->getOperand();
+ Pair->Dst = DstCast->getOperand();
+ }
+ }
+}
+
+
+// Examines the SCEV and returns true iff it's linear.
+// Collects any loops mentioned into the set "Loops".
+bool DependenceAnalysis::checkSrcSubscript(const SCEV *Src,
+ const Loop *LoopNest,
+ SmallBitVector &Loops) {
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src);
+ if (!AddRec)
+ return isLoopInvariant(Src, LoopNest);
+ const SCEV *Start = AddRec->getStart();
+ const SCEV *Step = AddRec->getStepRecurrence(*SE);
+ if (!isLoopInvariant(Step, LoopNest))
+ return false;
+ Loops.set(mapSrcLoop(AddRec->getLoop()));
+ return checkSrcSubscript(Start, LoopNest, Loops);
+}
+
+
+
+// Examines the SCEV and returns true iff it's linear.
+// Collects any loops mentioned into the set "Loops".
+bool DependenceAnalysis::checkDstSubscript(const SCEV *Dst,
+ const Loop *LoopNest,
+ SmallBitVector &Loops) {
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst);
+ if (!AddRec)
+ return isLoopInvariant(Dst, LoopNest);
+ const SCEV *Start = AddRec->getStart();
+ const SCEV *Step = AddRec->getStepRecurrence(*SE);
+ if (!isLoopInvariant(Step, LoopNest))
+ return false;
+ Loops.set(mapDstLoop(AddRec->getLoop()));
+ return checkDstSubscript(Start, LoopNest, Loops);
+}
+
+
+// Examines the subscript pair (the Src and Dst SCEVs)
+// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear.
+// Collects the associated loops in a set.
+DependenceAnalysis::Subscript::ClassificationKind
+DependenceAnalysis::classifyPair(const SCEV *Src, const Loop *SrcLoopNest,
+ const SCEV *Dst, const Loop *DstLoopNest,
+ SmallBitVector &Loops) {
+ SmallBitVector SrcLoops(MaxLevels + 1);
+ SmallBitVector DstLoops(MaxLevels + 1);
+ if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops))
+ return Subscript::NonLinear;
+ if (!checkDstSubscript(Dst, DstLoopNest, DstLoops))
+ return Subscript::NonLinear;
+ Loops = SrcLoops;
+ Loops |= DstLoops;
+ unsigned N = Loops.count();
+ if (N == 0)
+ return Subscript::ZIV;
+ if (N == 1)
+ return Subscript::SIV;
+ if (N == 2 && (SrcLoops.count() == 0 ||
+ DstLoops.count() == 0 ||
+ (SrcLoops.count() == 1 && DstLoops.count() == 1)))
+ return Subscript::RDIV;
+ return Subscript::MIV;
+}
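
Illustrative subscript pairs and the classes they receive (A, B, C, D, m, and n
are hypothetical; i and j are the induction variables of the two loops):

    for (int i = 0; i < n; ++i)
      for (int j = 0; j < n; ++j) {
        A[5]     = A[m];         // ZIV:  no induction variable appears
        B[i]     = B[i + 1];     // SIV:  exactly one loop (i) appears
        C[i + 1] = C[j];         // RDIV: one loop on each side, i vs. j
        D[i + j] = D[i + j - 1]; // MIV:  two loops in a single subscript
      }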
+
+
+// A wrapper around SCEV::isKnownPredicate.
+// Looks for cases where we're interested in comparing for equality.
+// If both X and Y have been identically sign or zero extended,
+// it strips off the (confusing) extensions before invoking
+// SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package
+// will be similarly updated.
+//
+// If SCEV::isKnownPredicate can't prove the predicate,
+// we try simple subtraction, which seems to help in some cases
+// involving symbolics.
+bool DependenceAnalysis::isKnownPredicate(ICmpInst::Predicate Pred,
+ const SCEV *X,
+ const SCEV *Y) const {
+ if (Pred == CmpInst::ICMP_EQ ||
+ Pred == CmpInst::ICMP_NE) {
+ if ((isa<SCEVSignExtendExpr>(X) &&
+ isa<SCEVSignExtendExpr>(Y)) ||
+ (isa<SCEVZeroExtendExpr>(X) &&
+ isa<SCEVZeroExtendExpr>(Y))) {
+ const SCEVCastExpr *CX = cast<SCEVCastExpr>(X);
+ const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y);
+ const SCEV *Xop = CX->getOperand();
+ const SCEV *Yop = CY->getOperand();
+ if (Xop->getType() == Yop->getType()) {
+ X = Xop;
+ Y = Yop;
+ }
+ }
+ }
+ if (SE->isKnownPredicate(Pred, X, Y))
+ return true;
+ // If SE->isKnownPredicate can't prove the condition,
+ // we try the brute-force approach of subtracting
+ // and testing the difference.
+ // By testing with SE->isKnownPredicate first, we avoid
+ // the possibility of overflow when the arguments are constants.
+ const SCEV *Delta = SE->getMinusSCEV(X, Y);
+ switch (Pred) {
+ case CmpInst::ICMP_EQ:
+ return Delta->isZero();
+ case CmpInst::ICMP_NE:
+ return SE->isKnownNonZero(Delta);
+ case CmpInst::ICMP_SGE:
+ return SE->isKnownNonNegative(Delta);
+ case CmpInst::ICMP_SLE:
+ return SE->isKnownNonPositive(Delta);
+ case CmpInst::ICMP_SGT:
+ return SE->isKnownPositive(Delta);
+ case CmpInst::ICMP_SLT:
+ return SE->isKnownNegative(Delta);
+ default:
+ llvm_unreachable("unexpected predicate in isKnownPredicate");
+ }
+}
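
For example, comparing the add recurrences {2,+,1} and {0,+,1} over the same
loop: the subtraction folds to the constant Delta = 2, so ICMP_NE (and
ICMP_SGT) can be proved even though neither operand is a constant by itself.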
+
+
+// All subscripts are the same type.
+// Loop bound may be smaller (e.g., a char).
+// Should zero extend loop bound, since it's always >= 0.
+// This routine collects upper bound and extends if needed.
+// Return null if no bound available.
+const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L,
+ Type *T) const {
+ if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
+ const SCEV *UB = SE->getBackedgeTakenCount(L);
+ return SE->getNoopOrZeroExtend(UB, T);
+ }
+ return NULL;
+}
+
+
+// Calls collectUpperBound(), then attempts to cast it to SCEVConstant.
+// If the cast fails, returns NULL.
+const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L,
+ Type *T) const {
+ if (const SCEV *UB = collectUpperBound(L, T))
+ return dyn_cast<SCEVConstant>(UB);
+ return NULL;
+}
+
+
+// testZIV -
+// When we have a pair of subscripts of the form [c1] and [c2],
+// where c1 and c2 are both loop invariant, we attack it using
+// the ZIV test. Basically, we test by comparing the two values,
+// but there are actually three possible results:
+// 1) the values are equal, so there's a dependence
+// 2) the values are different, so there's no dependence
+// 3) the values might be equal, so we have to assume a dependence.
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::testZIV(const SCEV *Src,
+ const SCEV *Dst,
+ FullDependence &Result) const {
+ DEBUG(dbgs() << " src = " << *Src << "\n");
+ DEBUG(dbgs() << " dst = " << *Dst << "\n");
+ ++ZIVapplications;
+ if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) {
+ DEBUG(dbgs() << " provably dependent\n");
+ return false; // provably dependent
+ }
+ if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) {
+ DEBUG(dbgs() << " provably independent\n");
+ ++ZIVindependence;
+ return true; // provably independent
+ }
+ DEBUG(dbgs() << " possibly dependent\n");
+ Result.Consistent = false;
+ return false; // possibly dependent
+}
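
Concretely (x, y, and m are loop-invariant; illustrative fragments inside some
loop body):

    A[5] = x;  y = A[5];   // 5 == 5: provably dependent
    A[5] = x;  y = A[6];   // 5 != 6: provably independent
    A[5] = x;  y = A[m];   // m unknown: must assume a dependence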
+
+
+// strongSIVtest -
+// From the paper, Practical Dependence Testing, Section 4.2.1
+//
+// When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i],
+// where i is an induction variable, c1 and c2 are loop invariant,
+// and a is a constant, we can solve it exactly using the Strong SIV test.
+//
+// Can prove independence. Failing that, can compute distance (and direction).
+// In the presence of symbolic terms, we can sometimes make progress.
+//
+// If there's a dependence,
+//
+// c1 + a*i = c2 + a*i'
+//
+// The dependence distance is
+//
+// d = i' - i = (c1 - c2)/a
+//
+// A dependence only exists if d is an integer and abs(d) <= U, where U is the
+// loop's upper bound. If a dependence exists, the dependence direction is
+// defined as
+//
+// { < if d > 0
+// direction = { = if d = 0
+// { > if d < 0
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint) const {
+ DEBUG(dbgs() << "\tStrong SIV test\n");
+ DEBUG(dbgs() << "\t Coeff = " << *Coeff);
+ DEBUG(dbgs() << ", " << *Coeff->getType() << "\n");
+ DEBUG(dbgs() << "\t SrcConst = " << *SrcConst);
+ DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n");
+ DEBUG(dbgs() << "\t DstConst = " << *DstConst);
+ DEBUG(dbgs() << ", " << *DstConst->getType() << "\n");
+ ++StrongSIVapplications;
+ assert(0 < Level && Level <= CommonLevels && "level out of range");
+ Level--;
+
+ const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
+ DEBUG(dbgs() << "\t Delta = " << *Delta);
+ DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
+
+ // check that |Delta| < iteration count
+ if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+ DEBUG(dbgs() << "\t UpperBound = " << *UpperBound);
+ DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");
+ const SCEV *AbsDelta =
+ SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta);
+ const SCEV *AbsCoeff =
+ SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff);
+ const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff);
+ if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) {
+ // Distance greater than trip count - no dependence
+ ++StrongSIVindependence;
+ ++StrongSIVsuccesses;
+ return true;
+ }
+ }
+
+ // Can we compute distance?
+ if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) {
+ APInt ConstDelta = cast<SCEVConstant>(Delta)->getValue()->getValue();
+ APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getValue()->getValue();
+ APInt Distance = ConstDelta; // these need to be initialized
+ APInt Remainder = ConstDelta;
+ APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder);
+ DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
+ DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
+ // Make sure Coeff divides Delta exactly
+ if (Remainder != 0) {
+ // Coeff doesn't divide Distance, no dependence
+ ++StrongSIVindependence;
+ ++StrongSIVsuccesses;
+ return true;
+ }
+ Result.DV[Level].Distance = SE->getConstant(Distance);
+ NewConstraint.setDistance(SE->getConstant(Distance), CurLoop);
+ if (Distance.sgt(0))
+ Result.DV[Level].Direction &= Dependence::DVEntry::LT;
+ else if (Distance.slt(0))
+ Result.DV[Level].Direction &= Dependence::DVEntry::GT;
+ else
+ Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
+ ++StrongSIVsuccesses;
+ }
+ else if (Delta->isZero()) {
+ // since 0/X == 0
+ Result.DV[Level].Distance = Delta;
+ NewConstraint.setDistance(Delta, CurLoop);
+ Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
+ ++StrongSIVsuccesses;
+ }
+ else {
+ if (Coeff->isOne()) {
+ DEBUG(dbgs() << "\t Distance = " << *Delta << "\n");
+ Result.DV[Level].Distance = Delta; // since X/1 == X
+ NewConstraint.setDistance(Delta, CurLoop);
+ }
+ else {
+ Result.Consistent = false;
+ NewConstraint.setLine(Coeff,
+ SE->getNegativeSCEV(Coeff),
+ SE->getNegativeSCEV(Delta), CurLoop);
+ }
+
+ // maybe we can get a useful direction
+ bool DeltaMaybeZero = !SE->isKnownNonZero(Delta);
+ bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta);
+ bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta);
+ bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff);
+ bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff);
+ // The double negatives above are confusing.
+ // It helps to read !SE->isKnownNonZero(Delta)
+ // as "Delta might be Zero"
+ unsigned NewDirection = Dependence::DVEntry::NONE;
+ if ((DeltaMaybePositive && CoeffMaybePositive) ||
+ (DeltaMaybeNegative && CoeffMaybeNegative))
+ NewDirection = Dependence::DVEntry::LT;
+ if (DeltaMaybeZero)
+ NewDirection |= Dependence::DVEntry::EQ;
+ if ((DeltaMaybeNegative && CoeffMaybePositive) ||
+ (DeltaMaybePositive && CoeffMaybeNegative))
+ NewDirection |= Dependence::DVEntry::GT;
+ if (NewDirection < Result.DV[Level].Direction)
+ ++StrongSIVsuccesses;
+ Result.DV[Level].Direction &= NewDirection;
+ }
+ return false;
+}
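
A concrete instance (illustrative):

    for (int i = 0; i < n; ++i)
      A[i + 2] = A[i] + 1;   // c1 = 2, c2 = 0, a = 1

Here d = (c1 - c2)/a = 2, so when n > 2 the store and load are dependent with
distance 2 and direction <.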
+
+
+// weakCrossingSIVtest -
+// From the paper, Practical Dependence Testing, Section 4.2.2
+//
+// When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i],
+// where i is an induction variable, c1 and c2 are loop invariant,
+// and a is a constant, we can solve it exactly using the
+// Weak-Crossing SIV test.
+//
+// Given c1 + a*i = c2 - a*i', we can look for the intersection of
+// the two lines, where i = i', yielding
+//
+// c1 + a*i = c2 - a*i
+// 2a*i = c2 - c1
+// i = (c2 - c1)/2a
+//
+// If i < 0, there is no dependence.
+// If i > upperbound, there is no dependence.
+// If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0.
+// If i = upperbound, there's a dependence with distance = 0.
+// If i is integral, there's a dependence (all directions).
+// If the non-integer part = 1/2, there's a dependence (<> directions).
+// Otherwise, there's no dependence.
+//
+// Can prove independence. Failing that,
+// can sometimes refine the directions.
+// Can determine iteration for splitting.
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint,
+ const SCEV *&SplitIter) const {
+ DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
+ DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n");
+ DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ ++WeakCrossingSIVapplications;
+ assert(0 < Level && Level <= CommonLevels && "Level out of range");
+ Level--;
+ Result.Consistent = false;
+ const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+ DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop);
+ if (Delta->isZero()) {
+ Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
+ Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
+ ++WeakCrossingSIVsuccesses;
+ if (!Result.DV[Level].Direction) {
+ ++WeakCrossingSIVindependence;
+ return true;
+ }
+ Result.DV[Level].Distance = Delta; // = 0
+ return false;
+ }
+ const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff);
+ if (!ConstCoeff)
+ return false;
+
+ Result.DV[Level].Splitable = true;
+ if (SE->isKnownNegative(ConstCoeff)) {
+ ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff));
+ assert(ConstCoeff &&
+ "dynamic cast of negative of ConstCoeff should yield constant");
+ Delta = SE->getNegativeSCEV(Delta);
+ }
+ assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive");
+
+ // compute SplitIter for use by DependenceAnalysis::getSplitIteration()
+ SplitIter =
+ SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0),
+ Delta),
+ SE->getMulExpr(SE->getConstant(Delta->getType(), 2),
+ ConstCoeff));
+ DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n");
+
+ const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
+ if (!ConstDelta)
+ return false;
+
+ // We're certain that ConstCoeff > 0; therefore,
+ // if Delta < 0, then no dependence.
+ DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n");
+ if (SE->isKnownNegative(Delta)) {
+ // No dependence, Delta < 0
+ ++WeakCrossingSIVindependence;
+ ++WeakCrossingSIVsuccesses;
+ return true;
+ }
+
+ // We're certain that Delta > 0 and ConstCoeff > 0.
+ // Check Delta/(2*ConstCoeff) against upper loop bound
+ if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+ DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
+ const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2);
+ const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound),
+ ConstantTwo);
+ DEBUG(dbgs() << "\t ML = " << *ML << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) {
+ // Delta too big, no dependence
+ ++WeakCrossingSIVindependence;
+ ++WeakCrossingSIVsuccesses;
+ return true;
+ }
+ if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) {
+ // i = i' = UB
+ Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
+ Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
+ ++WeakCrossingSIVsuccesses;
+ if (!Result.DV[Level].Direction) {
+ ++WeakCrossingSIVindependence;
+ return true;
+ }
+ Result.DV[Level].Splitable = false;
+ Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0);
+ return false;
+ }
+ }
+
+ // check that Coeff divides Delta
+ APInt APDelta = ConstDelta->getValue()->getValue();
+ APInt APCoeff = ConstCoeff->getValue()->getValue();
+ APInt Distance = APDelta; // these need to be initialized
+ APInt Remainder = APDelta;
+ APInt::sdivrem(APDelta, APCoeff, Distance, Remainder);
+ DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
+ if (Remainder != 0) {
+ // Coeff doesn't divide Delta, no dependence
+ ++WeakCrossingSIVindependence;
+ ++WeakCrossingSIVsuccesses;
+ return true;
+ }
+ DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
+
+ // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible
+ APInt Two = APInt(Distance.getBitWidth(), 2, true);
+ Remainder = Distance.srem(Two);
+ DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
+ if (Remainder != 0) {
+ // Equal direction isn't possible
+ Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ);
+ ++WeakCrossingSIVsuccesses;
+ }
+ return false;
+}
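
For instance (illustrative):

    for (int i = 0; i <= 10; ++i)
      A[i] = A[10 - i] + 1;   // store: c1 = 0, +a*i with a = 1
                              // load:  c2 = 10, -a*i

The lines cross at i = (c2 - c1)/2a = 5, an integral iteration within bounds,
so a dependence exists and the loop is splitable at iteration 5.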
+
+
+// Kirch's algorithm, from
+//
+// Optimizing Supercompilers for Supercomputers
+// Michael Wolfe
+// MIT Press, 1989
+//
+// Program 2.1, page 29.
+// Computes the GCD of AM and BM.
+// Also finds a solution to the equation ax - by = gcd(a, b).
+// Returns true iff the gcd does not divide Delta
+// (so that no dependence is possible).
+static
+bool findGCD(unsigned Bits, APInt AM, APInt BM, APInt Delta,
+ APInt &G, APInt &X, APInt &Y) {
+ APInt A0(Bits, 1, true), A1(Bits, 0, true);
+ APInt B0(Bits, 0, true), B1(Bits, 1, true);
+ APInt G0 = AM.abs();
+ APInt G1 = BM.abs();
+ APInt Q = G0; // these need to be initialized
+ APInt R = G0;
+ APInt::sdivrem(G0, G1, Q, R);
+ while (R != 0) {
+ APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2;
+ APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2;
+ G0 = G1; G1 = R;
+ APInt::sdivrem(G0, G1, Q, R);
+ }
+ G = G1;
+ DEBUG(dbgs() << "\t GCD = " << G << "\n");
+ X = AM.slt(0) ? -A1 : A1;
+ Y = BM.slt(0) ? B1 : -B1;
+
+ // make sure gcd divides Delta
+ R = Delta.srem(G);
+ if (R != 0)
+ return true; // gcd doesn't divide Delta, no dependence
+ Q = Delta.sdiv(G);
+ X *= Q;
+ Y *= Q;
+ return false;
+}
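
Worked example: for AM = 6 and BM = 4, the loop above computes G = gcd(6, 4) = 2
with the particular solution X = 1, Y = 1 (6*1 - 4*1 = 2). Given Delta = 3, the
remainder of 3 srem 2 is nonzero, so findGCD returns true and no dependence is
possible; given Delta = 2, Q = 1 and (X, Y) = (1, 1) is scaled into a solution
of ax - by = Delta.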
+
+
+static
+APInt floorOfQuotient(APInt A, APInt B) {
+ APInt Q = A; // these need to be initialized
+ APInt R = A;
+ APInt::sdivrem(A, B, Q, R);
+ if (R == 0)
+ return Q;
+ if ((A.sgt(0) && B.sgt(0)) ||
+ (A.slt(0) && B.slt(0)))
+ return Q;
+ else
+ return Q - 1;
+}
+
+
+static
+APInt ceilingOfQuotient(APInt A, APInt B) {
+ APInt Q = A; // these need to be initialized
+ APInt R = A;
+ APInt::sdivrem(A, B, Q, R);
+ if (R == 0)
+ return Q;
+ if ((A.sgt(0) && B.sgt(0)) ||
+ (A.slt(0) && B.slt(0)))
+ return Q + 1;
+ else
+ return Q;
+}
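
These differ from APInt division, which truncates toward zero; the bound
computations below need true floor and ceiling. For example,
floorOfQuotient(-7, 2) sees Q = -3, R = -1 with operands of opposite sign and
returns Q - 1 = -4 (the floor of -3.5), while ceilingOfQuotient(7, 2) sees
Q = 3, R = 1 with matching signs and returns Q + 1 = 4 (the ceiling of 3.5).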
+
+
+static
+APInt maxAPInt(APInt A, APInt B) {
+ return A.sgt(B) ? A : B;
+}
+
+
+static
+APInt minAPInt(APInt A, APInt B) {
+ return A.slt(B) ? A : B;
+}
+
+
+// exactSIVtest -
+// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
+// where i is an induction variable, c1 and c2 are loop invariant, and a1
+// and a2 are constant, we can solve it exactly using an algorithm developed
+// by Banerjee and Wolfe. See Section 2.5.3 in
+//
+// Optimizing Supercompilers for Supercomputers
+// Michael Wolfe
+// MIT Press, 1989
+//
+// It's slower than the specialized tests (strong SIV, weak-zero SIV, etc),
+// so use them if possible. They're also a bit better with symbolics and,
+// in the case of the strong SIV test, can compute Distances.
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff,
+ const SCEV *DstCoeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint) const {
+ DEBUG(dbgs() << "\tExact SIV test\n");
+ DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
+ DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
+ DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ ++ExactSIVapplications;
+ assert(0 < Level && Level <= CommonLevels && "Level out of range");
+ Level--;
+ Result.Consistent = false;
+ const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+ DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff),
+ Delta, CurLoop);
+ const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
+ const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
+ const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
+ if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
+ return false;
+
+ // find gcd
+ APInt G, X, Y;
+ APInt AM = ConstSrcCoeff->getValue()->getValue();
+ APInt BM = ConstDstCoeff->getValue()->getValue();
+ unsigned Bits = AM.getBitWidth();
+ if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) {
+ // gcd doesn't divide Delta, no dependence
+ ++ExactSIVindependence;
+ ++ExactSIVsuccesses;
+ return true;
+ }
+
+ DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
+
+ // since SCEV construction normalizes, LM = 0
+ APInt UM(Bits, 1, true);
+ bool UMvalid = false;
+ // UM is perhaps unavailable, let's check
+ if (const SCEVConstant *CUB =
+ collectConstantUpperBound(CurLoop, Delta->getType())) {
+ UM = CUB->getValue()->getValue();
+ DEBUG(dbgs() << "\t UM = " << UM << "\n");
+ UMvalid = true;
+ }
+
+ APInt TU(APInt::getSignedMaxValue(Bits));
+ APInt TL(APInt::getSignedMinValue(Bits));
+
+ // test(BM/G, LM-X) and test(-BM/G, X-UM)
+ APInt TMUL = BM.sdiv(G);
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ if (UMvalid) {
+ TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ }
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ if (UMvalid) {
+ TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ }
+ }
+
+ // test(AM/G, LM-Y) and test(-AM/G, Y-UM)
+ TMUL = AM.sdiv(G);
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ if (UMvalid) {
+ TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ }
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ if (UMvalid) {
+ TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ }
+ }
+ if (TL.sgt(TU)) {
+ ++ExactSIVindependence;
+ ++ExactSIVsuccesses;
+ return true;
+ }
+
+ // explore directions
+ unsigned NewDirection = Dependence::DVEntry::NONE;
+
+ // less than
+ APInt SaveTU(TU); // save these
+ APInt SaveTL(TL);
+ DEBUG(dbgs() << "\t exploring LT direction\n");
+ TMUL = AM - BM;
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL));
+ DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL));
+ DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
+ }
+ if (TL.sle(TU)) {
+ NewDirection |= Dependence::DVEntry::LT;
+ ++ExactSIVsuccesses;
+ }
+
+ // equal
+ TU = SaveTU; // restore
+ TL = SaveTL;
+ DEBUG(dbgs() << "\t exploring EQ direction\n");
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL));
+ DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL));
+ DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
+ }
+ TMUL = BM - AM;
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL));
+ DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL));
+ DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
+ }
+ if (TL.sle(TU)) {
+ NewDirection |= Dependence::DVEntry::EQ;
+ ++ExactSIVsuccesses;
+ }
+
+ // greater than
+ TU = SaveTU; // restore
+ TL = SaveTL;
+ DEBUG(dbgs() << "\t exploring GT direction\n");
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL));
+ DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL));
+ DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
+ }
+ if (TL.sle(TU)) {
+ NewDirection |= Dependence::DVEntry::GT;
+ ++ExactSIVsuccesses;
+ }
+
+ // finished
+ Result.DV[Level].Direction &= NewDirection;
+ if (Result.DV[Level].Direction == Dependence::DVEntry::NONE)
+ ++ExactSIVindependence;
+ return Result.DV[Level].Direction == Dependence::DVEntry::NONE;
+}
+
+
+
+// Return true if the divisor evenly divides the dividend.
+static
+bool isRemainderZero(const SCEVConstant *Dividend,
+ const SCEVConstant *Divisor) {
+ APInt ConstDividend = Dividend->getValue()->getValue();
+ APInt ConstDivisor = Divisor->getValue()->getValue();
+ return ConstDividend.srem(ConstDivisor) == 0;
+}
+
+
+// weakZeroSrcSIVtest -
+// From the paper, Practical Dependence Testing, Section 4.2.2
+//
+// When we have a pair of subscripts of the form [c1] and [c2 + a*i],
+// where i is an induction variable, c1 and c2 are loop invariant,
+// and a is a constant, we can solve it exactly using the
+// Weak-Zero SIV test.
+//
+// Given
+//
+// c1 = c2 + a*i
+//
+// we get
+//
+// (c1 - c2)/a = i
+//
+// If i is not an integer, there's no dependence.
+// If i < 0 or > UB, there's no dependence.
+// If i = 0, the direction is <= and peeling the
+// 1st iteration will break the dependence.
+// If i = UB, the direction is >= and peeling the
+// last iteration will break the dependence.
+// Otherwise, the direction is *.
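+//
+// For instance (made-up subscripts), with src [5] and dst [2*i + 1],
+// i = (5 - 1)/2 = 2, so any dependence occurs only at iteration 2;
+// with dst [2*i] instead, i = 5/2 isn't an integer and there's no dependence.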
+//
+// Can prove independence. Failing that, we can sometimes refine
+// the directions. Can sometimes show that first or last
+// iteration carries all the dependences (so worth peeling).
+//
+// (see also weakZeroDstSIVtest)
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint) const {
+ // For the weak-zero SIV test, it's possible the loop isn't common to
+ // the Src and Dst loops. If it isn't, then there's no need to
+ // record a direction.
+ DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");
+ DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n");
+ DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ ++WeakZeroSIVapplications;
+ assert(0 < Level && Level <= MaxLevels && "Level out of range");
+ Level--;
+ Result.Consistent = false;
+ const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
+ NewConstraint.setLine(SE->getConstant(Delta->getType(), 0),
+ DstCoeff, Delta, CurLoop);
+ DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) {
+ if (Level < CommonLevels) {
+ Result.DV[Level].Direction &= Dependence::DVEntry::LE;
+ Result.DV[Level].PeelFirst = true;
+ ++WeakZeroSIVsuccesses;
+ }
+ return false; // dependences caused by first iteration
+ }
+ const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
+ if (!ConstCoeff)
+ return false;
+ const SCEV *AbsCoeff =
+ SE->isKnownNegative(ConstCoeff) ?
+ SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;
+ const SCEV *NewDelta =
+ SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
+
+ // check that Delta/SrcCoeff < iteration count
+ // really check NewDelta < count*AbsCoeff
+ if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+ DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
+ const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
+ if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
+ ++WeakZeroSIVindependence;
+ ++WeakZeroSIVsuccesses;
+ return true;
+ }
+ if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
+ // dependences caused by last iteration
+ if (Level < CommonLevels) {
+ Result.DV[Level].Direction &= Dependence::DVEntry::GE;
+ Result.DV[Level].PeelLast = true;
+ ++WeakZeroSIVsuccesses;
+ }
+ return false;
+ }
+ }
+
+ // check that Delta/SrcCoeff >= 0
+ // really check that NewDelta >= 0
+ if (SE->isKnownNegative(NewDelta)) {
+ // No dependence, NewDelta < 0
+ ++WeakZeroSIVindependence;
+ ++WeakZeroSIVsuccesses;
+ return true;
+ }
+
+ // if SrcCoeff doesn't divide Delta, then no dependence
+ if (isa<SCEVConstant>(Delta) &&
+ !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
+ ++WeakZeroSIVindependence;
+ ++WeakZeroSIVsuccesses;
+ return true;
+ }
+ return false;
+}
+
+
+// weakZeroDstSIVtest -
+// From the paper, Practical Dependence Testing, Section 4.2.2
+//
+// When we have a pair of subscripts of the form [c1 + a*i] and [c2],
+// where i is an induction variable, c1 and c2 are loop invariant,
+// and a is a constant, we can solve it exactly using the
+// Weak-Zero SIV test.
+//
+// Given
+//
+// c1 + a*i = c2
+//
+// we get
+//
+// i = (c2 - c1)/a
+//
+// If i is not an integer, there's no dependence.
+// If i < 0 or > UB, there's no dependence.
+// If i = 0, the direction is <= and peeling the
+// 1st iteration will break the dependence.
+// If i = UB, the direction is >= and peeling the
+// last iteration will break the dependence.
+// Otherwise, the direction is *.
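+//
+// For instance (made-up subscripts), with src [3*i] and dst [6],
+// i = 6/3 = 2; if the loop's upper bound is less than 2,
+// the dependence is disproved.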
+//
+// Can prove independence. Failing that, we can sometimes refine
+// the directions. Can sometimes show that first or last
+// iteration carries all the dependences (so worth peeling).
+//
+// (see also weakZeroSrcSIVtest)
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint) const {
+ // For the weak-zero SIV test, it's possible the loop isn't common to the
+ // Src and Dst loops. If it isn't, then there's no need to record a direction.
+ DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
+ DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n");
+ DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ ++WeakZeroSIVapplications;
+ assert(0 < Level && Level <= SrcLevels && "Level out of range");
+ Level--;
+ Result.Consistent = false;
+ const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+ NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0),
+ Delta, CurLoop);
+ DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) {
+ if (Level < CommonLevels) {
+ Result.DV[Level].Direction &= Dependence::DVEntry::LE;
+ Result.DV[Level].PeelFirst = true;
+ ++WeakZeroSIVsuccesses;
+ }
+ return false; // dependences caused by first iteration
+ }
+ const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
+ if (!ConstCoeff)
+ return false;
+ const SCEV *AbsCoeff =
+ SE->isKnownNegative(ConstCoeff) ?
+ SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;
+ const SCEV *NewDelta =
+ SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
+
+ // check that Delta/SrcCoeff < iteration count
+ // really check NewDelta < count*AbsCoeff
+ if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+ DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
+ const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
+ if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
+ ++WeakZeroSIVindependence;
+ ++WeakZeroSIVsuccesses;
+ return true;
+ }
+ if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
+ // dependences caused by last iteration
+ if (Level < CommonLevels) {
+ Result.DV[Level].Direction &= Dependence::DVEntry::GE;
+ Result.DV[Level].PeelLast = true;
+ ++WeakZeroSIVsuccesses;
+ }
+ return false;
+ }
+ }
+
+ // check that Delta/SrcCoeff >= 0
+ // really check that NewDelta >= 0
+ if (SE->isKnownNegative(NewDelta)) {
+ // No dependence, NewDelta < 0
+ ++WeakZeroSIVindependence;
+ ++WeakZeroSIVsuccesses;
+ return true;
+ }
+
+ // if SrcCoeff doesn't divide Delta, then no dependence
+ if (isa<SCEVConstant>(Delta) &&
+ !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
+ ++WeakZeroSIVindependence;
+ ++WeakZeroSIVsuccesses;
+ return true;
+ }
+ return false;
+}
+
+
+// exactRDIVtest - Tests the RDIV subscript pair for dependence.
+// Things of the form [c1 + a*i] and [c2 + b*j],
+// where i and j are induction variables, c1 and c2 are loop invariant,
+// and a and b are constants.
+// Returns true if any possible dependence is disproved.
+// Marks the result as inconsistent.
+// Works in some cases that symbolicRDIVtest doesn't, and vice versa.
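+//
+// A worked instance (made-up bounds): with src [i] for 0 <= i <= 9 and
+// dst [j + 20] for 0 <= j <= 9, any solution needs i = j + 20;
+// the bounds yield TL = 20 > TU = 9, disproving the dependence.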
+bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff,
+ const SCEV *DstCoeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *SrcLoop,
+ const Loop *DstLoop,
+ FullDependence &Result) const {
+ DEBUG(dbgs() << "\tExact RDIV test\n");
+ DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
+ DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
+ DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ ++ExactRDIVapplications;
+ Result.Consistent = false;
+ const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+ DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
+ const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
+ const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
+ if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
+ return false;
+
+ // find gcd
+ APInt G, X, Y;
+ APInt AM = ConstSrcCoeff->getValue()->getValue();
+ APInt BM = ConstDstCoeff->getValue()->getValue();
+ unsigned Bits = AM.getBitWidth();
+ if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) {
+ // gcd doesn't divide Delta, no dependence
+ ++ExactRDIVindependence;
+ return true;
+ }
+
+ DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
+
+ // since SCEV construction seems to normalize, LM = 0
+ APInt SrcUM(Bits, 1, true);
+ bool SrcUMvalid = false;
+ // SrcUM is perhaps unavailable, let's check
+ if (const SCEVConstant *UpperBound =
+ collectConstantUpperBound(SrcLoop, Delta->getType())) {
+ SrcUM = UpperBound->getValue()->getValue();
+ DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n");
+ SrcUMvalid = true;
+ }
+
+ APInt DstUM(Bits, 1, true);
+ bool DstUMvalid = false;
+ // DstUM is perhaps unavailable, let's check
+ if (const SCEVConstant *UpperBound =
+ collectConstantUpperBound(DstLoop, Delta->getType())) {
+ DstUM = UpperBound->getValue()->getValue();
+ DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n");
+ DstUMvalid = true;
+ }
+
+ APInt TU(APInt::getSignedMaxValue(Bits));
+ APInt TL(APInt::getSignedMinValue(Bits));
+
+ // test(BM/G, LM-X) and test(-BM/G, X-UM)
+ APInt TMUL = BM.sdiv(G);
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ if (SrcUMvalid) {
+ TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ }
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ if (SrcUMvalid) {
+ TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ }
+ }
+
+ // test(AM/G, LM-Y) and test(-AM/G, Y-UM)
+ TMUL = AM.sdiv(G);
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ if (DstUMvalid) {
+ TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ }
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ if (DstUMvalid) {
+ TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ }
+ }
+ if (TL.sgt(TU))
+ ++ExactRDIVindependence;
+ return TL.sgt(TU);
+}
+
+
+// symbolicRDIVtest -
+// In Section 4.5 of the Practical Dependence Testing paper, the authors
+// introduce a special case of Banerjee's Inequalities (also called the
+// Extreme-Value Test) that can handle some of the SIV and RDIV cases,
+// particularly cases with symbolics. Since it's only able to disprove
+// dependence (not compute distances or directions), we'll use it as a
+// fall back for the other tests.
+//
+// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
+// where i and j are induction variables and c1 and c2 are loop invariants,
+// we can use the symbolic tests to disprove some dependences, serving as a
+// backup for the RDIV test. Note that i and j can be the same variable,
+// letting this test serve as a backup for the various SIV tests.
+//
+// For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some
+// 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized)
+// loop bounds for the i and j loops, respectively. So, ...
+//
+// c1 + a1*i = c2 + a2*j
+// a1*i - a2*j = c2 - c1
+//
+// To test for a dependence, we compute c2 - c1 and make sure it's in the
+// range of the maximum and minimum possible values of a1*i - a2*j.
+// Considering the signs of a1 and a2, we have 4 possible cases:
+//
+// 1) If a1 >= 0 and a2 >= 0, then
+// a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0
+// -a2*N2 <= c2 - c1 <= a1*N1
+//
+// 2) If a1 >= 0 and a2 <= 0, then
+// a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2
+// 0 <= c2 - c1 <= a1*N1 - a2*N2
+//
+// 3) If a1 <= 0 and a2 >= 0, then
+// a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0
+// a1*N1 - a2*N2 <= c2 - c1 <= 0
+//
+// 4) If a1 <= 0 and a2 <= 0, then
+// a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2
+// a1*N1 <= c2 - c1 <= -a2*N2
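+//
+// For instance (made-up values), with a1 = 2, a2 = 3, c2 - c1 = 100, and
+// N1 = N2 = 10, case 1 requires -30 <= 100 <= 20, which fails,
+// so the dependence is disproved.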
+//
+// return true if dependence disproved
+bool DependenceAnalysis::symbolicRDIVtest(const SCEV *A1,
+ const SCEV *A2,
+ const SCEV *C1,
+ const SCEV *C2,
+ const Loop *Loop1,
+ const Loop *Loop2) const {
+ ++SymbolicRDIVapplications;
+ DEBUG(dbgs() << "\ttry symbolic RDIV test\n");
+ DEBUG(dbgs() << "\t A1 = " << *A1);
+ DEBUG(dbgs() << ", type = " << *A1->getType() << "\n");
+ DEBUG(dbgs() << "\t A2 = " << *A2 << "\n");
+ DEBUG(dbgs() << "\t C1 = " << *C1 << "\n");
+ DEBUG(dbgs() << "\t C2 = " << *C2 << "\n");
+ const SCEV *N1 = collectUpperBound(Loop1, A1->getType());
+ const SCEV *N2 = collectUpperBound(Loop2, A1->getType());
+ DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n");
+ DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n");
+ const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1);
+ const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2);
+ DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n");
+ DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n");
+ if (SE->isKnownNonNegative(A1)) {
+ if (SE->isKnownNonNegative(A2)) {
+ // A1 >= 0 && A2 >= 0
+ if (N1) {
+ // make sure that c2 - c1 <= a1*N1
+ const SCEV *A1N1 = SE->getMulExpr(A1, N1);
+ DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ if (N2) {
+ // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2
+ const SCEV *A2N2 = SE->getMulExpr(A2, N2);
+ DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ }
+ else if (SE->isKnownNonPositive(A2)) {
+ // a1 >= 0 && a2 <= 0
+ if (N1 && N2) {
+ // make sure that c2 - c1 <= a1*N1 - a2*N2
+ const SCEV *A1N1 = SE->getMulExpr(A1, N1);
+ const SCEV *A2N2 = SE->getMulExpr(A2, N2);
+ const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
+ DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ // make sure that 0 <= c2 - c1
+ if (SE->isKnownNegative(C2_C1)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ }
+ else if (SE->isKnownNonPositive(A1)) {
+ if (SE->isKnownNonNegative(A2)) {
+ // a1 <= 0 && a2 >= 0
+ if (N1 && N2) {
+ // make sure that a1*N1 - a2*N2 <= c2 - c1
+ const SCEV *A1N1 = SE->getMulExpr(A1, N1);
+ const SCEV *A2N2 = SE->getMulExpr(A2, N2);
+ const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
+ DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ // make sure that c2 - c1 <= 0
+ if (SE->isKnownPositive(C2_C1)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ else if (SE->isKnownNonPositive(A2)) {
+ // a1 <= 0 && a2 <= 0
+ if (N1) {
+ // make sure that a1*N1 <= c2 - c1
+ const SCEV *A1N1 = SE->getMulExpr(A1, N1);
+ DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ if (N2) {
+ // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2
+ const SCEV *A2N2 = SE->getMulExpr(A2, N2);
+ DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+
+// testSIV -
+// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i]
+// where i is an induction variable, c1 and c2 are loop invariant, and a1 and
+// a2 are constant, we attack it with an SIV test. While they can all be
+// solved with the Exact SIV test, it's worthwhile to use simpler tests when
+// they apply; they're cheaper and sometimes more precise.
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::testSIV(const SCEV *Src,
+ const SCEV *Dst,
+ unsigned &Level,
+ FullDependence &Result,
+ Constraint &NewConstraint,
+ const SCEV *&SplitIter) const {
+ DEBUG(dbgs() << " src = " << *Src << "\n");
+ DEBUG(dbgs() << " dst = " << *Dst << "\n");
+ const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
+ const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
+ if (SrcAddRec && DstAddRec) {
+ const SCEV *SrcConst = SrcAddRec->getStart();
+ const SCEV *DstConst = DstAddRec->getStart();
+ const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
+ const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
+ const Loop *CurLoop = SrcAddRec->getLoop();
+ assert(CurLoop == DstAddRec->getLoop() &&
+ "both loops in SIV should be same");
+ Level = mapSrcLoop(CurLoop);
+ bool disproven;
+ if (SrcCoeff == DstCoeff)
+ disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
+ Level, Result, NewConstraint);
+ else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff))
+ disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
+ Level, Result, NewConstraint, SplitIter);
+ else
+ disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop,
+ Level, Result, NewConstraint);
+ return disproven ||
+ gcdMIVtest(Src, Dst, Result) ||
+ symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop);
+ }
+ if (SrcAddRec) {
+ const SCEV *SrcConst = SrcAddRec->getStart();
+ const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
+ const SCEV *DstConst = Dst;
+ const Loop *CurLoop = SrcAddRec->getLoop();
+ Level = mapSrcLoop(CurLoop);
+ return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
+ Level, Result, NewConstraint) ||
+ gcdMIVtest(Src, Dst, Result);
+ }
+ if (DstAddRec) {
+ const SCEV *DstConst = DstAddRec->getStart();
+ const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
+ const SCEV *SrcConst = Src;
+ const Loop *CurLoop = DstAddRec->getLoop();
+ Level = mapDstLoop(CurLoop);
+ return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst,
+ CurLoop, Level, Result, NewConstraint) ||
+ gcdMIVtest(Src, Dst, Result);
+ }
+ llvm_unreachable("SIV test expected at least one AddRec");
+ return false;
+}
+
+
+// testRDIV -
+// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
+// where i and j are induction variables, c1 and c2 are loop invariant,
+// and a1 and a2 are constant, we can solve it exactly with an easy adaptation
+// of the Exact SIV test, the Restricted Double Index Variable (RDIV) test.
+// It doesn't make sense to talk about distance or direction in this case,
+// so there's no point in making special versions of the Strong SIV test or
+// the Weak-crossing SIV test.
+//
+// With minor algebra, this test can also be used for things like
+// [c1 + a1*i + a2*j][c2].
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::testRDIV(const SCEV *Src,
+ const SCEV *Dst,
+ FullDependence &Result) const {
+ // we have 3 possible situations here:
+ // 1) [a*i + b] and [c*j + d]
+ // 2) [a*i + c*j + b] and [d]
+ // 3) [b] and [a*i + c*j + d]
+ // We need to find what we've got and get organized.
+
+ const SCEV *SrcConst, *DstConst;
+ const SCEV *SrcCoeff, *DstCoeff;
+ const Loop *SrcLoop, *DstLoop;
+
+ DEBUG(dbgs() << " src = " << *Src << "\n");
+ DEBUG(dbgs() << " dst = " << *Dst << "\n");
+ const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
+ const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
+ if (SrcAddRec && DstAddRec) {
+ SrcConst = SrcAddRec->getStart();
+ SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
+ SrcLoop = SrcAddRec->getLoop();
+ DstConst = DstAddRec->getStart();
+ DstCoeff = DstAddRec->getStepRecurrence(*SE);
+ DstLoop = DstAddRec->getLoop();
+ }
+ else if (SrcAddRec) {
+ if (const SCEVAddRecExpr *tmpAddRec =
+ dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) {
+ SrcConst = tmpAddRec->getStart();
+ SrcCoeff = tmpAddRec->getStepRecurrence(*SE);
+ SrcLoop = tmpAddRec->getLoop();
+ DstConst = Dst;
+ DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE));
+ DstLoop = SrcAddRec->getLoop();
+ }
+ else
+ llvm_unreachable("RDIV reached by surprising SCEVs");
+ }
+ else if (DstAddRec) {
+ if (const SCEVAddRecExpr *tmpAddRec =
+ dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) {
+ DstConst = tmpAddRec->getStart();
+ DstCoeff = tmpAddRec->getStepRecurrence(*SE);
+ DstLoop = tmpAddRec->getLoop();
+ SrcConst = Src;
+ SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE));
+ SrcLoop = DstAddRec->getLoop();
+ }
+ else
+ llvm_unreachable("RDIV reached by surprising SCEVs");
+ }
+ else
+ llvm_unreachable("RDIV expected at least one AddRec");
+ return exactRDIVtest(SrcCoeff, DstCoeff,
+ SrcConst, DstConst,
+ SrcLoop, DstLoop,
+ Result) ||
+ gcdMIVtest(Src, Dst, Result) ||
+ symbolicRDIVtest(SrcCoeff, DstCoeff,
+ SrcConst, DstConst,
+ SrcLoop, DstLoop);
+}
+
+
+// Tests the single-subscript MIV pair (Src and Dst) for dependence.
+// Return true if dependence disproved.
+// Can sometimes refine direction vectors.
+bool DependenceAnalysis::testMIV(const SCEV *Src,
+ const SCEV *Dst,
+ const SmallBitVector &Loops,
+ FullDependence &Result) const {
+ DEBUG(dbgs() << " src = " << *Src << "\n");
+ DEBUG(dbgs() << " dst = " << *Dst << "\n");
+ Result.Consistent = false;
+ return gcdMIVtest(Src, Dst, Result) ||
+ banerjeeMIVtest(Src, Dst, Loops, Result);
+}
+
+
+// Given a product, e.g., 10*X*Y, returns the first constant operand,
+// in this case 10. If there is no constant part, returns NULL.
+static
+const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) {
+ for (unsigned Op = 0, Ops = Product->getNumOperands(); Op < Ops; Op++) {
+ if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Product->getOperand(Op)))
+ return Constant;
+ }
+ return NULL;
+}
+
+
+//===----------------------------------------------------------------------===//
+// gcdMIVtest -
+// Tests an MIV subscript pair for dependence.
+// Returns true if any possible dependence is disproved.
+// Marks the result as inconsistent.
+// Can sometimes disprove the equal direction for 1 or more loops,
+// as discussed in Michael Wolfe's book,
+// High Performance Compilers for Parallel Computing, page 235.
+//
+// We spend some effort (code!) to handle cases like
+// [10*i + 5*N*j + 15*M + 6], where i and j are induction variables,
+// but M and N are just loop-invariant variables.
+// This should help us handle linearized subscripts;
+// also makes this test a useful backup to the various SIV tests.
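+//
+// For example (made-up subscripts), given src [4*i + 2*j + 1] and
+// dst [6*i + 4*j], every coefficient is divisible by 2 but the difference
+// of the constants is -1, which 2 doesn't divide, so independence is proved.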
+//
+// It occurs to me that the presence of loop-invariant variables
+// changes the nature of the test from "greatest common divisor"
+// to "a common divisor!"
+bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
+ const SCEV *Dst,
+ FullDependence &Result) const {
+ DEBUG(dbgs() << "starting gcd\n");
+ ++GCDapplications;
+ unsigned BitWidth = Src->getType()->getIntegerBitWidth();
+ APInt RunningGCD = APInt::getNullValue(BitWidth);
+
+ // Examine Src coefficients.
+ // Compute running GCD and record source constant.
+ // Because we're looking for the constant at the end of the chain,
+ // we can't quit the loop just because the GCD == 1.
+ const SCEV *Coefficients = Src;
+ while (const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(Coefficients)) {
+ const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
+ const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff);
+ if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
+ // If the coefficient is the product of a constant and other stuff,
+ // we can use the constant in the GCD computation.
+ Constant = getConstantPart(Product);
+ if (!Constant)
+ return false;
+ APInt ConstCoeff = Constant->getValue()->getValue();
+ RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+ Coefficients = AddRec->getStart();
+ }
+ const SCEV *SrcConst = Coefficients;
+
+ // Examine Dst coefficients.
+ // Compute running GCD and record destination constant.
+ // Because we're looking for the constant at the end of the chain,
+ // we can't quit the loop just because the GCD == 1.
+ Coefficients = Dst;
+ while (const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(Coefficients)) {
+ const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
+ const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff);
+ if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
+ // If the coefficient is the product of a constant and other stuff,
+ // we can use the constant in the GCD computation.
+ Constant = getConstantPart(Product);
+ if (!Constant)
+ return false;
+ APInt ConstCoeff = Constant->getValue()->getValue();
+ RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+ Coefficients = AddRec->getStart();
+ }
+ const SCEV *DstConst = Coefficients;
+
+ APInt ExtraGCD = APInt::getNullValue(BitWidth);
+ const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+ DEBUG(dbgs() << " Delta = " << *Delta << "\n");
+ const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta);
+ if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) {
+ // If Delta is a sum of products, we may be able to make further progress.
+ for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) {
+ const SCEV *Operand = Sum->getOperand(Op);
+ if (isa<SCEVConstant>(Operand)) {
+ assert(!Constant && "Surprised to find multiple constants");
+ Constant = cast<SCEVConstant>(Operand);
+ }
+ else if (isa<SCEVMulExpr>(Operand)) {
+ // Search for a constant operand to participate in the GCD;
+ // if none is found, return false.
+ const SCEVConstant *ConstOp =
+ getConstantPart(cast<SCEVMulExpr>(Operand));
+ if (!ConstOp)
+ return false;
+ APInt ConstOpValue = ConstOp->getValue()->getValue();
+ ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD,
+ ConstOpValue.abs());
+ }
+ else
+ return false;
+ }
+ }
+ if (!Constant)
+ return false;
+ APInt ConstDelta = cast<SCEVConstant>(Constant)->getValue()->getValue();
+ DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n");
+ if (ConstDelta == 0)
+ return false;
+ RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD);
+ DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n");
+ APInt Remainder = ConstDelta.srem(RunningGCD);
+ if (Remainder != 0) {
+ ++GCDindependence;
+ return true;
+ }
+
+ // Try to disprove equal directions.
+ // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1],
+ // the code above can't disprove the dependence because the GCD = 1.
+ // So we consider what happens if i = i' and what happens if j = j'.
+ // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1],
+ // which is infeasible, so we can disallow the = direction for the i level.
+ // Setting j = j' doesn't help matters, so we end up with a direction vector
+ // of [<>, *]
+ //
+ // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5],
+ // we need to remember that the constant part is 5 and the RunningGCD should
+ // be initialized to ExtraGCD = 30.
+ DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n');
+
+ bool Improved = false;
+ Coefficients = Src;
+ while (const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(Coefficients)) {
+ Coefficients = AddRec->getStart();
+ const Loop *CurLoop = AddRec->getLoop();
+ RunningGCD = ExtraGCD;
+ const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE);
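+ // SrcCoeff - SrcCoeff is just a zero of the right type; the walk over
+ // Dst below replaces DstCoeff with the real coefficient for CurLoop.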
+ const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff);
+ const SCEV *Inner = Src;
+ while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {
+ AddRec = cast<SCEVAddRecExpr>(Inner);
+ const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
+ if (CurLoop == AddRec->getLoop())
+ ; // SrcCoeff == Coeff
+ else {
+ if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
+ // If the coefficient is the product of a constant and other stuff,
+ // we can use the constant in the GCD computation.
+ Constant = getConstantPart(Product);
+ else
+ Constant = cast<SCEVConstant>(Coeff);
+ APInt ConstCoeff = Constant->getValue()->getValue();
+ RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+ }
+ Inner = AddRec->getStart();
+ }
+ Inner = Dst;
+ while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {
+ AddRec = cast<SCEVAddRecExpr>(Inner);
+ const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
+ if (CurLoop == AddRec->getLoop())
+ DstCoeff = Coeff;
+ else {
+ if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
+ // If the coefficient is the product of a constant and other stuff,
+ // we can use the constant in the GCD computation.
+ Constant = getConstantPart(Product);
+ else
+ Constant = cast<SCEVConstant>(Coeff);
+ APInt ConstCoeff = Constant->getValue()->getValue();
+ RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+ }
+ Inner = AddRec->getStart();
+ }
+ Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff);
+ if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Delta))
+ // If the coefficient is the product of a constant and other stuff,
+ // we can use the constant in the GCD computation.
+ Constant = getConstantPart(Product);
+ else if (isa<SCEVConstant>(Delta))
+ Constant = cast<SCEVConstant>(Delta);
+ else {
+ // The difference of the two coefficients might not be a product
+ // or constant, in which case we give up on this direction.
+ continue;
+ }
+ APInt ConstCoeff = Constant->getValue()->getValue();
+ RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+ DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");
+ if (RunningGCD != 0) {
+ Remainder = ConstDelta.srem(RunningGCD);
+ DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n");
+ if (Remainder != 0) {
+ unsigned Level = mapSrcLoop(CurLoop);
+ Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ);
+ Improved = true;
+ }
+ }
+ }
+ if (Improved)
+ ++GCDsuccesses;
+ DEBUG(dbgs() << "all done\n");
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// banerjeeMIVtest -
+// Use Banerjee's Inequalities to test an MIV subscript pair.
+// (Wolfe, in the race-car book, calls this the Extreme Value Test.)
+// Generally follows the discussion in Section 2.5.2 of
+//
+// Optimizing Supercompilers for Supercomputers
+// Michael Wolfe
+//
+// The inequalities given on page 25 are simplified in that loops are
+// normalized so that the lower bound is always 0 and the stride is always 1.
+// For example, Wolfe gives
+//
+// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
+//
+// where A_k is the coefficient of the kth index in the source subscript,
+// B_k is the coefficient of the kth index in the destination subscript,
+// U_k is the upper bound of the kth index, L_k is the lower bound of the Kth
+// index, and N_k is the stride of the kth index. Since all loops are normalized
+// by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the
+// equation to
+//
+// LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1
+// = (A^-_k - B_k)^- (U_k - 1) - B_k
+//
+// Similar simplifications are possible for the other equations.
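+//
+// A worked instance (made-up subscripts): for src [i] and dst [i + 5] in a
+// loop with U = 9, Delta = 5. The bounds work out to [-9, 9] for *,
+// [0, 0] for =, [-9, -1] for <, and [1, 9] for >, so only the >
+// direction survives.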
+//
+// When we can't determine the number of iterations for a loop,
+// we use NULL as an indicator for the worst case, infinity.
+// When computing the upper bound, NULL denotes +inf;
+// for the lower bound, NULL denotes -inf.
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::banerjeeMIVtest(const SCEV *Src,
+ const SCEV *Dst,
+ const SmallBitVector &Loops,
+ FullDependence &Result) const {
+ DEBUG(dbgs() << "starting Banerjee\n");
+ ++BanerjeeApplications;
+ DEBUG(dbgs() << " Src = " << *Src << '\n');
+ const SCEV *A0;
+ CoefficientInfo *A = collectCoeffInfo(Src, true, A0);
+ DEBUG(dbgs() << " Dst = " << *Dst << '\n');
+ const SCEV *B0;
+ CoefficientInfo *B = collectCoeffInfo(Dst, false, B0);
+ BoundInfo *Bound = new BoundInfo[MaxLevels + 1];
+ const SCEV *Delta = SE->getMinusSCEV(B0, A0);
+ DEBUG(dbgs() << "\tDelta = " << *Delta << '\n');
+
+ // Compute bounds for all the * directions.
+ DEBUG(dbgs() << "\tBounds[*]\n");
+ for (unsigned K = 1; K <= MaxLevels; ++K) {
+ Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations;
+ Bound[K].Direction = Dependence::DVEntry::ALL;
+ Bound[K].DirSet = Dependence::DVEntry::NONE;
+ findBoundsALL(A, B, Bound, K);
+#ifndef NDEBUG
+ DEBUG(dbgs() << "\t " << K << '\t');
+ if (Bound[K].Lower[Dependence::DVEntry::ALL])
+ DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t');
+ else
+ DEBUG(dbgs() << "-inf\t");
+ if (Bound[K].Upper[Dependence::DVEntry::ALL])
+ DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n');
+ else
+ DEBUG(dbgs() << "+inf\n");
+#endif
+ }
+
+ // Test the *, *, *, ... case.
+ bool Disproved = false;
+ if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) {
+ // Explore the direction vector hierarchy.
+ unsigned DepthExpanded = 0;
+ unsigned NewDeps = exploreDirections(1, A, B, Bound,
+ Loops, DepthExpanded, Delta);
+ if (NewDeps > 0) {
+ bool Improved = false;
+ for (unsigned K = 1; K <= CommonLevels; ++K) {
+ if (Loops[K]) {
+ unsigned Old = Result.DV[K - 1].Direction;
+ Result.DV[K - 1].Direction = Old & Bound[K].DirSet;
+ Improved |= Old != Result.DV[K - 1].Direction;
+ if (!Result.DV[K - 1].Direction) {
+ Improved = false;
+ Disproved = true;
+ break;
+ }
+ }
+ }
+ if (Improved)
+ ++BanerjeeSuccesses;
+ }
+ else {
+ ++BanerjeeIndependence;
+ Disproved = true;
+ }
+ }
+ else {
+ ++BanerjeeIndependence;
+ Disproved = true;
+ }
+ delete [] Bound;
+ delete [] A;
+ delete [] B;
+ return Disproved;
+}
+
+
+// Hierarchically expands the direction vector
+// search space, combining the directions of discovered dependences
+// in the DirSet field of Bound. Returns the number of distinct
+// dependences discovered. If the dependence is disproved,
+// it will return 0.
+unsigned DependenceAnalysis::exploreDirections(unsigned Level,
+ CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ const SmallBitVector &Loops,
+ unsigned &DepthExpanded,
+ const SCEV *Delta) const {
+ if (Level > CommonLevels) {
+ // record result
+ DEBUG(dbgs() << "\t[");
+ for (unsigned K = 1; K <= CommonLevels; ++K) {
+ if (Loops[K]) {
+ Bound[K].DirSet |= Bound[K].Direction;
+#ifndef NDEBUG
+ switch (Bound[K].Direction) {
+ case Dependence::DVEntry::LT:
+ DEBUG(dbgs() << " <");
+ break;
+ case Dependence::DVEntry::EQ:
+ DEBUG(dbgs() << " =");
+ break;
+ case Dependence::DVEntry::GT:
+ DEBUG(dbgs() << " >");
+ break;
+ case Dependence::DVEntry::ALL:
+ DEBUG(dbgs() << " *");
+ break;
+ default:
+ llvm_unreachable("unexpected Bound[K].Direction");
+ }
+#endif
+ }
+ }
+ DEBUG(dbgs() << " ]\n");
+ return 1;
+ }
+ if (Loops[Level]) {
+ if (Level > DepthExpanded) {
+ DepthExpanded = Level;
+ // compute bounds for <, =, > at current level
+ findBoundsLT(A, B, Bound, Level);
+ findBoundsGT(A, B, Bound, Level);
+ findBoundsEQ(A, B, Bound, Level);
+#ifndef NDEBUG
+ DEBUG(dbgs() << "\tBound for level = " << Level << '\n');
+ DEBUG(dbgs() << "\t <\t");
+ if (Bound[Level].Lower[Dependence::DVEntry::LT])
+ DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t');
+ else
+ DEBUG(dbgs() << "-inf\t");
+ if (Bound[Level].Upper[Dependence::DVEntry::LT])
+ DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n');
+ else
+ DEBUG(dbgs() << "+inf\n");
+ DEBUG(dbgs() << "\t =\t");
+ if (Bound[Level].Lower[Dependence::DVEntry::EQ])
+ DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t');
+ else
+ DEBUG(dbgs() << "-inf\t");
+ if (Bound[Level].Upper[Dependence::DVEntry::EQ])
+ DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n');
+ else
+ DEBUG(dbgs() << "+inf\n");
+ DEBUG(dbgs() << "\t >\t");
+ if (Bound[Level].Lower[Dependence::DVEntry::GT])
+ DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t');
+ else
+ DEBUG(dbgs() << "-inf\t");
+ if (Bound[Level].Upper[Dependence::DVEntry::GT])
+ DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n');
+ else
+ DEBUG(dbgs() << "+inf\n");
+#endif
+ }
+
+ unsigned NewDeps = 0;
+
+ // test bounds for <, *, *, ...
+ if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta))
+ NewDeps += exploreDirections(Level + 1, A, B, Bound,
+ Loops, DepthExpanded, Delta);
+
+ // Test bounds for =, *, *, ...
+ if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta))
+ NewDeps += exploreDirections(Level + 1, A, B, Bound,
+ Loops, DepthExpanded, Delta);
+
+ // test bounds for >, *, *, ...
+ if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta))
+ NewDeps += exploreDirections(Level + 1, A, B, Bound,
+ Loops, DepthExpanded, Delta);
+
+ Bound[Level].Direction = Dependence::DVEntry::ALL;
+ return NewDeps;
+ }
+ else
+ return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta);
+}
+
+
+// Returns true iff the current bounds are plausible.
+bool DependenceAnalysis::testBounds(unsigned char DirKind,
+ unsigned Level,
+ BoundInfo *Bound,
+ const SCEV *Delta) const {
+ Bound[Level].Direction = DirKind;
+ if (const SCEV *LowerBound = getLowerBound(Bound))
+ if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta))
+ return false;
+ if (const SCEV *UpperBound = getUpperBound(Bound))
+ if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound))
+ return false;
+ return true;
+}
+
+
+// Computes the upper and lower bounds for level K
+// using the * direction. Records them in Bound.
+// Wolfe gives the equations
+//
+// LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k
+// UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k
+//
+// Since we normalize loops, we can simplify these equations to
+//
+// LB^*_k = (A^-_k - B^+_k)U_k
+// UB^*_k = (A^+_k - B^-_k)U_k
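+//
+// For example (made-up values), with A_k = 2, B_k = -3, and U_k = 10,
+// LB^*_k = (0 - 0)*10 = 0 and UB^*_k = (2 - (-3))*10 = 50.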
+//
+// We must be careful to handle the case where the upper bound is unknown.
+// Note that the lower bound is always <= 0
+// and the upper bound is always >= 0.
+void DependenceAnalysis::findBoundsALL(CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ unsigned K) const {
+ Bound[K].Lower[Dependence::DVEntry::ALL] = NULL; // Default value = -infinity.
+ Bound[K].Upper[Dependence::DVEntry::ALL] = NULL; // Default value = +infinity.
+ if (Bound[K].Iterations) {
+ Bound[K].Lower[Dependence::DVEntry::ALL] =
+ SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart),
+ Bound[K].Iterations);
+ Bound[K].Upper[Dependence::DVEntry::ALL] =
+ SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart),
+ Bound[K].Iterations);
+ }
+ else {
+ // If the difference is 0, we won't need to know the number of iterations.
+ if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart))
+ Bound[K].Lower[Dependence::DVEntry::ALL] =
+ SE->getConstant(A[K].Coeff->getType(), 0);
+ if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart))
+ Bound[K].Upper[Dependence::DVEntry::ALL] =
+ SE->getConstant(A[K].Coeff->getType(), 0);
+ }
+}
+
+
+// Computes the upper and lower bounds for level K
+// using the = direction. Records them in Bound.
+// Wolfe gives the equations
+//
+// LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k
+// UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k
+//
+// Since we normalize loops, we can simplify these equations to
+//
+// LB^=_k = (A_k - B_k)^- U_k
+// UB^=_k = (A_k - B_k)^+ U_k
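+//
+// For example (made-up values), with A_k = 5, B_k = 2, and U_k = 10,
+// A_k - B_k = 3, so LB^=_k = 0*10 = 0 and UB^=_k = 3*10 = 30.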
+//
+// We must be careful to handle the case where the upper bound is unknown.
+// Note that the lower bound is always <= 0
+// and the upper bound is always >= 0.
+void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ unsigned K) const {
+ Bound[K].Lower[Dependence::DVEntry::EQ] = NULL; // Default value = -infinity.
+ Bound[K].Upper[Dependence::DVEntry::EQ] = NULL; // Default value = +infinity.
+ if (Bound[K].Iterations) {
+ const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
+ const SCEV *NegativePart = getNegativePart(Delta);
+ Bound[K].Lower[Dependence::DVEntry::EQ] =
+ SE->getMulExpr(NegativePart, Bound[K].Iterations);
+ const SCEV *PositivePart = getPositivePart(Delta);
+ Bound[K].Upper[Dependence::DVEntry::EQ] =
+ SE->getMulExpr(PositivePart, Bound[K].Iterations);
+ }
+ else {
+ // If the positive/negative part of the difference is 0,
+ // we won't need to know the number of iterations.
+ const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
+ const SCEV *NegativePart = getNegativePart(Delta);
+ if (NegativePart->isZero())
+ Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero
+ const SCEV *PositivePart = getPositivePart(Delta);
+ if (PositivePart->isZero())
+ Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero
+ }
+}
+
+
+// Computes the upper and lower bounds for level K
+// using the < direction. Records them in Bound.
+// Wolfe gives the equations
+//
+// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
+// UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
+//
+// Since we normalize loops, we can simplify these equations to
+//
+// LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k
+// UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k
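+//
+// For example (made-up values), with A_k = B_k = 1 and U_k = 9,
+// LB^<_k = (0 - 1)^-*8 - 1 = -9 and UB^<_k = (1 - 1)^+*8 - 1 = -1,
+// so Delta must be negative for a < dependence here.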
+//
+// We must be careful to handle the case where the upper bound is unknown.
+void DependenceAnalysis::findBoundsLT(CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ unsigned K) const {
+ Bound[K].Lower[Dependence::DVEntry::LT] = NULL; // Default value = -infinity.
+ Bound[K].Upper[Dependence::DVEntry::LT] = NULL; // Default value = +infinity.
+ if (Bound[K].Iterations) {
+ const SCEV *Iter_1 =
+ SE->getMinusSCEV(Bound[K].Iterations,
+ SE->getConstant(Bound[K].Iterations->getType(), 1));
+ const SCEV *NegPart =
+ getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
+ Bound[K].Lower[Dependence::DVEntry::LT] =
+ SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff);
+ const SCEV *PosPart =
+ getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
+ Bound[K].Upper[Dependence::DVEntry::LT] =
+ SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff);
+ }
+ else {
+ // If the positive/negative part of the difference is 0,
+ // we won't need to know the number of iterations.
+ const SCEV *NegPart =
+ getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
+ if (NegPart->isZero())
+ Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
+ const SCEV *PosPart =
+ getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
+ if (PosPart->isZero())
+ Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
+ }
+}
+
+
+// Computes the upper and lower bounds for level K
+// using the > direction. Records them in Bound.
+// Wolfe gives the equations
+//
+// LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
+// UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
+//
+// Since we normalize loops, we can simplify these equations to
+//
+// LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k
+// UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k
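+//
+// For example (made-up values), with A_k = B_k = 1 and U_k = 9,
+// LB^>_k = (1 - 1)^-*8 + 1 = 1 and UB^>_k = (1 - 0)^+*8 + 1 = 9,
+// so Delta must be positive for a > dependence here.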
+//
+// We must be careful to handle the case where the upper bound is unknown.
+void DependenceAnalysis::findBoundsGT(CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ unsigned K) const {
+ Bound[K].Lower[Dependence::DVEntry::GT] = NULL; // Default value = -infinity.
+ Bound[K].Upper[Dependence::DVEntry::GT] = NULL; // Default value = +infinity.
+ if (Bound[K].Iterations) {
+ const SCEV *Iter_1 =
+ SE->getMinusSCEV(Bound[K].Iterations,
+ SE->getConstant(Bound[K].Iterations->getType(), 1));
+ const SCEV *NegPart =
+ getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
+ Bound[K].Lower[Dependence::DVEntry::GT] =
+ SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff);
+ const SCEV *PosPart =
+ getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
+ Bound[K].Upper[Dependence::DVEntry::GT] =
+ SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff);
+ }
+ else {
+ // If the positive/negative part of the difference is 0,
+ // we won't need to know the number of iterations.
+ const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
+ if (NegPart->isZero())
+ Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff;
+ const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
+ if (PosPart->isZero())
+ Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff;
+ }
+}
+
+
+// X^+ = max(X, 0)
+const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const {
+ return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0));
+}
+
+
+// X^- = min(X, 0)
+const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const {
+ return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0));
+}
+
+
+// Walks through the subscript,
+// collecting each coefficient, the associated loop bounds,
+// and recording its positive and negative parts for later use.
+DependenceAnalysis::CoefficientInfo *
+DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript,
+ bool SrcFlag,
+ const SCEV *&Constant) const {
+ const SCEV *Zero = SE->getConstant(Subscript->getType(), 0);
+ CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1];
+ for (unsigned K = 1; K <= MaxLevels; ++K) {
+ CI[K].Coeff = Zero;
+ CI[K].PosPart = Zero;
+ CI[K].NegPart = Zero;
+ CI[K].Iterations = NULL;
+ }
+ while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) {
+ const Loop *L = AddRec->getLoop();
+ unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L);
+ CI[K].Coeff = AddRec->getStepRecurrence(*SE);
+ CI[K].PosPart = getPositivePart(CI[K].Coeff);
+ CI[K].NegPart = getNegativePart(CI[K].Coeff);
+ CI[K].Iterations = collectUpperBound(L, Subscript->getType());
+ Subscript = AddRec->getStart();
+ }
+ Constant = Subscript;
+#ifndef NDEBUG
+ DEBUG(dbgs() << "\tCoefficient Info\n");
+ for (unsigned K = 1; K <= MaxLevels; ++K) {
+ DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff);
+ DEBUG(dbgs() << "\tPos Part = ");
+ DEBUG(dbgs() << *CI[K].PosPart);
+ DEBUG(dbgs() << "\tNeg Part = ");
+ DEBUG(dbgs() << *CI[K].NegPart);
+ DEBUG(dbgs() << "\tUpper Bound = ");
+ if (CI[K].Iterations)
+ DEBUG(dbgs() << *CI[K].Iterations);
+ else
+ DEBUG(dbgs() << "+inf");
+ DEBUG(dbgs() << '\n');
+ }
+ DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n');
+#endif
+ return CI;
+}
+
+
+// Looks through all the bounds info and
+// computes the lower bound given the current direction settings
+// at each level. If the lower bound for any level is -inf,
+// the result is -inf.
+const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const {
+ const SCEV *Sum = Bound[1].Lower[Bound[1].Direction];
+ for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
+ if (Bound[K].Lower[Bound[K].Direction])
+ Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]);
+ else
+ Sum = NULL;
+ }
+ return Sum;
+}
+
+
+// Looks through all the bounds info and
+// computes the upper bound given the current direction settings
+// at each level. If the upper bound at any level is +inf,
+// the result is +inf.
+const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const {
+ const SCEV *Sum = Bound[1].Upper[Bound[1].Direction];
+ for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
+ if (Bound[K].Upper[Bound[K].Direction])
+ Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]);
+ else
+ Sum = NULL;
+ }
+ return Sum;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Constraint manipulation for Delta test.
+
+// Given a linear SCEV,
+// return the coefficient (the step)
+// corresponding to the specified loop.
+// If there isn't one, return 0.
+// For example, given a*i + b*j + c*k, finding the coefficient
+// corresponding to the j loop would yield b.
+const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr,
+ const Loop *TargetLoop) const {
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
+ if (!AddRec)
+ return SE->getConstant(Expr->getType(), 0);
+ if (AddRec->getLoop() == TargetLoop)
+ return AddRec->getStepRecurrence(*SE);
+ return findCoefficient(AddRec->getStart(), TargetLoop);
+}
+
+
+// Given a linear SCEV,
+// return the SCEV given by zeroing out the coefficient
+// corresponding to the specified loop.
+// For example, given a*i + b*j + c*k, zeroing the coefficient
+// corresponding to the j loop would yield a*i + c*k.
+const SCEV *DependenceAnalysis::zeroCoefficient(const SCEV *Expr,
+ const Loop *TargetLoop) const {
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
+ if (!AddRec)
+ return Expr; // ignore
+ if (AddRec->getLoop() == TargetLoop)
+ return AddRec->getStart();
+ return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop),
+ AddRec->getStepRecurrence(*SE),
+ AddRec->getLoop(),
+ AddRec->getNoWrapFlags());
+}
+
+
+// Given a linear SCEV Expr,
+// return the SCEV given by adding some Value to the
+// coefficient corresponding to the specified TargetLoop.
+// For example, given a*i + b*j + c*k, adding 1 to the coefficient
+// corresponding to the j loop would yield a*i + (b+1)*j + c*k.
+const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr,
+ const Loop *TargetLoop,
+ const SCEV *Value) const {
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
+ if (!AddRec) // create a new addRec
+ return SE->getAddRecExpr(Expr,
+ Value,
+ TargetLoop,
+ SCEV::FlagAnyWrap); // Worst case, with no info.
+ if (AddRec->getLoop() == TargetLoop) {
+ const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value);
+ if (Sum->isZero())
+ return AddRec->getStart();
+ return SE->getAddRecExpr(AddRec->getStart(),
+ Sum,
+ AddRec->getLoop(),
+ AddRec->getNoWrapFlags());
+ }
+ return SE->getAddRecExpr(addToCoefficient(AddRec->getStart(),
+ TargetLoop, Value),
+ AddRec->getStepRecurrence(*SE),
+ AddRec->getLoop(),
+ AddRec->getNoWrapFlags());
+}
+
+
+// Review the constraints, looking for opportunities
+// to simplify a subscript pair (Src and Dst).
+// Return true if some simplification occurs.
+// If the simplification isn't exact (that is, if it is conservative
+// in terms of dependence), set consistent to false.
+// Corresponds to Figure 5 from the paper
+//
+// Practical Dependence Testing
+// Goff, Kennedy, Tseng
+// PLDI 1991
+bool DependenceAnalysis::propagate(const SCEV *&Src,
+ const SCEV *&Dst,
+ SmallBitVector &Loops,
+ SmallVector<Constraint, 4> &Constraints,
+ bool &Consistent) {
+ bool Result = false;
+ for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) {
+ DEBUG(dbgs() << "\t Constraint[" << LI << "] is");
+ DEBUG(Constraints[LI].dump(dbgs()));
+ if (Constraints[LI].isDistance())
+ Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent);
+ else if (Constraints[LI].isLine())
+ Result |= propagateLine(Src, Dst, Constraints[LI], Consistent);
+ else if (Constraints[LI].isPoint())
+ Result |= propagatePoint(Src, Dst, Constraints[LI]);
+ }
+ return Result;
+}
+
+
+// Attempt to propagate a distance
+// constraint into a subscript pair (Src and Dst).
+// Return true if some simplification occurs.
+// If the simplification isn't exact (that is, if it is conservative
+// in terms of dependence), set Consistent to false.
+bool DependenceAnalysis::propagateDistance(const SCEV *&Src,
+ const SCEV *&Dst,
+ Constraint &CurConstraint,
+ bool &Consistent) {
+ const Loop *CurLoop = CurConstraint.getAssociatedLoop();
+ DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
+ const SCEV *A_K = findCoefficient(Src, CurLoop);
+ if (A_K->isZero())
+ return false;
+ const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD());
+ Src = SE->getMinusSCEV(Src, DA_K);
+ Src = zeroCoefficient(Src, CurLoop);
+ DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
+ DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
+ Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K));
+ DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
+ if (!findCoefficient(Dst, CurLoop)->isZero())
+ Consistent = false;
+ return true;
+}
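+
+// (The algebra behind propagateDistance: the dependence equation is
+// A_K*x + Src0 = AP_K*y + Dst0, and a distance constraint fixes
+// y - x = d, the Dst iteration minus the Src iteration. Substituting
+// x = y - d gives Src0 - d*A_K = (AP_K - A_K)*y + Dst0, which is
+// exactly what the code builds: -d*A_K folded into Src, Src's
+// coefficient zeroed, and A_K subtracted from Dst's coefficient.)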
+
+
+// Attempt to propagate a line
+// constraint into a subscript pair (Src and Dst).
+// Return true if some simplification occurs.
+// If the simplification isn't exact (that is, if it is conservative
+// in terms of dependence), set Consistent to false.
+bool DependenceAnalysis::propagateLine(const SCEV *&Src,
+ const SCEV *&Dst,
+ Constraint &CurConstraint,
+ bool &Consistent) {
+ const Loop *CurLoop = CurConstraint.getAssociatedLoop();
+ const SCEV *A = CurConstraint.getA();
+ const SCEV *B = CurConstraint.getB();
+ const SCEV *C = CurConstraint.getC();
+ DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n");
+ DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n");
+ DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n");
+ if (A->isZero()) {
+ const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B);
+ const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
+ if (!Bconst || !Cconst) return false;
+ APInt Beta = Bconst->getValue()->getValue();
+ APInt Charlie = Cconst->getValue()->getValue();
+ APInt CdivB = Charlie.sdiv(Beta);
+ assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B");
+ const SCEV *AP_K = findCoefficient(Dst, CurLoop);
+ Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
+ Dst = zeroCoefficient(Dst, CurLoop);
+ if (!findCoefficient(Src, CurLoop)->isZero())
+ Consistent = false;
+ }
+ else if (B->isZero()) {
+ const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
+ const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
+ if (!Aconst || !Cconst) return false;
+ APInt Alpha = Aconst->getValue()->getValue();
+ APInt Charlie = Cconst->getValue()->getValue();
+ APInt CdivA = Charlie.sdiv(Alpha);
+ assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
+ const SCEV *A_K = findCoefficient(Src, CurLoop);
+ Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
+ Src = zeroCoefficient(Src, CurLoop);
+ if (!findCoefficient(Dst, CurLoop)->isZero())
+ Consistent = false;
+ }
+ else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) {
+ const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
+ const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
+ if (!Aconst || !Cconst) return false;
+ APInt Alpha = Aconst->getValue()->getValue();
+ APInt Charlie = Cconst->getValue()->getValue();
+ APInt CdivA = Charlie.sdiv(Alpha);
+ assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
+ const SCEV *A_K = findCoefficient(Src, CurLoop);
+ Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
+ Src = zeroCoefficient(Src, CurLoop);
+ Dst = addToCoefficient(Dst, CurLoop, A_K);
+ if (!findCoefficient(Dst, CurLoop)->isZero())
+ Consistent = false;
+ }
+ else {
+ // paper is incorrect here, or perhaps just misleading
+ const SCEV *A_K = findCoefficient(Src, CurLoop);
+ Src = SE->getMulExpr(Src, A);
+ Dst = SE->getMulExpr(Dst, A);
+ Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C));
+ Src = zeroCoefficient(Src, CurLoop);
+ Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B));
+ if (!findCoefficient(Dst, CurLoop)->isZero())
+ Consistent = false;
+ }
+ DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n");
+ DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n");
+ return true;
+}
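+
+// (For the A == B case above, the line constraint A*x + B*y = C gives
+// x = C/A - y; substituting into A_K*x + Src0 = Dst replaces A_K*x
+// with A_K*(C/A) - A_K*y, so the code folds A_K*(C/A) into Src, zeroes
+// Src's coefficient, and moves A_K over to Dst's coefficient. The
+// B == 0 case is simpler still: A*x = C pins x to the single value C/A.)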
+
+
+// Attempt to propagate a point
+// constraint into a subscript pair (Src and Dst).
+// Return true if some simplification occurs.
+bool DependenceAnalysis::propagatePoint(const SCEV *&Src,
+ const SCEV *&Dst,
+ Constraint &CurConstraint) {
+ const Loop *CurLoop = CurConstraint.getAssociatedLoop();
+ const SCEV *A_K = findCoefficient(Src, CurLoop);
+ const SCEV *AP_K = findCoefficient(Dst, CurLoop);
+ const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX());
+ const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY());
+ DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
+ Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K));
+ Src = zeroCoefficient(Src, CurLoop);
+ DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
+ DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
+ Dst = zeroCoefficient(Dst, CurLoop);
+ DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
+ return true;
+}
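+
+// (A point constraint pins both iterations, x = X and y = Y, so the
+// dependence equation A_K*x + Src0 = AP_K*y + Dst0 collapses to
+// Src0 + X*A_K - Y*AP_K = Dst0; hence X*A_K - Y*AP_K is folded into
+// Src and the coefficient is zeroed on both sides.)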
+
+
+// Update direction vector entry based on the current constraint.
+void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level,
+ const Constraint &CurConstraint) const {
+ DEBUG(dbgs() << "\tUpdate direction, constraint =");
+ DEBUG(CurConstraint.dump(dbgs()));
+ if (CurConstraint.isAny())
+ ; // use defaults
+ else if (CurConstraint.isDistance()) {
+ // this one is consistent, the others aren't
+ Level.Scalar = false;
+ Level.Distance = CurConstraint.getD();
+ unsigned NewDirection = Dependence::DVEntry::NONE;
+ if (!SE->isKnownNonZero(Level.Distance)) // if may be zero
+ NewDirection = Dependence::DVEntry::EQ;
+ if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive
+ NewDirection |= Dependence::DVEntry::LT;
+ if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative
+ NewDirection |= Dependence::DVEntry::GT;
+ Level.Direction &= NewDirection;
+ }
+ else if (CurConstraint.isLine()) {
+ Level.Scalar = false;
+ Level.Distance = NULL;
+ // direction should be accurate
+ }
+ else if (CurConstraint.isPoint()) {
+ Level.Scalar = false;
+ Level.Distance = NULL;
+ unsigned NewDirection = Dependence::DVEntry::NONE;
+ if (!isKnownPredicate(CmpInst::ICMP_NE,
+ CurConstraint.getY(),
+ CurConstraint.getX()))
+ // if X may equal Y
+ NewDirection |= Dependence::DVEntry::EQ;
+ if (!isKnownPredicate(CmpInst::ICMP_SLE,
+ CurConstraint.getY(),
+ CurConstraint.getX()))
+ // if Y may be > X
+ NewDirection |= Dependence::DVEntry::LT;
+ if (!isKnownPredicate(CmpInst::ICMP_SGE,
+ CurConstraint.getY(),
+ CurConstraint.getX()))
+ // if Y may be < X
+ NewDirection |= Dependence::DVEntry::GT;
+ Level.Direction &= NewDirection;
+ }
+ else
+ llvm_unreachable("constraint has unexpected kind");
+}
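+
+// For example, a constraint recording a known distance of 2 is provably
+// nonzero and positive, so updateDirection narrows Level.Direction to
+// LT alone; a known distance of 0 would narrow it to EQ instead.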
+
+
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+// For debugging purposes, dump a small bit vector to dbgs().
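+// For example, a vector with bits 1, 3, and 5 set prints as "{1 3 5}".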
+static void dumpSmallBitVector(SmallBitVector &BV) {
+ dbgs() << "{";
+ for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) {
+ dbgs() << VI;
+ if (BV.find_next(VI) >= 0)
+ dbgs() << ' ';
+ }
+ dbgs() << "}\n";
+}
+#endif
+
+
+// depends -
+// Returns NULL if there is no dependence.
+// Otherwise, returns a Dependence with as many details as possible.
+// Corresponds to Section 3.1 in the paper
+//
+// Practical Dependence Testing
+// Goff, Kennedy, Tseng
+// PLDI 1991
+//
+// Care is required to keep the code below up to date w.r.t. this routine.
+Dependence *DependenceAnalysis::depends(const Instruction *Src,
+ const Instruction *Dst,
+ bool PossiblyLoopIndependent) {
+ if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) ||
+ (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory()))
+ // if either instruction doesn't reference memory, there's no dependence
+ return NULL;
+
+ if (!isLoadOrStore(Src) || !isLoadOrStore(Dst))
+ // can only analyze simple loads and stores, i.e., no calls, invokes, etc.
+ return new Dependence(Src, Dst);
+
+ const Value *SrcPtr = getPointerOperand(Src);
+ const Value *DstPtr = getPointerOperand(Dst);
+
+ switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) {
+ case AliasAnalysis::MayAlias:
+ case AliasAnalysis::PartialAlias:
+ // cannot analyze objects if we don't understand their aliasing.
+ return new Dependence(Src, Dst);
+ case AliasAnalysis::NoAlias:
+ // If the underlying objects don't alias, the accesses are independent.
+ return NULL;
+ case AliasAnalysis::MustAlias:
+ break; // The underlying objects alias; test accesses for dependence.
+ }
+
+ const GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
+ const GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
+ if (!SrcGEP || !DstGEP)
+ return new Dependence(Src, Dst); // missing GEP, assume dependence
+
+ if (SrcGEP->getPointerOperandType() != DstGEP->getPointerOperandType())
+ return new Dependence(Src, Dst); // different types, assume dependence
+
+ // establish loop nesting levels
+ establishNestingLevels(Src, Dst);
+ DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
+ DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");
+
+ FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
+ ++TotalArrayPairs;
+
+ // classify subscript pairs
+ unsigned Pairs = SrcGEP->idx_end() - SrcGEP->idx_begin();
+ SmallVector<Subscript, 4> Pair(Pairs);
+ for (unsigned SI = 0; SI < Pairs; ++SI) {
+ Pair[SI].Loops.resize(MaxLevels + 1);
+ Pair[SI].GroupLoops.resize(MaxLevels + 1);
+ Pair[SI].Group.resize(Pairs);
+ }
+ Pairs = 0;
+ for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
+ SrcEnd = SrcGEP->idx_end(),
+ DstIdx = DstGEP->idx_begin(),
+ DstEnd = DstGEP->idx_end();
+ SrcIdx != SrcEnd && DstIdx != DstEnd;
+ ++SrcIdx, ++DstIdx, ++Pairs) {
+ Pair[Pairs].Src = SE->getSCEV(*SrcIdx);
+ Pair[Pairs].Dst = SE->getSCEV(*DstIdx);
+ removeMatchingExtensions(&Pair[Pairs]);
+ Pair[Pairs].Classification =
+ classifyPair(Pair[Pairs].Src, LI->getLoopFor(Src->getParent()),
+ Pair[Pairs].Dst, LI->getLoopFor(Dst->getParent()),
+ Pair[Pairs].Loops);
+ Pair[Pairs].GroupLoops = Pair[Pairs].Loops;
+ Pair[Pairs].Group.set(Pairs);
+ DEBUG(dbgs() << " subscript " << Pairs << "\n");
+ DEBUG(dbgs() << "\tsrc = " << *Pair[Pairs].Src << "\n");
+ DEBUG(dbgs() << "\tdst = " << *Pair[Pairs].Dst << "\n");
+ DEBUG(dbgs() << "\tclass = " << Pair[Pairs].Classification << "\n");
+ DEBUG(dbgs() << "\tloops = ");
+ DEBUG(dumpSmallBitVector(Pair[Pairs].Loops));
+ }
+
+ SmallBitVector Separable(Pairs);
+ SmallBitVector Coupled(Pairs);
+
+ // Partition subscripts into separable and minimally-coupled groups.
+ // The algorithm in the paper is asymptotically better;
+ // this one may be faster in practice. Worth checking someday.
+ //
+ // Here's an example of how it works. Consider this code:
+ //
+ // for (i = ...) {
+ // for (j = ...) {
+ // for (k = ...) {
+ // for (l = ...) {
+ // for (m = ...) {
+ // A[i][j][k][m] = ...;
+ // ... = A[0][j][l][i + j];
+ // }
+ // }
+ // }
+ // }
+ // }
+ //
+ // There are 4 subscripts here:
+ // 0 [i] and [0]
+ // 1 [j] and [j]
+ // 2 [k] and [l]
+ // 3 [m] and [i + j]
+ //
+ // We've already classified each subscript pair as ZIV, SIV, etc.,
+ // and collected all the loops mentioned by pair P in Pair[P].Loops.
+ // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops
+ // and set Pair[P].Group = {P}.
+ //
+ // Src Dst Classification Loops GroupLoops Group
+ // 0 [i] [0] SIV {1} {1} {0}
+ // 1 [j] [j] SIV {2} {2} {1}
+ // 2 [k] [l] RDIV {3,4} {3,4} {2}
+ // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3}
+ //
+ // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ.
+ // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc.
+ //
+ // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty.
+ // Next, 0 and 2. Again, the intersection of their GroupLoops is empty.
+ // Next, 0 and 3. The intersection of their GroupLoops = {1}, not empty,
+ // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added
+ // to either Separable or Coupled).
+ //
+ // Next, we consider 1 and 2. The intersection of the GroupLoops is empty.
+ // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty,
+ // so Pair[3].Group = {0, 1, 3} and Done = false.
+ //
+ // Next, we compare 2 against 3. The intersection of the GroupLoops is empty.
+ // Since Done remains true, we add 2 to the set of Separable pairs.
+ //
+ // Finally, we consider 3. There's nothing to compare it with,
+ // so Done remains true and we add it to the Coupled set.
+ // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}.
+ //
+ // In the end, we've got 1 separable subscript and 1 coupled group.
+ for (unsigned SI = 0; SI < Pairs; ++SI) {
+ if (Pair[SI].Classification == Subscript::NonLinear) {
+ // ignore these, but collect loops for later
+ ++NonlinearSubscriptPairs;
+ collectCommonLoops(Pair[SI].Src,
+ LI->getLoopFor(Src->getParent()),
+ Pair[SI].Loops);
+ collectCommonLoops(Pair[SI].Dst,
+ LI->getLoopFor(Dst->getParent()),
+ Pair[SI].Loops);
+ Result.Consistent = false;
+ }
+ else if (Pair[SI].Classification == Subscript::ZIV) {
+ // always separable
+ Separable.set(SI);
+ }
+ else {
+ // SIV, RDIV, or MIV, so check for coupled group
+ bool Done = true;
+ for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
+ SmallBitVector Intersection = Pair[SI].GroupLoops;
+ Intersection &= Pair[SJ].GroupLoops;
+ if (Intersection.any()) {
+ // accumulate set of all the loops in group
+ Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
+ // accumulate set of all subscripts in group
+ Pair[SJ].Group |= Pair[SI].Group;
+ Done = false;
+ }
+ }
+ if (Done) {
+ if (Pair[SI].Group.count() == 1) {
+ Separable.set(SI);
+ ++SeparableSubscriptPairs;
+ }
+ else {
+ Coupled.set(SI);
+ ++CoupledSubscriptPairs;
+ }
+ }
+ }
+ }
+
+ DEBUG(dbgs() << " Separable = ");
+ DEBUG(dumpSmallBitVector(Separable));
+ DEBUG(dbgs() << " Coupled = ");
+ DEBUG(dumpSmallBitVector(Coupled));
+
+ Constraint NewConstraint;
+ NewConstraint.setAny(SE);
+
+ // test separable subscripts
+ for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) {
+ DEBUG(dbgs() << "testing subscript " << SI);
+ switch (Pair[SI].Classification) {
+ case Subscript::ZIV:
+ DEBUG(dbgs() << ", ZIV\n");
+ if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result))
+ return NULL;
+ break;
+ case Subscript::SIV: {
+ DEBUG(dbgs() << ", SIV\n");
+ unsigned Level;
+ const SCEV *SplitIter = NULL;
+ if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
+ Result, NewConstraint, SplitIter))
+ return NULL;
+ break;
+ }
+ case Subscript::RDIV:
+ DEBUG(dbgs() << ", RDIV\n");
+ if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result))
+ return NULL;
+ break;
+ case Subscript::MIV:
+ DEBUG(dbgs() << ", MIV\n");
+ if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result))
+ return NULL;
+ break;
+ default:
+ llvm_unreachable("subscript has unexpected classification");
+ }
+ }
+
+ if (Coupled.count()) {
+ // test coupled subscript groups
+ DEBUG(dbgs() << "starting on coupled subscripts\n");
+ DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n");
+ SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
+ for (unsigned II = 0; II <= MaxLevels; ++II)
+ Constraints[II].setAny(SE);
+ for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) {
+ DEBUG(dbgs() << "testing subscript group " << SI << " { ");
+ SmallBitVector Group(Pair[SI].Group);
+ SmallBitVector Sivs(Pairs);
+ SmallBitVector Mivs(Pairs);
+ SmallBitVector ConstrainedLevels(MaxLevels + 1);
+ for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) {
+ DEBUG(dbgs() << SJ << " ");
+ if (Pair[SJ].Classification == Subscript::SIV)
+ Sivs.set(SJ);
+ else
+ Mivs.set(SJ);
+ }
+ DEBUG(dbgs() << "}\n");
+ while (Sivs.any()) {
+ bool Changed = false;
+ for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) {
+ DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");
+ // SJ is an SIV subscript that's part of the current coupled group
+ unsigned Level;
+ const SCEV *SplitIter = NULL;
+ DEBUG(dbgs() << "SIV\n");
+ if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
+ Result, NewConstraint, SplitIter))
+ return NULL;
+ ConstrainedLevels.set(Level);
+ if (intersectConstraints(&Constraints[Level], &NewConstraint)) {
+ if (Constraints[Level].isEmpty()) {
+ ++DeltaIndependence;
+ return NULL;
+ }
+ Changed = true;
+ }
+ Sivs.reset(SJ);
+ }
+ if (Changed) {
+ // propagate, possibly creating new SIVs and ZIVs
+ DEBUG(dbgs() << " propagating\n");
+ DEBUG(dbgs() << "\tMivs = ");
+ DEBUG(dumpSmallBitVector(Mivs));
+ for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+ // SJ is an MIV subscript that's part of the current coupled group
+ DEBUG(dbgs() << "\tSJ = " << SJ << "\n");
+ if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops,
+ Constraints, Result.Consistent)) {
+ DEBUG(dbgs() << "\t Changed\n");
+ ++DeltaPropagations;
+ Pair[SJ].Classification =
+ classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
+ Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
+ Pair[SJ].Loops);
+ switch (Pair[SJ].Classification) {
+ case Subscript::ZIV:
+ DEBUG(dbgs() << "ZIV\n");
+ if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
+ return NULL;
+ Mivs.reset(SJ);
+ break;
+ case Subscript::SIV:
+ Sivs.set(SJ);
+ Mivs.reset(SJ);
+ break;
+ case Subscript::RDIV:
+ case Subscript::MIV:
+ break;
+ default:
+ llvm_unreachable("bad subscript classification");
+ }
+ }
+ }
+ }
+ }
+
+ // test & propagate remaining RDIVs
+ for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+ if (Pair[SJ].Classification == Subscript::RDIV) {
+ DEBUG(dbgs() << "RDIV test\n");
+ if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
+ return NULL;
+ // I don't yet understand how to propagate RDIV results
+ Mivs.reset(SJ);
+ }
+ }
+
+ // test remaining MIVs
+ // This code is temporary; it would be better
+ // to somehow test all remaining subscripts simultaneously.
+ for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+ if (Pair[SJ].Classification == Subscript::MIV) {
+ DEBUG(dbgs() << "MIV test\n");
+ if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result))
+ return NULL;
+ }
+ else
+ llvm_unreachable("expected only MIV subscripts at this point");
+ }
+
+ // update Result.DV from constraint vector
+ DEBUG(dbgs() << " updating\n");
+ for (int SJ = ConstrainedLevels.find_first();
+ SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) {
+ updateDirection(Result.DV[SJ - 1], Constraints[SJ]);
+ if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE)
+ return NULL;
+ }
+ }
+ }
+
+ // make sure Scalar flags are set correctly
+ SmallBitVector CompleteLoops(MaxLevels + 1);
+ for (unsigned SI = 0; SI < Pairs; ++SI)
+ CompleteLoops |= Pair[SI].Loops;
+ for (unsigned II = 1; II <= CommonLevels; ++II)
+ if (CompleteLoops[II])
+ Result.DV[II - 1].Scalar = false;
+
+ // make sure the LoopIndependent flag is set correctly
+ if (PossiblyLoopIndependent) {
+ for (unsigned II = 1; II <= CommonLevels; ++II) {
+ if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) {
+ Result.LoopIndependent = false;
+ break;
+ }
+ }
+ }
+
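+ // Hand the DV array off to the heap-allocated copy; clearing the
+ // local copy's pointer keeps its destructor from freeing the array
+ // now owned by the returned dependence.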
+ FullDependence *Final = new FullDependence(Result);
+ Result.DV = NULL;
+ return Final;
+}
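+
+// A typical client walks pairs of memory instructions and asks this
+// routine for details; a minimal sketch (names assumed: DA points at
+// this analysis, Src and Dst are the two instructions):
+//
+//   if (Dependence *D = DA->depends(Src, Dst, true)) {
+//     ... examine D->getDirection(Level), D->isConsistent(), etc. ...
+//     delete D; // the caller owns the result
+//   }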
+
+
+
+//===----------------------------------------------------------------------===//
+// getSplitIteration -
+// Rather than spend rarely-used space recording the splitting iteration
+// during the Weak-Crossing SIV test, we re-compute it on demand.
+// The re-computation is basically a repeat of the entire dependence test,
+// though simplified since we know that the dependence exists.
+// It's tedious, since we must go through all propagations, etc.
+//
+// Care is required to keep this code up to date w.r.t. the code above.
+//
+// Generally, the dependence analyzer will be used to build
+// a dependence graph for a function (basically a map from instructions
+// to dependences). Looking for cycles in the graph shows us loops
+// that cannot be trivially vectorized/parallelized.
+//
+// We can try to improve the situation by examining all the dependences
+// that make up the cycle, looking for ones we can break.
+// Sometimes, peeling the first or last iteration of a loop will break
+// dependences, and we've got flags for those possibilities.
+// Sometimes, splitting a loop at some other iteration will do the trick,
+// and we've got a flag for that case. Rather than waste the space to
+// record the exact iteration (since we rarely know), we provide
+// a method that calculates the iteration. It's a drag that it must work
+// from scratch, but wonderful in that it's possible.
+//
+// Here's an example:
+//
+// for (i = 0; i < 10; i++)
+// A[i] = ...
+// ... = A[11 - i]
+//
+// There's a loop-carried flow dependence from the store to the load,
+// found by the weak-crossing SIV test. The dependence will have a flag
+// indicating that it can be broken by splitting the loop.
+// Calling getSplitIteration will return 5.
+// Splitting the loop, like so:
+//
+// for (i = 0; i <= 5; i++)
+// A[i] = ...
+// ... = A[11 - i]
+// for (i = 6; i < 10; i++)
+// A[i] = ...
+// ... = A[11 - i]
+//
+// breaks the dependence and allows us to vectorize/parallelize
+// both loops.
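+//
+// (The 5 comes from the dependence equation i = 11 - i': the store and
+// load conflict when their iteration numbers sum to 11, the crossing
+// point is i = 11/2 = 5.5, and the weak-crossing SIV test splits at
+// floor(5.5) = 5.)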
+const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep,
+ unsigned SplitLevel) {
+ assert(Dep && "expected a pointer to a Dependence");
+ assert(Dep->isSplitable(SplitLevel) &&
+ "Dep should be splitable at SplitLevel");
+ const Instruction *Src = Dep->getSrc();
+ const Instruction *Dst = Dep->getDst();
+ assert(Src->mayReadFromMemory() || Src->mayWriteToMemory());
+ assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory());
+ assert(isLoadOrStore(Src));
+ assert(isLoadOrStore(Dst));
+ const Value *SrcPtr = getPointerOperand(Src);
+ const Value *DstPtr = getPointerOperand(Dst);
+ assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) ==
+ AliasAnalysis::MustAlias);
+ const GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
+ const GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
+ assert(SrcGEP);
+ assert(DstGEP);
+ assert(SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType());
+
+ // establish loop nesting levels
+ establishNestingLevels(Src, Dst);
+
+ FullDependence Result(Src, Dst, false, CommonLevels);
+
+ // classify subscript pairs
+ unsigned Pairs = SrcGEP->idx_end() - SrcGEP->idx_begin();
+ SmallVector<Subscript, 4> Pair(Pairs);
+ for (unsigned SI = 0; SI < Pairs; ++SI) {
+ Pair[SI].Loops.resize(MaxLevels + 1);
+ Pair[SI].GroupLoops.resize(MaxLevels + 1);
+ Pair[SI].Group.resize(Pairs);
+ }
+ Pairs = 0;
+ for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
+ SrcEnd = SrcGEP->idx_end(),
+ DstIdx = DstGEP->idx_begin(),
+ DstEnd = DstGEP->idx_end();
+ SrcIdx != SrcEnd && DstIdx != DstEnd;
+ ++SrcIdx, ++DstIdx, ++Pairs) {
+ Pair[Pairs].Src = SE->getSCEV(*SrcIdx);
+ Pair[Pairs].Dst = SE->getSCEV(*DstIdx);
+ Pair[Pairs].Classification =
+ classifyPair(Pair[Pairs].Src, LI->getLoopFor(Src->getParent()),
+ Pair[Pairs].Dst, LI->getLoopFor(Dst->getParent()),
+ Pair[Pairs].Loops);
+ Pair[Pairs].GroupLoops = Pair[Pairs].Loops;
+ Pair[Pairs].Group.set(Pairs);
+ }
+
+ SmallBitVector Separable(Pairs);
+ SmallBitVector Coupled(Pairs);
+
+ // partition subscripts into separable and minimally-coupled groups
+ for (unsigned SI = 0; SI < Pairs; ++SI) {
+ if (Pair[SI].Classification == Subscript::NonLinear) {
+ // ignore these, but collect loops for later
+ collectCommonLoops(Pair[SI].Src,
+ LI->getLoopFor(Src->getParent()),
+ Pair[SI].Loops);
+ collectCommonLoops(Pair[SI].Dst,
+ LI->getLoopFor(Dst->getParent()),
+ Pair[SI].Loops);
+ Result.Consistent = false;
+ }
+ else if (Pair[SI].Classification == Subscript::ZIV)
+ Separable.set(SI);
+ else {
+ // SIV, RDIV, or MIV, so check for coupled group
+ bool Done = true;
+ for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
+ SmallBitVector Intersection = Pair[SI].GroupLoops;
+ Intersection &= Pair[SJ].GroupLoops;
+ if (Intersection.any()) {
+ // accumulate set of all the loops in group
+ Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
+ // accumulate set of all subscripts in group
+ Pair[SJ].Group |= Pair[SI].Group;
+ Done = false;
+ }
+ }
+ if (Done) {
+ if (Pair[SI].Group.count() == 1)
+ Separable.set(SI);
+ else
+ Coupled.set(SI);
+ }
+ }
+ }
+
+ Constraint NewConstraint;
+ NewConstraint.setAny(SE);
+
+ // test separable subscripts
+ for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) {
+ switch (Pair[SI].Classification) {
+ case Subscript::SIV: {
+ unsigned Level;
+ const SCEV *SplitIter = NULL;
+ (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
+ Result, NewConstraint, SplitIter);
+ if (Level == SplitLevel) {
+ assert(SplitIter != NULL);
+ return SplitIter;
+ }
+ break;
+ }
+ case Subscript::ZIV:
+ case Subscript::RDIV:
+ case Subscript::MIV:
+ break;
+ default:
+ llvm_unreachable("subscript has unexpected classification");
+ }
+ }
+
+ if (Coupled.count()) {
+ // test coupled subscript groups
+ SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
+ for (unsigned II = 0; II <= MaxLevels; ++II)
+ Constraints[II].setAny(SE);
+ for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) {
+ SmallBitVector Group(Pair[SI].Group);
+ SmallBitVector Sivs(Pairs);
+ SmallBitVector Mivs(Pairs);
+ SmallBitVector ConstrainedLevels(MaxLevels + 1);
+ for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) {
+ if (Pair[SJ].Classification == Subscript::SIV)
+ Sivs.set(SJ);
+ else
+ Mivs.set(SJ);
+ }
+ while (Sivs.any()) {
+ bool Changed = false;
+ for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) {
+ // SJ is an SIV subscript that's part of the current coupled group
+ unsigned Level;
+ const SCEV *SplitIter = NULL;
+ (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
+ Result, NewConstraint, SplitIter);
+ if (Level == SplitLevel && SplitIter)
+ return SplitIter;
+ ConstrainedLevels.set(Level);
+ if (intersectConstraints(&Constraints[Level], &NewConstraint))
+ Changed = true;
+ Sivs.reset(SJ);
+ }
+ if (Changed) {
+ // propagate, possibly creating new SIVs and ZIVs
+ for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+ // SJ is an MIV subscript that's part of the current coupled group
+ if (propagate(Pair[SJ].Src, Pair[SJ].Dst,
+ Pair[SJ].Loops, Constraints, Result.Consistent)) {
+ Pair[SJ].Classification =
+ classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
+ Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
+ Pair[SJ].Loops);
+ switch (Pair[SJ].Classification) {
+ case Subscript::ZIV:
+ Mivs.reset(SJ);
+ break;
+ case Subscript::SIV:
+ Sivs.set(SJ);
+ Mivs.reset(SJ);
+ break;
+ case Subscript::RDIV:
+ case Subscript::MIV:
+ break;
+ default:
+ llvm_unreachable("bad subscript classification");
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ llvm_unreachable("somehow reached end of routine");
+ return NULL;
+}
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 5f51f775f14..95e58022ca1 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -243,7 +243,8 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
if (!TD)
return false;
- unsigned IntPtrWidth = TD->getPointerSizeInBits();
+ unsigned AS = GEP.getPointerAddressSpace();
+ unsigned IntPtrWidth = TD->getPointerSizeInBits(AS);
assert(IntPtrWidth == Offset.getBitWidth());
for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
@@ -391,7 +392,8 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
// Track base/offset pairs when converted to a plain integer provided the
// integer is large enough to represent the pointer.
unsigned IntegerSize = I.getType()->getScalarSizeInBits();
- if (TD && IntegerSize >= TD->getPointerSizeInBits()) {
+ unsigned AS = I.getPointerAddressSpace();
+ if (TD && IntegerSize >= TD->getPointerSizeInBits(AS)) {
std::pair<Value *, APInt> BaseAndOffset
= ConstantOffsetPtrs.lookup(I.getOperand(0));
if (BaseAndOffset.first)
@@ -425,7 +427,8 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
// modifications provided the integer is not too large.
Value *Op = I.getOperand(0);
unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
- if (TD && IntegerSize <= TD->getPointerSizeInBits()) {
+ unsigned AS = I.getAddressSpace();
+ if (TD && IntegerSize <= TD->getPointerSizeInBits(AS)) {
std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
if (BaseAndOffset.first)
ConstantOffsetPtrs[&I] = BaseAndOffset;
@@ -760,7 +763,8 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
if (!TD || !V->getType()->isPointerTy())
return 0;
- unsigned IntPtrWidth = TD->getPointerSizeInBits();
+ unsigned AS = cast<PointerType>(V->getType())->getAddressSpace();
+ unsigned IntPtrWidth = TD->getPointerSizeInBits(AS);
APInt Offset = APInt::getNullValue(IntPtrWidth);
// Even though we don't look through PHI nodes, we could be called on an
@@ -824,7 +828,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// size of the byval type by the target's pointer size.
PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType());
- unsigned PointerSize = TD->getPointerSizeInBits();
+ unsigned AS = PTy->getAddressSpace();
+ unsigned PointerSize = TD->getPointerSizeInBits(AS);
// Ceiling division.
unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
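+ // For example, a 96-bit byval type stored through 64-bit pointers
+ // costs (96 + 64 - 1) / 64 = 2 simulated stores.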
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index b3d62487fc1..8e326122fa5 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -666,7 +666,8 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
/// 'Offset' APInt must be the bitwidth of the target's pointer size.
static bool accumulateGEPOffset(const DataLayout &TD, GEPOperator *GEP,
APInt &Offset) {
- unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ unsigned AS = GEP->getPointerAddressSpace();
+ unsigned IntPtrWidth = TD.getPointerSizeInBits(AS);
assert(IntPtrWidth == Offset.getBitWidth());
gep_type_iterator GTI = gep_type_begin(GEP);
@@ -696,12 +697,14 @@ static bool accumulateGEPOffset(const DataLayout &TD, GEPOperator *GEP,
/// accumulates the total constant offset applied in the returned constant. It
/// returns 0 if V is not a pointer, and returns the constant '0' if there are
/// no constant offsets applied.
+/// FIXME: This function also exists in InlineCost.cpp.
static Constant *stripAndComputeConstantOffsets(const DataLayout &TD,
Value *&V) {
if (!V->getType()->isPointerTy())
return 0;
- unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ unsigned AS = cast<PointerType>(V->getType())->getAddressSpace();
+ unsigned IntPtrWidth = TD.getPointerSizeInBits(AS);
APInt Offset = APInt::getNullValue(IntPtrWidth);
// Even though we don't look through PHI nodes, we could be called on an
@@ -1877,7 +1880,9 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
// if the integer type is the same size as the pointer type.
if (MaxRecurse && Q.TD && isa<PtrToIntInst>(LI) &&
- Q.TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) {
+ Q.TD->getPointerSizeInBits(
+ cast<PtrToIntInst>(LI)->getPointerAddressSpace()) ==
+ DstTy->getPrimitiveSizeInBits()) {
if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
// Transfer the cast to the constant.
if (Value *V = SimplifyICmpInst(Pred, SrcOp,
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 5e05f4c8ca1..5c2a49e767f 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -19,8 +19,8 @@
#include "llvm/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/TargetTransformInfo.h"
using namespace llvm;
@@ -1599,15 +1599,15 @@ static bool width_descending(Value *lhs, Value *rhs) {
/// This does not depend on any SCEVExpander state but should be used in
/// the same context that SCEVExpander is used.
unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
- SmallVectorImpl<WeakVH> &DeadInsts,
- const TargetLowering *TLI) {
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ const ScalarTargetTransformInfo *STTI) {
// Find integer phis in order of increasing width.
SmallVector<PHINode*, 8> Phis;
for (BasicBlock::iterator I = L->getHeader()->begin();
PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
Phis.push_back(Phi);
}
- if (TLI)
+ if (STTI)
std::sort(Phis.begin(), Phis.end(), width_descending);
unsigned NumElim = 0;
@@ -1624,8 +1624,8 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
if (!OrigPhiRef) {
OrigPhiRef = Phi;
- if (Phi->getType()->isIntegerTy() && TLI
- && TLI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
+ if (Phi->getType()->isIntegerTy() && STTI &&
+ STTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
// This phi can be freely truncated to the narrowest phi type. Map the
// truncated expression to it so it will be reused for narrow types.
const SCEV *TruncExpr =
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 951b442b874..1d7f0692cbe 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -40,7 +40,8 @@ static unsigned getBitWidth(Type *Ty, const DataLayout *TD) {
if (unsigned BitWidth = Ty->getScalarSizeInBits())
return BitWidth;
assert(isa<PointerType>(Ty) && "Expected a pointer type!");
- return TD ? TD->getPointerSizeInBits() : 0;
+ return TD ?
+ TD->getPointerSizeInBits(cast<PointerType>(Ty)->getAddressSpace()) : 0;
}
static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
@@ -1621,7 +1622,8 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
// Re-sign extend from the pointer size if needed to get overflow edge cases
// right.
- unsigned PtrSize = TD.getPointerSizeInBits();
+ unsigned AS = GEP->getPointerAddressSpace();
+ unsigned PtrSize = TD.getPointerSizeInBits(AS);
if (PtrSize < 64)
Offset = SignExtend64(Offset, PtrSize);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 86bc7aced1e..60e1741d694 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -916,7 +916,7 @@ bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) {
/// ParseOptionalAttrs - Parse a potentially empty attribute list. AttrKind
/// indicates what kind of attribute list this is: 0: function arg, 1: result,
/// 2: function attr.
-bool LLParser::ParseOptionalAttrs(Attributes::Builder &B, unsigned AttrKind) {
+bool LLParser::ParseOptionalAttrs(AttrBuilder &B, unsigned AttrKind) {
LocTy AttrLoc = Lex.getLoc();
bool HaveError = false;
@@ -1435,7 +1435,7 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
// Parse the argument.
LocTy ArgLoc;
Type *ArgTy = 0;
- Attributes::Builder ArgAttrs;
+ AttrBuilder ArgAttrs;
Value *V;
if (ParseType(ArgTy, ArgLoc))
return true;
@@ -1443,7 +1443,8 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
// Otherwise, handle normal operands.
if (ParseOptionalAttrs(ArgAttrs, 0) || ParseValue(ArgTy, V, PFS))
return true;
- ArgList.push_back(ParamInfo(ArgLoc, V, Attributes::get(ArgAttrs)));
+ ArgList.push_back(ParamInfo(ArgLoc, V, Attributes::get(V->getContext(),
+ ArgAttrs)));
}
Lex.Lex(); // Lex the ')'.
@@ -1475,7 +1476,7 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
} else {
LocTy TypeLoc = Lex.getLoc();
Type *ArgTy = 0;
- Attributes::Builder Attrs;
+ AttrBuilder Attrs;
std::string Name;
if (ParseType(ArgTy) ||
@@ -1492,7 +1493,9 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
if (!FunctionType::isValidArgumentType(ArgTy))
return Error(TypeLoc, "invalid type for function argument");
- ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attributes::get(Attrs), Name));
+ ArgList.push_back(ArgInfo(TypeLoc, ArgTy,
+ Attributes::get(ArgTy->getContext(),
+ Attrs), Name));
while (EatIfPresent(lltok::comma)) {
// Handle ... at end of arg list.
@@ -1518,7 +1521,9 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
if (!ArgTy->isFirstClassType())
return Error(TypeLoc, "invalid type for function argument");
- ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attributes::get(Attrs), Name));
+ ArgList.push_back(ArgInfo(TypeLoc, ArgTy,
+ Attributes::get(ArgTy->getContext(), Attrs),
+ Name));
}
}
@@ -1542,7 +1547,7 @@ bool LLParser::ParseFunctionType(Type *&Result) {
for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
if (!ArgList[i].Name.empty())
return Error(ArgList[i].Loc, "argument name invalid in function type");
- if (ArgList[i].Attrs)
+ if (ArgList[i].Attrs.hasAttributes())
return Error(ArgList[i].Loc,
"argument attributes invalid in function type");
}
@@ -2672,7 +2677,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
unsigned Linkage;
unsigned Visibility;
- Attributes::Builder RetAttrs;
+ AttrBuilder RetAttrs;
CallingConv::ID CC;
Type *RetType = 0;
LocTy RetTypeLoc = Lex.getLoc();
@@ -2736,7 +2741,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
SmallVector<ArgInfo, 8> ArgList;
bool isVarArg;
- Attributes::Builder FuncAttrs;
+ AttrBuilder FuncAttrs;
std::string Section;
unsigned Alignment;
std::string GC;
@@ -2766,7 +2771,10 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
SmallVector<AttributeWithIndex, 8> Attrs;
if (RetAttrs.hasAttributes())
- Attrs.push_back(AttributeWithIndex::get(0, Attributes::get(RetAttrs)));
+ Attrs.push_back(
+ AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ Attributes::get(RetType->getContext(),
+ RetAttrs)));
for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
ParamTypeList.push_back(ArgList[i].Ty);
@@ -2775,7 +2783,10 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
}
if (FuncAttrs.hasAttributes())
- Attrs.push_back(AttributeWithIndex::get(~0, Attributes::get(FuncAttrs)));
+ Attrs.push_back(
+ AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ Attributes::get(RetType->getContext(),
+ FuncAttrs)));
AttrListPtr PAL = AttrListPtr::get(Attrs);
@@ -2795,6 +2806,9 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
ForwardRefVals.find(FunctionName);
if (FRVI != ForwardRefVals.end()) {
Fn = M->getFunction(FunctionName);
+ if (!Fn)
+ return Error(FRVI->second.second, "invalid forward reference to "
+ "function as global value!");
if (Fn->getType() != PFT)
return Error(FRVI->second.second, "invalid forward reference to "
"function '" + FunctionName + "' with wrong type!");
@@ -3248,7 +3262,7 @@ bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) {
/// OptionalAttrs 'to' TypeAndValue 'unwind' TypeAndValue
bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
LocTy CallLoc = Lex.getLoc();
- Attributes::Builder RetAttrs, FnAttrs;
+ AttrBuilder RetAttrs, FnAttrs;
CallingConv::ID CC;
Type *RetType = 0;
LocTy RetTypeLoc;
@@ -3294,7 +3308,10 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
// Set up the Attributes for the function.
SmallVector<AttributeWithIndex, 8> Attrs;
if (RetAttrs.hasAttributes())
- Attrs.push_back(AttributeWithIndex::get(0, Attributes::get(RetAttrs)));
+ Attrs.push_back(
+ AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ Attributes::get(Callee->getContext(),
+ RetAttrs)));
SmallVector<Value*, 8> Args;
@@ -3322,7 +3339,10 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
return Error(CallLoc, "not enough parameters specified for call");
if (FnAttrs.hasAttributes())
- Attrs.push_back(AttributeWithIndex::get(~0, Attributes::get(FnAttrs)));
+ Attrs.push_back(
+ AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ Attributes::get(Callee->getContext(),
+ FnAttrs)));
// Finish off the Attributes and check them
AttrListPtr PAL = AttrListPtr::get(Attrs);
@@ -3647,7 +3667,7 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
/// ParameterList OptionalAttrs
bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
bool isTail) {
- Attributes::Builder RetAttrs, FnAttrs;
+ AttrBuilder RetAttrs, FnAttrs;
CallingConv::ID CC;
Type *RetType = 0;
LocTy RetTypeLoc;
@@ -3690,7 +3710,10 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
// Set up the Attributes for the function.
SmallVector<AttributeWithIndex, 8> Attrs;
if (RetAttrs.hasAttributes())
- Attrs.push_back(AttributeWithIndex::get(0, Attributes::get(RetAttrs)));
+ Attrs.push_back(
+ AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ Attributes::get(Callee->getContext(),
+ RetAttrs)));
SmallVector<Value*, 8> Args;
@@ -3718,7 +3741,10 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
return Error(CallLoc, "not enough parameters specified for call");
if (FnAttrs.hasAttributes())
- Attrs.push_back(AttributeWithIndex::get(~0, Attributes::get(FnAttrs)));
+ Attrs.push_back(
+ AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ Attributes::get(Callee->getContext(),
+ FnAttrs)));
// Finish off the Attributes and check them
AttrListPtr PAL = AttrListPtr::get(Attrs);
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 671eaf64291..c6bbdb27aee 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -175,7 +175,7 @@ namespace llvm {
bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM);
bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM);
bool ParseOptionalAddrSpace(unsigned &AddrSpace);
- bool ParseOptionalAttrs(Attributes::Builder &Attrs, unsigned AttrKind);
+ bool ParseOptionalAttrs(AttrBuilder &Attrs, unsigned AttrKind);
bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage);
bool ParseOptionalLinkage(unsigned &Linkage) {
bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage);
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 8860ef065c6..279343c48c6 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -211,7 +211,6 @@ namespace {
}
/// @brief Methods to support type inquiry through isa, cast, and dyn_cast.
- //static inline bool classof(const ConstantPlaceHolder *) { return true; }
static bool classof(const Value *V) {
return isa<ConstantExpr>(V) &&
cast<ConstantExpr>(V)->getOpcode() == Instruction::UserOp1;
@@ -477,14 +476,15 @@ bool BitcodeReader::ParseAttributeBlock() {
for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
Attributes ReconstitutedAttr =
- Attributes::decodeLLVMAttributesForBitcode(Record[i+1]);
+ Attributes::decodeLLVMAttributesForBitcode(Context, Record[i+1]);
Record[i+1] = ReconstitutedAttr.Raw();
}
for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
- if (Attributes(Record[i+1]).hasAttributes())
+ AttrBuilder B(Record[i+1]);
+ if (B.hasAttributes())
Attrs.push_back(AttributeWithIndex::get(Record[i],
- Attributes(Record[i+1])));
+ Attributes::get(Context, B)));
}
MAttributes.push_back(AttrListPtr::get(Attrs));
@@ -891,9 +891,9 @@ bool BitcodeReader::ParseMetadata() {
}
}
-/// DecodeSignRotatedValue - Decode a signed value stored with the sign bit in
+/// decodeSignRotatedValue - Decode a signed value stored with the sign bit in
/// the LSB for dense VBR encoding.
-static uint64_t DecodeSignRotatedValue(uint64_t V) {
+uint64_t BitcodeReader::decodeSignRotatedValue(uint64_t V) {
if ((V & 1) == 0)
return V >> 1;
if (V != 1)
@@ -943,7 +943,7 @@ bool BitcodeReader::ResolveGlobalAndAliasInits() {
static APInt ReadWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits) {
SmallVector<uint64_t, 8> Words(Vals.size());
std::transform(Vals.begin(), Vals.end(), Words.begin(),
- DecodeSignRotatedValue);
+ BitcodeReader::decodeSignRotatedValue);
return APInt(TypeBits, Words);
}
@@ -997,7 +997,7 @@ bool BitcodeReader::ParseConstants() {
case bitc::CST_CODE_INTEGER: // INTEGER: [intval]
if (!CurTy->isIntegerTy() || Record.empty())
return Error("Invalid CST_INTEGER record");
- V = ConstantInt::get(CurTy, DecodeSignRotatedValue(Record[0]));
+ V = ConstantInt::get(CurTy, decodeSignRotatedValue(Record[0]));
break;
case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval]
if (!CurTy->isIntegerTy() || Record.empty())
@@ -1524,13 +1524,22 @@ bool BitcodeReader::ParseModule(bool Resume) {
// Read a record.
switch (Stream.ReadRecord(Code, Record)) {
default: break; // Default behavior, ignore unknown content.
- case bitc::MODULE_CODE_VERSION: // VERSION: [version#]
+ case bitc::MODULE_CODE_VERSION: { // VERSION: [version#]
if (Record.size() < 1)
return Error("Malformed MODULE_CODE_VERSION");
- // Only version #0 is supported so far.
- if (Record[0] != 0)
- return Error("Unknown bitstream version!");
+ // Only versions #0 and #1 are supported so far.
+ unsigned module_version = Record[0];
+ switch (module_version) {
+ default: return Error("Unknown bitstream version!");
+ case 0:
+ UseRelativeIDs = false;
+ break;
+ case 1:
+ UseRelativeIDs = true;
+ break;
+ }
break;
+ }
case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N]
std::string S;
if (ConvertToString(Record, 0, S))
@@ -1797,13 +1806,6 @@ bool BitcodeReader::ParseModuleTriple(std::string &Triple) {
// Read a record.
switch (Stream.ReadRecord(Code, Record)) {
default: break; // Default behavior, ignore unknown content.
- case bitc::MODULE_CODE_VERSION: // VERSION: [version#]
- if (Record.size() < 1)
- return Error("Malformed MODULE_CODE_VERSION");
- // Only version #0 is supported so far.
- if (Record[0] != 0)
- return Error("Unknown bitstream version!");
- break;
case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N]
std::string S;
if (ConvertToString(Record, 0, S))
@@ -2016,7 +2018,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *LHS, *RHS;
if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
- getValue(Record, OpNum, LHS->getType(), RHS) ||
+ popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) ||
OpNum+1 > Record.size())
return Error("Invalid BINOP record");
@@ -2131,8 +2133,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *TrueVal, *FalseVal, *Cond;
if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
- getValue(Record, OpNum, TrueVal->getType(), FalseVal) ||
- getValue(Record, OpNum, Type::getInt1Ty(Context), Cond))
+ popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) ||
+ popValue(Record, OpNum, NextValueNo, Type::getInt1Ty(Context), Cond))
return Error("Invalid SELECT record");
I = SelectInst::Create(Cond, TrueVal, FalseVal);
@@ -2146,7 +2148,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *TrueVal, *FalseVal, *Cond;
if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
- getValue(Record, OpNum, TrueVal->getType(), FalseVal) ||
+ popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) ||
getValueTypePair(Record, OpNum, NextValueNo, Cond))
return Error("Invalid SELECT record");
@@ -2171,7 +2173,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Vec, *Idx;
if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
- getValue(Record, OpNum, Type::getInt32Ty(Context), Idx))
+ popValue(Record, OpNum, NextValueNo, Type::getInt32Ty(Context), Idx))
return Error("Invalid EXTRACTELT record");
I = ExtractElementInst::Create(Vec, Idx);
InstructionList.push_back(I);
@@ -2182,9 +2184,9 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Vec, *Elt, *Idx;
if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
- getValue(Record, OpNum,
+ popValue(Record, OpNum, NextValueNo,
cast<VectorType>(Vec->getType())->getElementType(), Elt) ||
- getValue(Record, OpNum, Type::getInt32Ty(Context), Idx))
+ popValue(Record, OpNum, NextValueNo, Type::getInt32Ty(Context), Idx))
return Error("Invalid INSERTELT record");
I = InsertElementInst::Create(Vec, Elt, Idx);
InstructionList.push_back(I);
@@ -2195,7 +2197,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Vec1, *Vec2, *Mask;
if (getValueTypePair(Record, OpNum, NextValueNo, Vec1) ||
- getValue(Record, OpNum, Vec1->getType(), Vec2))
+ popValue(Record, OpNum, NextValueNo, Vec1->getType(), Vec2))
return Error("Invalid SHUFFLEVEC record");
if (getValueTypePair(Record, OpNum, NextValueNo, Mask))
@@ -2215,7 +2217,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *LHS, *RHS;
if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
- getValue(Record, OpNum, LHS->getType(), RHS) ||
+ popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) ||
OpNum+1 != Record.size())
return Error("Invalid CMP record");
@@ -2260,7 +2262,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
}
else {
BasicBlock *FalseDest = getBasicBlock(Record[1]);
- Value *Cond = getFnValueByID(Record[2], Type::getInt1Ty(Context));
+ Value *Cond = getValue(Record, 2, NextValueNo,
+ Type::getInt1Ty(Context));
if (FalseDest == 0 || Cond == 0)
return Error("Invalid BR record");
I = BranchInst::Create(TrueDest, FalseDest, Cond);
@@ -2276,7 +2279,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
Type *OpTy = getTypeByID(Record[1]);
unsigned ValueBitWidth = cast<IntegerType>(OpTy)->getBitWidth();
- Value *Cond = getFnValueByID(Record[2], OpTy);
+ Value *Cond = getValue(Record, 2, NextValueNo, OpTy);
BasicBlock *Default = getBasicBlock(Record[3]);
if (OpTy == 0 || Cond == 0 || Default == 0)
return Error("Invalid SWITCH record");
@@ -2331,7 +2334,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (Record.size() < 3 || (Record.size() & 1) == 0)
return Error("Invalid SWITCH record");
Type *OpTy = getTypeByID(Record[0]);
- Value *Cond = getFnValueByID(Record[1], OpTy);
+ Value *Cond = getValue(Record, 1, NextValueNo, OpTy);
BasicBlock *Default = getBasicBlock(Record[2]);
if (OpTy == 0 || Cond == 0 || Default == 0)
return Error("Invalid SWITCH record");
@@ -2355,7 +2358,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (Record.size() < 2)
return Error("Invalid INDIRECTBR record");
Type *OpTy = getTypeByID(Record[0]);
- Value *Address = getFnValueByID(Record[1], OpTy);
+ Value *Address = getValue(Record, 1, NextValueNo, OpTy);
if (OpTy == 0 || Address == 0)
return Error("Invalid INDIRECTBR record");
unsigned NumDests = Record.size()-2;
@@ -2397,7 +2400,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
SmallVector<Value*, 16> Ops;
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
- Ops.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i)));
+ Ops.push_back(getValue(Record, OpNum, NextValueNo,
+ FTy->getParamType(i)));
if (Ops.back() == 0) return Error("Invalid INVOKE record");
}
@@ -2444,7 +2448,14 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
InstructionList.push_back(PN);
for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) {
- Value *V = getFnValueByID(Record[1+i], Ty);
+ Value *V;
+ // With the new function encoding, it is possible that operands have
+ // negative IDs (for forward references). Use a signed VBR
+ // representation to keep the encoding small.
+ if (UseRelativeIDs)
+ V = getValueSigned(Record, 1+i, NextValueNo, Ty);
+ else
+ V = getValue(Record, 1+i, NextValueNo, Ty);
BasicBlock *BB = getBasicBlock(Record[2+i]);
if (!V || !BB) return Error("Invalid PHI record");
PN->addIncoming(V, BB);
@@ -2542,7 +2553,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Val, *Ptr;
if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
- getValue(Record, OpNum,
+ popValue(Record, OpNum, NextValueNo,
cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
OpNum+2 != Record.size())
return Error("Invalid STORE record");
@@ -2556,7 +2567,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Val, *Ptr;
if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
- getValue(Record, OpNum,
+ popValue(Record, OpNum, NextValueNo,
cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
OpNum+4 != Record.size())
return Error("Invalid STOREATOMIC record");
@@ -2579,9 +2590,9 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Ptr, *Cmp, *New;
if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
- getValue(Record, OpNum,
+ popValue(Record, OpNum, NextValueNo,
cast<PointerType>(Ptr->getType())->getElementType(), Cmp) ||
- getValue(Record, OpNum,
+ popValue(Record, OpNum, NextValueNo,
cast<PointerType>(Ptr->getType())->getElementType(), New) ||
OpNum+3 != Record.size())
return Error("Invalid CMPXCHG record");
@@ -2599,7 +2610,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Ptr, *Val;
if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
- getValue(Record, OpNum,
+ popValue(Record, OpNum, NextValueNo,
cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
OpNum+4 != Record.size())
return Error("Invalid ATOMICRMW record");
@@ -2653,7 +2664,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (FTy->getParamType(i)->isLabelTy())
Args.push_back(getBasicBlock(Record[OpNum]));
else
- Args.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i)));
+ Args.push_back(getValue(Record, OpNum, NextValueNo,
+ FTy->getParamType(i)));
if (Args.back() == 0) return Error("Invalid CALL record");
}
@@ -2682,7 +2694,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (Record.size() < 3)
return Error("Invalid VAARG record");
Type *OpTy = getTypeByID(Record[0]);
- Value *Op = getFnValueByID(Record[1], OpTy);
+ Value *Op = getValue(Record, 1, NextValueNo, OpTy);
Type *ResTy = getTypeByID(Record[2]);
if (!OpTy || !Op || !ResTy)
return Error("Invalid VAARG record");
diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h
index e7c4e94f785..3d5c0eb4def 100644
--- a/lib/Bitcode/Reader/BitcodeReader.h
+++ b/lib/Bitcode/Reader/BitcodeReader.h
@@ -179,18 +179,27 @@ class BitcodeReader : public GVMaterializer {
typedef std::pair<unsigned, GlobalVariable*> BlockAddrRefTy;
DenseMap<Function*, std::vector<BlockAddrRefTy> > BlockAddrFwdRefs;
+ /// UseRelativeIDs - Indicates that we are using a new encoding for
+ /// instruction operands where most operands in the current
+ /// FUNCTION_BLOCK are encoded relative to the instruction number,
+ /// for a more compact encoding. Some instruction operands are not
+ /// relative to the instruction ID: basic block numbers, and types.
+ /// Once the old-style function blocks have been phased out, this
+ /// flag will no longer be needed.
+ bool UseRelativeIDs;
+
public:
explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C)
: Context(C), TheModule(0), Buffer(buffer), BufferOwned(false),
LazyStreamer(0), NextUnreadBit(0), SeenValueSymbolTable(false),
ErrorString(0), ValueList(C), MDValueList(C),
- SeenFirstFunctionBody(false) {
+ SeenFirstFunctionBody(false), UseRelativeIDs(false) {
}
explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C)
: Context(C), TheModule(0), Buffer(0), BufferOwned(false),
LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false),
ErrorString(0), ValueList(C), MDValueList(C),
- SeenFirstFunctionBody(false) {
+ SeenFirstFunctionBody(false), UseRelativeIDs(false) {
}
~BitcodeReader() {
FreeState();
@@ -223,6 +232,9 @@ public:
/// @brief Cheap mechanism to just extract module triple
/// @returns true if an error occurred.
bool ParseTriple(std::string &Triple);
+
+ static uint64_t decodeSignRotatedValue(uint64_t V);
+
private:
Type *getTypeByID(unsigned ID);
Value *getFnValueByID(unsigned ID, Type *Ty) {
@@ -247,6 +259,9 @@ private:
unsigned InstNum, Value *&ResVal) {
if (Slot == Record.size()) return true;
unsigned ValNo = (unsigned)Record[Slot++];
+ // Adjust the ValNo, if it was encoded relative to the InstNum.
+ if (UseRelativeIDs)
+ ValNo = InstNum - ValNo;
if (ValNo < InstNum) {
// If this is not a forward reference, just return the value we already
// have.
@@ -255,20 +270,54 @@ private:
} else if (Slot == Record.size()) {
return true;
}
-
+
unsigned TypeNo = (unsigned)Record[Slot++];
ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo));
return ResVal == 0;
}
- bool getValue(SmallVector<uint64_t, 64> &Record, unsigned &Slot,
- Type *Ty, Value *&ResVal) {
- if (Slot == Record.size()) return true;
- unsigned ValNo = (unsigned)Record[Slot++];
- ResVal = getFnValueByID(ValNo, Ty);
+
+ /// popValue - Read a value out of the specified record from slot 'Slot'.
+ /// Increment Slot past the number of slots used by the value in the record.
+ /// Return true if there is an error.
+ bool popValue(SmallVector<uint64_t, 64> &Record, unsigned &Slot,
+ unsigned InstNum, Type *Ty, Value *&ResVal) {
+ if (getValue(Record, Slot, InstNum, Ty, ResVal))
+ return true;
+ // All values currently take a single record slot.
+ ++Slot;
+ return false;
+ }
+
+ /// getValue - Like popValue, but does not increment the Slot number.
+ bool getValue(SmallVector<uint64_t, 64> &Record, unsigned Slot,
+ unsigned InstNum, Type *Ty, Value *&ResVal) {
+ ResVal = getValue(Record, Slot, InstNum, Ty);
return ResVal == 0;
}
-
+ /// getValue - Version of getValue that returns ResVal directly,
+ /// or 0 if there is an error.
+ Value *getValue(SmallVector<uint64_t, 64> &Record, unsigned Slot,
+ unsigned InstNum, Type *Ty) {
+ if (Slot == Record.size()) return 0;
+ unsigned ValNo = (unsigned)Record[Slot];
+ // Adjust the ValNo, if it was encoded relative to the InstNum.
+ if (UseRelativeIDs)
+ ValNo = InstNum - ValNo;
+ return getFnValueByID(ValNo, Ty);
+ }
+
+ /// getValueSigned - Like getValue, but decodes signed VBRs.
+ Value *getValueSigned(SmallVector<uint64_t, 64> &Record, unsigned Slot,
+ unsigned InstNum, Type *Ty) {
+ if (Slot == Record.size()) return 0;
+ unsigned ValNo = (unsigned)decodeSignRotatedValue(Record[Slot]);
+ // Adjust the ValNo, if it was encoded relative to the InstNum.
+ if (UseRelativeIDs)
+ ValNo = InstNum - ValNo;
+ return getFnValueByID(ValNo, Ty);
+ }
+
bool ParseModule(bool Resume);
bool ParseAttributeBlock();
bool ParseTypeTable();
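decodeSignRotatedValue() is only declared in this hunk. A plausible
implementation, mirroring the emitSignedInt64() encoder added to the
writer below (the INT64_MIN special case is an assumption about how
"-0" is repurposed):

    // Sketch: invert the sign-rotated encoding, where the low bit
    // carries the sign and the upper bits carry the magnitude.
    uint64_t decodeSignRotatedValue(uint64_t V) {
      if ((V & 1) == 0)
        return V >> 1;      // non-negative value
      if (V != 1)
        return -(V >> 1);   // negative value
      return 1ULL << 63;    // "-0" encodes INT64_MIN
    }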
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index b3f1bb13a9f..60c657ae6dd 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -41,8 +41,6 @@ EnablePreserveUseListOrdering("enable-bc-uselist-preserve",
/// These are manifest constants used by the bitcode writer. They do not need to
/// be kept in sync with the reader, but need to be consistent within this file.
enum {
- CurVersion = 0,
-
// VALUE_SYMTAB_BLOCK abbrev id's.
VST_ENTRY_8_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
VST_ENTRY_7_ABBREV,
@@ -722,16 +720,20 @@ static void WriteModuleMetadataStore(const Module *M, BitstreamWriter &Stream) {
Stream.ExitBlock();
}
+static void emitSignedInt64(SmallVectorImpl<uint64_t> &Vals, uint64_t V) {
+ if ((int64_t)V >= 0)
+ Vals.push_back(V << 1);
+ else
+ Vals.push_back((-V << 1) | 1);
+}
+
static void EmitAPInt(SmallVectorImpl<uint64_t> &Vals,
unsigned &Code, unsigned &AbbrevToUse, const APInt &Val,
bool EmitSizeForWideNumbers = false
) {
if (Val.getBitWidth() <= 64) {
uint64_t V = Val.getSExtValue();
- if ((int64_t)V >= 0)
- Vals.push_back(V << 1);
- else
- Vals.push_back((-V << 1) | 1);
+ emitSignedInt64(Vals, V);
Code = bitc::CST_CODE_INTEGER;
AbbrevToUse = CONSTANTS_INTEGER_ABBREV;
} else {
@@ -747,11 +749,7 @@ static void EmitAPInt(SmallVectorImpl<uint64_t> &Vals,
const uint64_t *RawWords = Val.getRawData();
for (unsigned i = 0; i != NWords; ++i) {
- int64_t V = RawWords[i];
- if (V >= 0)
- Vals.push_back(V << 1);
- else
- Vals.push_back((-V << 1) | 1);
+ emitSignedInt64(Vals, RawWords[i]);
}
Code = bitc::CST_CODE_WIDE_INTEGER;
}
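A few worked values make the rotation concrete (these follow directly
from the two branches of emitSignedInt64 above):

    //   V =  0  ->  0    (0 << 1)
    //   V =  5  -> 10    (5 << 1)
    //   V = -3  ->  7    ((3 << 1) | 1)
    //   V = -1  ->  3    ((1 << 1) | 1)
    // Small magnitudes of either sign map to small encodings, so the
    // subsequent VBR emission needs few chunks.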
@@ -1025,12 +1023,13 @@ static void WriteModuleConstants(const ValueEnumerator &VE,
///
/// This function adds V's value ID to Vals. If the value ID is higher than the
/// instruction ID, then it is a forward reference, and it also includes the
-/// type ID.
+/// type ID. The value ID written is encoded relative to the InstID.
static bool PushValueAndType(const Value *V, unsigned InstID,
SmallVector<unsigned, 64> &Vals,
ValueEnumerator &VE) {
unsigned ValID = VE.getValueID(V);
- Vals.push_back(ValID);
+ // Make encoding relative to the InstID.
+ Vals.push_back(InstID - ValID);
if (ValID >= InstID) {
Vals.push_back(VE.getTypeID(V->getType()));
return true;
@@ -1038,6 +1037,30 @@ static bool PushValueAndType(const Value *V, unsigned InstID,
return false;
}
+/// pushValue - Like PushValueAndType, but where the type of the value is
+/// omitted (perhaps it was already encoded in an earlier operand).
+static void pushValue(const Value *V, unsigned InstID,
+ SmallVector<unsigned, 64> &Vals,
+ ValueEnumerator &VE) {
+ unsigned ValID = VE.getValueID(V);
+ Vals.push_back(InstID - ValID);
+}
+
+static void pushValue64(const Value *V, unsigned InstID,
+ SmallVector<uint64_t, 128> &Vals,
+ ValueEnumerator &VE) {
+ uint64_t ValID = VE.getValueID(V);
+ Vals.push_back(InstID - ValID);
+}
+
+static void pushValueSigned(const Value *V, unsigned InstID,
+ SmallVector<uint64_t, 128> &Vals,
+ ValueEnumerator &VE) {
+ unsigned ValID = VE.getValueID(V);
+ int64_t diff = ((int32_t)InstID - (int32_t)ValID);
+ emitSignedInt64(Vals, diff);
+}
+
/// WriteInstruction - Emit an instruction to the specified stream.
static void WriteInstruction(const Instruction &I, unsigned InstID,
ValueEnumerator &VE, BitstreamWriter &Stream,
@@ -1058,7 +1081,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
Code = bitc::FUNC_CODE_INST_BINOP;
if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE))
AbbrevToUse = FUNCTION_INST_BINOP_ABBREV;
- Vals.push_back(VE.getValueID(I.getOperand(1)));
+ pushValue(I.getOperand(1), InstID, Vals, VE);
Vals.push_back(GetEncodedBinaryOpcode(I.getOpcode()));
uint64_t Flags = GetOptimizationFlags(&I);
if (Flags != 0) {
@@ -1096,32 +1119,32 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
case Instruction::Select:
Code = bitc::FUNC_CODE_INST_VSELECT;
PushValueAndType(I.getOperand(1), InstID, Vals, VE);
- Vals.push_back(VE.getValueID(I.getOperand(2)));
+ pushValue(I.getOperand(2), InstID, Vals, VE);
PushValueAndType(I.getOperand(0), InstID, Vals, VE);
break;
case Instruction::ExtractElement:
Code = bitc::FUNC_CODE_INST_EXTRACTELT;
PushValueAndType(I.getOperand(0), InstID, Vals, VE);
- Vals.push_back(VE.getValueID(I.getOperand(1)));
+ pushValue(I.getOperand(1), InstID, Vals, VE);
break;
case Instruction::InsertElement:
Code = bitc::FUNC_CODE_INST_INSERTELT;
PushValueAndType(I.getOperand(0), InstID, Vals, VE);
- Vals.push_back(VE.getValueID(I.getOperand(1)));
- Vals.push_back(VE.getValueID(I.getOperand(2)));
+ pushValue(I.getOperand(1), InstID, Vals, VE);
+ pushValue(I.getOperand(2), InstID, Vals, VE);
break;
case Instruction::ShuffleVector:
Code = bitc::FUNC_CODE_INST_SHUFFLEVEC;
PushValueAndType(I.getOperand(0), InstID, Vals, VE);
- Vals.push_back(VE.getValueID(I.getOperand(1)));
- Vals.push_back(VE.getValueID(I.getOperand(2)));
+ pushValue(I.getOperand(1), InstID, Vals, VE);
+ pushValue(I.getOperand(2), InstID, Vals, VE);
break;
case Instruction::ICmp:
case Instruction::FCmp:
// compare returning Int1Ty or vector of Int1Ty
Code = bitc::FUNC_CODE_INST_CMP2;
PushValueAndType(I.getOperand(0), InstID, Vals, VE);
- Vals.push_back(VE.getValueID(I.getOperand(1)));
+ pushValue(I.getOperand(1), InstID, Vals, VE);
Vals.push_back(cast<CmpInst>(I).getPredicate());
break;
@@ -1147,7 +1170,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
Vals.push_back(VE.getValueID(II.getSuccessor(0)));
if (II.isConditional()) {
Vals.push_back(VE.getValueID(II.getSuccessor(1)));
- Vals.push_back(VE.getValueID(II.getCondition()));
+ pushValue(II.getCondition(), InstID, Vals, VE);
}
}
break;
@@ -1164,7 +1187,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
Vals64.push_back(SwitchRecordHeader);
Vals64.push_back(VE.getTypeID(SI.getCondition()->getType()));
- Vals64.push_back(VE.getValueID(SI.getCondition()));
+ pushValue64(SI.getCondition(), InstID, Vals64, VE);
Vals64.push_back(VE.getValueID(SI.getDefaultDest()));
Vals64.push_back(SI.getNumCases());
for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end();
@@ -1215,7 +1238,9 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
case Instruction::IndirectBr:
Code = bitc::FUNC_CODE_INST_INDIRECTBR;
Vals.push_back(VE.getTypeID(I.getOperand(0)->getType()));
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ // Encode the address operand as relative, but not the basic blocks.
+ pushValue(I.getOperand(0), InstID, Vals, VE);
+ for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i)
Vals.push_back(VE.getValueID(I.getOperand(i)));
break;
@@ -1234,7 +1259,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
// Emit value #'s for the fixed parameters.
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- Vals.push_back(VE.getValueID(I.getOperand(i))); // fixed param.
+ pushValue(I.getOperand(i), InstID, Vals, VE); // fixed param.
// Emit type/value pairs for varargs params.
if (FTy->isVarArg()) {
@@ -1256,12 +1281,19 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
case Instruction::PHI: {
const PHINode &PN = cast<PHINode>(I);
Code = bitc::FUNC_CODE_INST_PHI;
- Vals.push_back(VE.getTypeID(PN.getType()));
+ // With the newer instruction encoding, forward references can yield
+ // negative-valued relative IDs. This is most common for PHIs, so we
+ // use signed VBRs.
+ SmallVector<uint64_t, 128> Vals64;
+ Vals64.push_back(VE.getTypeID(PN.getType()));
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
- Vals.push_back(VE.getValueID(PN.getIncomingValue(i)));
- Vals.push_back(VE.getValueID(PN.getIncomingBlock(i)));
+ pushValueSigned(PN.getIncomingValue(i), InstID, Vals64, VE);
+ Vals64.push_back(VE.getValueID(PN.getIncomingBlock(i)));
}
- break;
+ // Emit a Vals64 vector and exit.
+ Stream.EmitRecord(Code, Vals64, AbbrevToUse);
+ Vals64.clear();
+ return;
}
case Instruction::LandingPad: {
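The PHI case above is the one place where InstID - ValID routinely goes
negative: an incoming value on a back edge is defined after the PHI, so
ValID > InstID. A short numeric sketch of why the unsigned relative
encoding is unsuitable there:

    // Suppose the PHI is instruction 10 and an incoming value is
    // defined at instruction 13 (a forward reference):
    //   relative ID = 10 - 13 = -3
    // Stored in an unsigned record slot, -3 wraps to a huge integer
    // and a long VBR; sign-rotated, it is just 7. Hence the switch to
    // pushValueSigned/emitSignedInt64 for PHI operands.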
@@ -1311,7 +1343,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
else
Code = bitc::FUNC_CODE_INST_STORE;
PushValueAndType(I.getOperand(1), InstID, Vals, VE); // ptrty + ptr
- Vals.push_back(VE.getValueID(I.getOperand(0))); // val.
+ pushValue(I.getOperand(0), InstID, Vals, VE); // val.
Vals.push_back(Log2_32(cast<StoreInst>(I).getAlignment())+1);
Vals.push_back(cast<StoreInst>(I).isVolatile());
if (cast<StoreInst>(I).isAtomic()) {
@@ -1322,8 +1354,8 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
case Instruction::AtomicCmpXchg:
Code = bitc::FUNC_CODE_INST_CMPXCHG;
PushValueAndType(I.getOperand(0), InstID, Vals, VE); // ptrty + ptr
- Vals.push_back(VE.getValueID(I.getOperand(1))); // cmp.
- Vals.push_back(VE.getValueID(I.getOperand(2))); // newval.
+ pushValue(I.getOperand(1), InstID, Vals, VE); // cmp.
+ pushValue(I.getOperand(2), InstID, Vals, VE); // newval.
Vals.push_back(cast<AtomicCmpXchgInst>(I).isVolatile());
Vals.push_back(GetEncodedOrdering(
cast<AtomicCmpXchgInst>(I).getOrdering()));
@@ -1333,7 +1365,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
case Instruction::AtomicRMW:
Code = bitc::FUNC_CODE_INST_ATOMICRMW;
PushValueAndType(I.getOperand(0), InstID, Vals, VE); // ptrty + ptr
- Vals.push_back(VE.getValueID(I.getOperand(1))); // val.
+ pushValue(I.getOperand(1), InstID, Vals, VE); // val.
Vals.push_back(GetEncodedRMWOperation(
cast<AtomicRMWInst>(I).getOperation()));
Vals.push_back(cast<AtomicRMWInst>(I).isVolatile());
@@ -1358,8 +1390,13 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
PushValueAndType(CI.getCalledValue(), InstID, Vals, VE); // Callee
// Emit value #'s for the fixed parameters.
- for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- Vals.push_back(VE.getValueID(CI.getArgOperand(i))); // fixed param.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
+ // Check for labels (can happen with asm labels).
+ if (FTy->getParamType(i)->isLabelTy())
+ Vals.push_back(VE.getValueID(CI.getArgOperand(i)));
+ else
+ pushValue(CI.getArgOperand(i), InstID, Vals, VE); // fixed param.
+ }
// Emit type/value pairs for varargs params.
if (FTy->isVarArg()) {
@@ -1372,7 +1409,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
case Instruction::VAArg:
Code = bitc::FUNC_CODE_INST_VAARG;
Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); // valistty
- Vals.push_back(VE.getValueID(I.getOperand(0))); // valist.
+ pushValue(I.getOperand(0), InstID, Vals, VE); // valist.
Vals.push_back(VE.getTypeID(I.getType())); // restype.
break;
}
@@ -1514,8 +1551,8 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
// Emit blockinfo, which defines the standard abbreviations etc.
static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
// We only want to emit block info records for blocks that have multiple
- // instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK. Other
- // blocks can defined their abbrevs inline.
+ // instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK.
+ // Other blocks can define their abbrevs inline.
Stream.EnterBlockInfoBlock(2);
{ // 8-bit fixed-width VST_ENTRY/VST_BBENTRY strings.
@@ -1773,12 +1810,10 @@ static void WriteModuleUseLists(const Module *M, ValueEnumerator &VE,
static void WriteModule(const Module *M, BitstreamWriter &Stream) {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
- // Emit the version number if it is non-zero.
- if (CurVersion) {
- SmallVector<unsigned, 1> Vals;
- Vals.push_back(CurVersion);
- Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals);
- }
+ SmallVector<unsigned, 1> Vals;
+ unsigned CurVersion = 1;
+ Vals.push_back(CurVersion);
+ Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals);
// Analyze the module, enumerating globals, functions, etc.
ValueEnumerator VE(M);
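Emitting MODULE_CODE_VERSION = 1 is how the writer announces the
relative operand encoding. On the reader side one would expect the
version record to set UseRelativeIDs; a hedged sketch (the exact
record-handling code is not part of this excerpt):

    // Sketch, assuming Record[0] holds the module version.
    // Returns true on error, matching the reader's conventions.
    static bool handleModuleVersion(const SmallVectorImpl<uint64_t> &Record,
                                    bool &UseRelativeIDs) {
      if (Record.empty())
        return true;                      // malformed version record
      if (Record[0] > 1)
        return true;                      // unknown bitstream version
      UseRelativeIDs = (Record[0] >= 1);  // version 0 = absolute IDs
      return false;
    }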
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 205480a4692..7a1c049d522 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -635,7 +635,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
--R;
const unsigned NewSuperReg = Order[R];
// Don't consider non-allocatable registers
- if (!RegClassInfo.isAllocatable(NewSuperReg)) continue;
+ if (!MRI.isAllocatable(NewSuperReg)) continue;
// Don't replace a register with itself.
if (NewSuperReg == SuperReg) continue;
@@ -818,7 +818,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg));
assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
- if (!RegClassInfo.isAllocatable(AntiDepReg)) {
+ if (!MRI.isAllocatable(AntiDepReg)) {
// Don't break anti-dependencies on non-allocatable registers.
DEBUG(dbgs() << " (non-allocatable)\n");
continue;
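This hunk and several below replace per-pass cached BitVectors
(ReservedRegs, AllocatableRegs) with queries on MachineRegisterInfo.
The resulting usage pattern, sketched with the accessors the diff
itself relies on:

    // Sketch: ask MachineRegisterInfo instead of caching
    // TRI->getReservedRegs(MF) in every pass that needs it.
    static bool mayAllocate(const MachineFunction &MF, unsigned Reg) {
      const MachineRegisterInfo &MRI = MF.getRegInfo();
      return MRI.isAllocatable(Reg) && !MRI.isReserved(Reg);
    }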
diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp
index 32ad34a76d6..7cde136c5ef 100644
--- a/lib/CodeGen/AllocationOrder.cpp
+++ b/lib/CodeGen/AllocationOrder.cpp
@@ -29,6 +29,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
const TargetRegisterClass *RC = VRM.getRegInfo().getRegClass(VirtReg);
std::pair<unsigned, unsigned> HintPair =
VRM.getRegInfo().getRegAllocationHint(VirtReg);
+ const MachineRegisterInfo &MRI = VRM.getRegInfo();
// HintPair.second is a register, phys or virt.
Hint = HintPair.second;
@@ -52,7 +53,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
unsigned *P = new unsigned[Order.size()];
Begin = P;
for (unsigned i = 0; i != Order.size(); ++i)
- if (!RCI.isReserved(Order[i]))
+ if (!MRI.isReserved(Order[i]))
*P++ = Order[i];
End = P;
@@ -69,7 +70,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
// The hint must be a valid physreg for allocation.
if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
- !RC->contains(Hint) || RCI.isReserved(Hint)))
+ !RC->contains(Hint) || MRI.isReserved(Hint)))
Hint = 0;
}
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 09e30eba579..5162ad762e7 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -314,8 +314,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
// the return. Ignore noalias because it doesn't affect the call sequence.
const Function *F = ExitBB->getParent();
Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
- if (Attributes::Builder(CalleeRetAttr ^ CallerRetAttr)
- .removeAttribute(Attributes::NoAlias).hasAttributes())
+ if (AttrBuilder(CalleeRetAttr).removeAttribute(Attributes::NoAlias) !=
+ AttrBuilder(CallerRetAttr).removeAttribute(Attributes::NoAlias))
return false;
// It's not safe to eliminate the sign / zero extension of the return value.
@@ -356,7 +356,7 @@ bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
// Conservatively require the attributes of the call to match those of
// the return. Ignore noalias because it doesn't affect the call sequence.
Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
- if (Attributes::Builder(CallerRetAttr)
+ if (AttrBuilder(CallerRetAttr)
.removeAttribute(Attributes::NoAlias).hasAttributes())
return false;
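Both call sites now compare attribute sets through AttrBuilder instead
of XOR-ing raw Attributes. The idiom, sketched (operator== is assumed
from the operator!= used in the first hunk):

    // Sketch: "equal ignoring NoAlias" via two scratch builders.
    AttrBuilder CalleeB(CalleeRetAttr);
    AttrBuilder CallerB(CallerRetAttr);
    CalleeB.removeAttribute(Attributes::NoAlias);
    CallerB.removeAttribute(Attributes::NoAlias);
    bool SameModuloNoAlias = (CalleeB == CallerB);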
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index d74a70362a2..4de98da655b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -385,7 +385,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// - __tlv_bootstrap - used to make sure support exists
// - spare pointer, used when mapped by the runtime
// - pointer to mangled symbol above with initializer
- unsigned PtrSize = TD->getPointerSizeInBits()/8;
+ unsigned AS = GV->getType()->getAddressSpace();
+ unsigned PtrSize = TD->getPointerSizeInBits(AS)/8;
OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
PtrSize, 0);
OutStreamer.EmitIntValue(0, PtrSize, 0);
@@ -1299,7 +1300,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
// Emit the function pointers in the target-specific order
const DataLayout *TD = TM.getDataLayout();
- unsigned Align = Log2_32(TD->getPointerPrefAlignment());
+ unsigned Align = Log2_32(TD->getPointerPrefAlignment(0));
std::stable_sort(Structors.begin(), Structors.end(), priority_order);
for (unsigned i = 0, e = Structors.size(); i != e; ++i) {
const MCSection *OutputSection =
@@ -1480,8 +1481,9 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
if (Offset == 0)
return Base;
+ unsigned AS = cast<PointerType>(CE->getType())->getAddressSpace();
// Truncate/sext the offset to the pointer size.
- unsigned Width = TD.getPointerSizeInBits();
+ unsigned Width = TD.getPointerSizeInBits(AS);
if (Width < 64)
Offset = SignExtend64(Offset, Width);
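All of the AsmPrinter changes in this region thread an address space
through the pointer-size queries. Illustrative only (the data layout
string is an assumed example):

    // With a layout like "p:64:64-p1:32:32", pointer width depends on
    // which address space is being asked about:
    unsigned DefaultBits = TD.getPointerSizeInBits(0); // 64
    unsigned AS1Bits     = TD.getPointerSizeInBits(1); // 32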
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index d94e1fe61bf..6c17af2e8c8 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -112,7 +112,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
switch (Encoding & 0x07) {
default: llvm_unreachable("Invalid encoded value.");
- case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize();
+ case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize(0);
case dwarf::DW_EH_PE_udata2: return 2;
case dwarf::DW_EH_PE_udata4: return 4;
case dwarf::DW_EH_PE_udata8: return 8;
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 4d73b3c2226..73e18cd817b 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -200,7 +200,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
case dwarf::DW_FORM_addr:
- Size = Asm->getDataLayout().getPointerSize(); break;
+ Size = Asm->getDataLayout().getPointerSize(0); break;
default: llvm_unreachable("DIE Value form not supported yet");
}
Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/);
@@ -222,7 +222,7 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
case dwarf::DW_FORM_data8: return sizeof(int64_t);
case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer);
case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer);
- case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize();
+ case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize(0);
default: llvm_unreachable("DIE Value form not supported yet");
}
}
@@ -249,7 +249,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const {
unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
- return AP->getDataLayout().getPointerSize();
+ return AP->getDataLayout().getPointerSize(0);
}
#ifndef NDEBUG
@@ -273,7 +273,7 @@ void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const {
unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
- return AP->getDataLayout().getPointerSize();
+ return AP->getDataLayout().getPointerSize(0);
}
#ifndef NDEBUG
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index f93ea1b045b..28a96f3b2b6 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -214,9 +214,6 @@ namespace llvm {
///
virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const = 0;
- // Implement isa/cast/dyncast.
- static bool classof(const DIEValue *) { return true; }
-
#ifndef NDEBUG
virtual void print(raw_ostream &O) = 0;
void dump();
@@ -257,7 +254,6 @@ namespace llvm {
virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
// Implement isa/cast/dyncast.
- static bool classof(const DIEInteger *) { return true; }
static bool classof(const DIEValue *I) { return I->getType() == isInteger; }
#ifndef NDEBUG
@@ -286,7 +282,6 @@ namespace llvm {
virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
// Implement isa/cast/dyncast.
- static bool classof(const DIELabel *) { return true; }
static bool classof(const DIEValue *L) { return L->getType() == isLabel; }
#ifndef NDEBUG
@@ -313,7 +308,6 @@ namespace llvm {
virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
// Implement isa/cast/dyncast.
- static bool classof(const DIEDelta *) { return true; }
static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
#ifndef NDEBUG
@@ -343,7 +337,6 @@ namespace llvm {
}
// Implement isa/cast/dyncast.
- static bool classof(const DIEEntry *) { return true; }
static bool classof(const DIEValue *E) { return E->getType() == isEntry; }
#ifndef NDEBUG
@@ -383,7 +376,6 @@ namespace llvm {
virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
// Implement isa/cast/dyncast.
- static bool classof(const DIEBlock *) { return true; }
static bool classof(const DIEValue *E) { return E->getType() == isBlock; }
#ifndef NDEBUG
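These DIE.h hunks apply the simplified LLVM-style RTTI convention (see
the HowToSetUpLLVMStyleRTTI.rst changes in this commit): the
always-true classof(const Derived*) overloads are deleted, and isa<> /
dyn_cast<> rely on the base-pointer form alone. A minimal sketch with
illustrative names:

    // Sketch: the single classof that remains per subclass.
    class BaseV {
    public:
      enum Kind { isInt, isLabel };
      Kind K;
      explicit BaseV(Kind K) : K(K) {}
    };
    class IntV : public BaseV {
    public:
      IntV() : BaseV(isInt) {}
      static bool classof(const BaseV *V) { return V->K == isInt; }
    };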
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 6acf19ee8c4..df162e07a88 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -384,7 +384,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
// DW_AT_ranges appropriately.
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
DebugRangeSymbols.size()
- * Asm->getDataLayout().getPointerSize());
+ * Asm->getDataLayout().getPointerSize(0));
for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
RE = Ranges.end(); RI != RE; ++RI) {
DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
@@ -450,7 +450,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
// DW_AT_ranges appropriately.
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
DebugRangeSymbols.size()
- * Asm->getDataLayout().getPointerSize());
+ * Asm->getDataLayout().getPointerSize(0));
for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
RE = Ranges.end(); RI != RE; ++RI) {
DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
@@ -1765,7 +1765,7 @@ void DwarfDebug::emitDebugInfo() {
Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"),
DwarfAbbrevSectionSym);
Asm->OutStreamer.AddComment("Address Size (in bytes)");
- Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize(0));
emitDIE(Die);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID()));
@@ -1811,14 +1811,14 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
Asm->EmitInt8(0);
Asm->OutStreamer.AddComment("Op size");
- Asm->EmitInt8(Asm->getDataLayout().getPointerSize() + 1);
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize(0) + 1);
Asm->OutStreamer.AddComment("DW_LNE_set_address");
Asm->EmitInt8(dwarf::DW_LNE_set_address);
Asm->OutStreamer.AddComment("Section end label");
Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd),
- Asm->getDataLayout().getPointerSize(),
+ Asm->getDataLayout().getPointerSize(0),
0/*AddrSpace*/);
// Mark end of matrix.
@@ -2047,7 +2047,7 @@ void DwarfDebug::emitDebugLoc() {
// Start the dwarf loc section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfLocSection());
- unsigned char Size = Asm->getDataLayout().getPointerSize();
+ unsigned char Size = Asm->getDataLayout().getPointerSize(0);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
unsigned index = 1;
for (SmallVector<DotDebugLocEntry, 4>::iterator
@@ -2144,7 +2144,7 @@ void DwarfDebug::emitDebugRanges() {
// Start the dwarf ranges section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfRangesSection());
- unsigned char Size = Asm->getDataLayout().getPointerSize();
+ unsigned char Size = Asm->getDataLayout().getPointerSize(0);
for (SmallVector<const MCSymbol *, 8>::iterator
I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
I != E; ++I) {
@@ -2202,7 +2202,7 @@ void DwarfDebug::emitDebugInlineInfo() {
Asm->OutStreamer.AddComment("Dwarf Version");
Asm->EmitInt16(dwarf::DWARF_VERSION);
Asm->OutStreamer.AddComment("Address Size (in bytes)");
- Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize(0));
for (SmallVector<const MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
E = InlinedSPNodes.end(); I != E; ++I) {
@@ -2233,7 +2233,7 @@ void DwarfDebug::emitDebugInlineInfo() {
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc");
Asm->OutStreamer.EmitSymbolValue(LI->first,
- Asm->getDataLayout().getPointerSize(),0);
+ Asm->getDataLayout().getPointerSize(0),0);
}
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 08fb6b3f52c..31d07141a1d 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -417,7 +417,7 @@ void DwarfException::EmitExceptionTable() {
// that we're omitting that bit.
TTypeEncoding = dwarf::DW_EH_PE_omit;
// dwarf::DW_EH_PE_absptr
- TypeFormatSize = Asm->getDataLayout().getPointerSize();
+ TypeFormatSize = Asm->getDataLayout().getPointerSize(0);
} else {
// Okay, we have actual filters or typeinfos to emit. As such, we need to
// pick a type encoding for them. We're about to emit a list of pointers to
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index f7c011968c2..d0e27d1d04d 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -91,7 +91,7 @@ void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
/// either condition is detected in a function which uses the GC.
///
void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
- unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
+ unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(0);
AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
EmitCamlGlobal(getModule(), AP, "code_end");
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index bc5258ef7d8..dee339a4586 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -164,7 +164,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
continue;
float hweight = Hint[hint] += weight;
if (TargetRegisterInfo::isPhysicalRegister(hint)) {
- if (hweight > bestPhys && LIS.isAllocatable(hint))
+ if (hweight > bestPhys && mri.isAllocatable(hint))
bestPhys = hweight, hintPhys = hint;
} else {
if (hweight > bestVirt)
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index cdae33c2441..22b91409240 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -50,7 +50,7 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
if (MinAlign > (int)Align)
Align = MinAlign;
MF.getFrameInfo()->ensureMaxAlignment(Align);
- TM.getTargetLowering()->HandleByVal(this, Size);
+ TM.getTargetLowering()->HandleByVal(this, Size, Align);
unsigned Offset = AllocateStack(Size, Align);
addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
}
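HandleByVal now receives the alignment that was actually chosen for the
byval slot. A hedged sketch of a target override (MyTargetLowering is a
hypothetical target; the body is illustrative, not any in-tree
implementation):

    // Sketch: a target rounding the byval size up to the alignment it
    // was just handed, so the next stack argument stays aligned.
    void MyTargetLowering::HandleByVal(CCState *State, unsigned &Size,
                                       unsigned Align) const {
      Size = (Size + Align - 1) & ~(Align - 1);
    }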
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index a9de1c7490f..377b4712bea 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -527,7 +527,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
if (Edge->getKind() == SDep::Anti) {
AntiDepReg = Edge->getReg();
assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
- if (!RegClassInfo.isAllocatable(AntiDepReg))
+ if (!MRI.isAllocatable(AntiDepReg))
// Don't break anti-dependencies on non-allocatable registers.
AntiDepReg = 0;
else if (KeepRegs.test(AntiDepReg))
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index b4394e8d56e..8964269dde5 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -33,7 +33,6 @@ namespace {
const MachineRegisterInfo *MRI;
const TargetInstrInfo *TII;
BitVector LivePhysRegs;
- BitVector ReservedRegs;
public:
static char ID; // Pass identification, replacement for typeid
@@ -70,7 +69,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
// Don't delete live physreg defs, or any reserved register defs.
- if (LivePhysRegs.test(Reg) || ReservedRegs.test(Reg))
+ if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg))
return false;
} else {
if (!MRI->use_nodbg_empty(Reg))
@@ -90,9 +89,6 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getTarget().getRegisterInfo();
TII = MF.getTarget().getInstrInfo();
- // Treat reserved registers as always live.
- ReservedRegs = TRI->getReservedRegs(MF);
-
// Loop over all instructions in all blocks, from bottom to top, so that it's
// more likely that chains of dependent but ultimately dead instructions will
// be cleaned up.
@@ -101,7 +97,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock *MBB = &*I;
// Start out assuming that reserved registers are live out of this block.
- LivePhysRegs = ReservedRegs;
+ LivePhysRegs = MRI->getReservedRegs();
// Also add any explicit live-out physregs for this block.
if (!MBB->empty() && MBB->back().isReturn())
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 141f8edc839..65bc4af99e2 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -110,8 +110,6 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
DomTree = &getAnalysis<MachineDominatorTree>();
if (!LRCalc)
LRCalc = new LiveRangeCalc();
- AllocatableRegs = TRI->getAllocatableSet(fn);
- ReservedRegs = TRI->getReservedRegs(fn);
// Allocate space for all virtual registers.
VirtRegIntervals.resize(MRI->getNumVirtRegs());
@@ -542,11 +540,11 @@ void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) {
// Ignore uses of reserved registers. We only track defs of those.
for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) {
unsigned Root = *Roots;
- if (!isReserved(Root) && !MRI->reg_empty(Root))
+ if (!MRI->isReserved(Root) && !MRI->reg_empty(Root))
LRCalc->extendToUses(LI, Root);
for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) {
unsigned Reg = *Supers;
- if (!isReserved(Reg) && !MRI->reg_empty(Reg))
+ if (!MRI->isReserved(Reg) && !MRI->reg_empty(Reg))
LRCalc->extendToUses(LI, Reg);
}
}
@@ -761,38 +759,41 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill,
LI->removeRange(Kill, MBBEnd);
if (EndPoints) EndPoints->push_back(MBBEnd);
- // Find all blocks that are reachable from MBB without leaving VNI's live
- // range.
- for (df_iterator<MachineBasicBlock*>
- I = df_begin(KillMBB), E = df_end(KillMBB); I != E;) {
- MachineBasicBlock *MBB = *I;
- // KillMBB itself was already handled.
- if (MBB == KillMBB) {
- ++I;
- continue;
- }
+ // Find all blocks that are reachable from KillMBB without leaving VNI's live
+ // range. It is possible that KillMBB itself is reachable, so start a DFS
+ // from each successor.
+ typedef SmallPtrSet<MachineBasicBlock*, 9> VisitedTy;
+ VisitedTy Visited;
+ for (MachineBasicBlock::succ_iterator
+ SuccI = KillMBB->succ_begin(), SuccE = KillMBB->succ_end();
+ SuccI != SuccE; ++SuccI) {
+ for (df_ext_iterator<MachineBasicBlock*, VisitedTy>
+ I = df_ext_begin(*SuccI, Visited), E = df_ext_end(*SuccI, Visited);
+ I != E;) {
+ MachineBasicBlock *MBB = *I;
+
+ // Check if VNI is live in to MBB.
+ tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB);
+ LiveRangeQuery LRQ(*LI, MBBStart);
+ if (LRQ.valueIn() != VNI) {
+ // This block isn't part of the VNI live range. Prune the search.
+ I.skipChildren();
+ continue;
+ }
- // Check if VNI is live in to MBB.
- tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB);
- LiveRangeQuery LRQ(*LI, MBBStart);
- if (LRQ.valueIn() != VNI) {
- // This block isn't part of the VNI live range. Prune the search.
- I.skipChildren();
- continue;
- }
+ // Prune the search if VNI is killed in MBB.
+ if (LRQ.endPoint() < MBBEnd) {
+ LI->removeRange(MBBStart, LRQ.endPoint());
+ if (EndPoints) EndPoints->push_back(LRQ.endPoint());
+ I.skipChildren();
+ continue;
+ }
- // Prune the search if VNI is killed in MBB.
- if (LRQ.endPoint() < MBBEnd) {
- LI->removeRange(MBBStart, LRQ.endPoint());
- if (EndPoints) EndPoints->push_back(LRQ.endPoint());
- I.skipChildren();
- continue;
+ // VNI is live through MBB.
+ LI->removeRange(MBBStart, MBBEnd);
+ if (EndPoints) EndPoints->push_back(MBBEnd);
+ ++I;
}
-
- // VNI is live through MBB.
- LI->removeRange(MBBStart, MBBEnd);
- if (EndPoints) EndPoints->push_back(MBBEnd);
- ++I;
}
}
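The rewritten pruneValue walks the CFG with df_ext_iterator so that one
Visited set is shared by every successor-rooted search; that is what
lets the loop handle cycles in which KillMBB itself is reachable. The
traversal skeleton, with the pruning predicate left abstract (Root and
stillLive are assumed names):

    typedef SmallPtrSet<MachineBasicBlock*, 9> VisitedTy;
    VisitedTy Visited;  // shared across all roots
    for (df_ext_iterator<MachineBasicBlock*, VisitedTy>
           I = df_ext_begin(Root, Visited), E = df_ext_end(Root, Visited);
         I != E; /* advanced below */) {
      if (!stillLive(*I)) { // left the live range? prune the subtree.
        I.skipChildren();
        continue;
      }
      ++I;                  // descend into children
    }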
@@ -1007,246 +1008,252 @@ private:
LiveIntervals& LIS;
const MachineRegisterInfo& MRI;
const TargetRegisterInfo& TRI;
+ SlotIndex OldIdx;
SlotIndex NewIdx;
-
- typedef std::pair<LiveInterval*, LiveRange*> IntRangePair;
- typedef DenseSet<IntRangePair> RangeSet;
-
- struct RegRanges {
- LiveRange* Use;
- LiveRange* EC;
- LiveRange* Dead;
- LiveRange* Def;
- RegRanges() : Use(0), EC(0), Dead(0), Def(0) {}
- };
- typedef DenseMap<unsigned, RegRanges> BundleRanges;
+ SmallPtrSet<LiveInterval*, 8> Updated;
+ bool UpdateFlags;
public:
HMEditor(LiveIntervals& LIS, const MachineRegisterInfo& MRI,
- const TargetRegisterInfo& TRI, SlotIndex NewIdx)
- : LIS(LIS), MRI(MRI), TRI(TRI), NewIdx(NewIdx) {}
-
- // Update intervals for all operands of MI from OldIdx to NewIdx.
- // This assumes that MI used to be at OldIdx, and now resides at
- // NewIdx.
- void moveAllRangesFrom(MachineInstr* MI, SlotIndex OldIdx) {
- assert(NewIdx != OldIdx && "No-op move? That's a bit strange.");
-
- // Collect the operands.
- RangeSet Entering, Internal, Exiting;
- bool hasRegMaskOp = false;
- collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx);
-
- // To keep the LiveRanges valid within an interval, move the ranges closest
- // to the destination first. This prevents ranges from overlapping, to that
- // APIs like removeRange still work.
- if (NewIdx < OldIdx) {
- moveAllEnteringFrom(OldIdx, Entering);
- moveAllInternalFrom(OldIdx, Internal);
- moveAllExitingFrom(OldIdx, Exiting);
- }
- else {
- moveAllExitingFrom(OldIdx, Exiting);
- moveAllInternalFrom(OldIdx, Internal);
- moveAllEnteringFrom(OldIdx, Entering);
- }
-
- if (hasRegMaskOp)
- updateRegMaskSlots(OldIdx);
-
-#ifndef NDEBUG
- LIValidator validator;
- validator = std::for_each(Entering.begin(), Entering.end(), validator);
- validator = std::for_each(Internal.begin(), Internal.end(), validator);
- validator = std::for_each(Exiting.begin(), Exiting.end(), validator);
- assert(validator.rangesOk() && "moveAllOperandsFrom broke liveness.");
-#endif
-
+ const TargetRegisterInfo& TRI,
+ SlotIndex OldIdx, SlotIndex NewIdx, bool UpdateFlags)
+ : LIS(LIS), MRI(MRI), TRI(TRI), OldIdx(OldIdx), NewIdx(NewIdx),
+ UpdateFlags(UpdateFlags) {}
+
+ // FIXME: UpdateFlags is a workaround that creates live intervals for all
+ // physregs, even those that aren't needed for regalloc, in order to update
+ // kill flags. This is wasteful. Eventually, LiveVariables will strip all kill
+ // flags, and postRA passes will use a live register utility instead.
+ LiveInterval *getRegUnitLI(unsigned Unit) {
+ if (UpdateFlags)
+ return &LIS.getRegUnit(Unit);
+ return LIS.getCachedRegUnit(Unit);
}
- // Update intervals for all operands of MI to refer to BundleStart's
- // SlotIndex.
- void moveAllRangesInto(MachineInstr* MI, MachineInstr* BundleStart) {
- if (MI == BundleStart)
- return; // Bundling instr with itself - nothing to do.
-
- SlotIndex OldIdx = LIS.getSlotIndexes()->getInstructionIndex(MI);
- assert(LIS.getSlotIndexes()->getInstructionFromIndex(OldIdx) == MI &&
- "SlotIndex <-> Instruction mapping broken for MI");
-
- // Collect all ranges already in the bundle.
- MachineBasicBlock::instr_iterator BII(BundleStart);
- RangeSet Entering, Internal, Exiting;
- bool hasRegMaskOp = false;
- collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx);
- assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
- for (++BII; &*BII == MI || BII->isInsideBundle(); ++BII) {
- if (&*BII == MI)
+ /// Update all live ranges touched by MI, assuming a move from OldIdx to
+ /// NewIdx.
+ void updateAllRanges(MachineInstr *MI) {
+ DEBUG(dbgs() << "handleMove " << OldIdx << " -> " << NewIdx << ": " << *MI);
+ bool hasRegMask = false;
+ for (MIOperands MO(MI); MO.isValid(); ++MO) {
+ if (MO->isRegMask())
+ hasRegMask = true;
+ if (!MO->isReg())
continue;
- collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx);
- assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
- }
-
- BundleRanges BR = createBundleRanges(Entering, Internal, Exiting);
-
- Entering.clear();
- Internal.clear();
- Exiting.clear();
- collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx);
- assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
-
- DEBUG(dbgs() << "Entering: " << Entering.size() << "\n");
- DEBUG(dbgs() << "Internal: " << Internal.size() << "\n");
- DEBUG(dbgs() << "Exiting: " << Exiting.size() << "\n");
-
- moveAllEnteringFromInto(OldIdx, Entering, BR);
- moveAllInternalFromInto(OldIdx, Internal, BR);
- moveAllExitingFromInto(OldIdx, Exiting, BR);
+ // Aggressively clear all kill flags.
+ // They are reinserted by VirtRegRewriter.
+ if (MO->isUse())
+ MO->setIsKill(false);
+ unsigned Reg = MO->getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ updateRange(LIS.getInterval(Reg));
+ continue;
+ }
-#ifndef NDEBUG
- LIValidator validator;
- validator = std::for_each(Entering.begin(), Entering.end(), validator);
- validator = std::for_each(Internal.begin(), Internal.end(), validator);
- validator = std::for_each(Exiting.begin(), Exiting.end(), validator);
- assert(validator.rangesOk() && "moveAllOperandsInto broke liveness.");
-#endif
+ // For physregs, only update the regunits that actually have a
+ // precomputed live range.
+ for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
+ if (LiveInterval *LI = getRegUnitLI(*Units))
+ updateRange(*LI);
+ }
+ if (hasRegMask)
+ updateRegMaskSlots();
}
private:
+ /// Update a single live range, assuming an instruction has been moved from
+ /// OldIdx to NewIdx.
+ void updateRange(LiveInterval &LI) {
+ if (!Updated.insert(&LI))
+ return;
+ DEBUG({
+ dbgs() << " ";
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg))
+ dbgs() << PrintReg(LI.reg);
+ else
+ dbgs() << PrintRegUnit(LI.reg, &TRI);
+ dbgs() << ":\t" << LI << '\n';
+ });
+ if (SlotIndex::isEarlierInstr(OldIdx, NewIdx))
+ handleMoveDown(LI);
+ else
+ handleMoveUp(LI);
+ DEBUG(dbgs() << " -->\t" << LI << '\n');
+ LI.verify();
+ }
+
+ /// Update LI to reflect an instruction has been moved downwards from OldIdx
+ /// to NewIdx.
+ ///
+ /// 1. Live def at OldIdx:
+ /// Move def to NewIdx, assert endpoint after NewIdx.
+ ///
+ /// 2. Live def at OldIdx, killed at NewIdx:
+ /// Change to dead def at NewIdx.
+ /// (Happens when bundling def+kill together).
+ ///
+ /// 3. Dead def at OldIdx:
+ /// Move def to NewIdx, possibly across another live value.
+ ///
+ /// 4. Def at OldIdx AND at NewIdx:
+ /// Remove live range [OldIdx;NewIdx) and value defined at OldIdx.
+ /// (Happens when bundling multiple defs together).
+ ///
+ /// 5. Value read at OldIdx, killed before NewIdx:
+ /// Extend kill to NewIdx.
+ ///
+ void handleMoveDown(LiveInterval &LI) {
+ // First look for a kill at OldIdx.
+ LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex());
+ LiveInterval::iterator E = LI.end();
+ // Is LI even live at OldIdx?
+ if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start))
+ return;
-#ifndef NDEBUG
- class LIValidator {
- private:
- DenseSet<const LiveInterval*> Checked, Bogus;
- public:
- void operator()(const IntRangePair& P) {
- const LiveInterval* LI = P.first;
- if (Checked.count(LI))
+ // Handle a live-in value.
+ if (!SlotIndex::isSameInstr(I->start, OldIdx)) {
+ bool isKill = SlotIndex::isSameInstr(OldIdx, I->end);
+ // If the live-in value already extends to NewIdx, there is nothing to do.
+ if (!SlotIndex::isEarlierInstr(I->end, NewIdx))
return;
- Checked.insert(LI);
- if (LI->empty())
+ // Aggressively remove all kill flags from the old kill point.
+ // Kill flags shouldn't be used while live intervals exist; they will
+ // be reinserted by VirtRegRewriter.
+ if (MachineInstr *KillMI = LIS.getInstructionFromIndex(I->end))
+ for (MIBundleOperands MO(KillMI); MO.isValid(); ++MO)
+ if (MO->isReg() && MO->isUse())
+ MO->setIsKill(false);
+ // Adjust I->end to reach NewIdx. This may temporarily make LI invalid
+ // by creating overlapping ranges. This is case 5 above.
+ I->end = NewIdx.getRegSlot(I->end.isEarlyClobber());
+ // If this was a kill, there may also be a def. Otherwise we're done.
+ if (!isKill)
return;
- SlotIndex LastEnd = LI->begin()->start;
- for (LiveInterval::const_iterator LRI = LI->begin(), LRE = LI->end();
- LRI != LRE; ++LRI) {
- const LiveRange& LR = *LRI;
- if (LastEnd > LR.start || LR.start >= LR.end)
- Bogus.insert(LI);
- LastEnd = LR.end;
- }
- }
-
- bool rangesOk() const {
- return Bogus.empty();
- }
- };
-#endif
-
- // Collect IntRangePairs for all operands of MI that may need fixing.
- // Treat's MI's index as OldIdx (regardless of what it is in SlotIndexes'
- // maps).
- void collectRanges(MachineInstr* MI, RangeSet& Entering, RangeSet& Internal,
- RangeSet& Exiting, bool& hasRegMaskOp, SlotIndex OldIdx) {
- hasRegMaskOp = false;
- for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end();
- MOI != MOE; ++MOI) {
- const MachineOperand& MO = *MOI;
-
- if (MO.isRegMask()) {
- hasRegMaskOp = true;
- continue;
- }
-
- if (!MO.isReg() || MO.getReg() == 0)
- continue;
-
- unsigned Reg = MO.getReg();
-
- // Don't track uses of reserved registers - they're not accurate.
- // Reserved register live ranges look like a set of dead defs.
- bool Resv =
- TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg);
-
- // Collect ranges for register units. These live ranges are computed on
- // demand, so just skip any that haven't been computed yet.
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
- if (LiveInterval *LI = LIS.getCachedRegUnit(*Units))
- collectRanges(MO, LI, Entering, Internal, Exiting, OldIdx, Resv);
- } else {
- // Collect ranges for individual virtual registers.
- collectRanges(MO, &LIS.getInterval(Reg),
- Entering, Internal, Exiting, OldIdx);
- }
+ ++I;
}
- }
- void collectRanges(const MachineOperand &MO, LiveInterval *LI,
- RangeSet &Entering, RangeSet &Internal, RangeSet &Exiting,
- SlotIndex OldIdx, bool IgnoreReads = false) {
- if (!IgnoreReads && MO.readsReg()) {
- LiveRange* LR = LI->getLiveRangeContaining(OldIdx);
- if (LR != 0)
- Entering.insert(std::make_pair(LI, LR));
+ // Check for a def at OldIdx.
+ if (I == E || !SlotIndex::isSameInstr(OldIdx, I->start))
+ return;
+ // We have a def at OldIdx.
+ VNInfo *DefVNI = I->valno;
+ assert(DefVNI->def == I->start && "Inconsistent def");
+ DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber());
+ // If the defined value extends beyond NewIdx, just move the def down.
+ // This is case 1 above.
+ if (SlotIndex::isEarlierInstr(NewIdx, I->end)) {
+ I->start = DefVNI->def;
+ return;
}
- if (MO.isDef()) {
- LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot());
- assert(LR != 0 && "No live range for def?");
- if (LR->end > OldIdx.getDeadSlot())
- Exiting.insert(std::make_pair(LI, LR));
- else
- Internal.insert(std::make_pair(LI, LR));
+ // The remaining possibilities are now:
+ // 2. Live def at OldIdx, killed at NewIdx: isSameInstr(I->end, NewIdx).
+ // 3. Dead def at OldIdx: I->end = OldIdx.getDeadSlot().
+ // In either case, it is possible that there is an existing def at NewIdx.
+ assert((I->end == OldIdx.getDeadSlot() ||
+ SlotIndex::isSameInstr(I->end, NewIdx)) &&
+ "Cannot move def below kill");
+ LiveInterval::iterator NewI = LI.advanceTo(I, NewIdx.getRegSlot());
+ if (NewI != E && SlotIndex::isSameInstr(NewI->start, NewIdx)) {
+ // There is an existing def at NewIdx, case 4 above. The def at OldIdx is
+ // coalesced into that value.
+ assert(NewI->valno != DefVNI && "Multiple defs of value?");
+ LI.removeValNo(DefVNI);
+ return;
}
+ // There was no existing def at NewIdx. Turn *I into a dead def at NewIdx.
+ // If the def at OldIdx was dead, we allow it to be moved across other LI
+ // values. The new range should be placed immediately before NewI;
+ // move any intermediate ranges up.
+ assert(NewI != I && "Inconsistent iterators");
+ std::copy(llvm::next(I), NewI, I);
+ *llvm::prior(NewI) = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
}
- BundleRanges createBundleRanges(RangeSet& Entering,
- RangeSet& Internal,
- RangeSet& Exiting) {
- BundleRanges BR;
+ /// Update LI to reflect an instruction has been moved upwards from OldIdx
+ /// to NewIdx.
+ ///
+ /// 1. Live def at OldIdx:
+ /// Hoist def to NewIdx.
+ ///
+ /// 2. Dead def at OldIdx:
+ /// Hoist def+end to NewIdx, possibly move across other values.
+ ///
+ /// 3. Dead def at OldIdx AND existing def at NewIdx:
+ /// Remove value defined at OldIdx, coalescing it with existing value.
+ ///
+ /// 4. Live def at OldIdx AND existing def at NewIdx:
+ /// Remove value defined at NewIdx, hoist OldIdx def to NewIdx.
+ /// (Happens when bundling multiple defs together).
+ ///
+ /// 5. Value killed at OldIdx:
+ /// Hoist kill to NewIdx, then scan for last kill between NewIdx and
+ /// OldIdx.
+ ///
+ void handleMoveUp(LiveInterval &LI) {
+ // First look for a kill at OldIdx.
+ LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex());
+ LiveInterval::iterator E = LI.end();
+ // Is LI even live at OldIdx?
+ if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start))
+ return;
- for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
- EI != EE; ++EI) {
- LiveInterval* LI = EI->first;
- LiveRange* LR = EI->second;
- BR[LI->reg].Use = LR;
+ // Handle a live-in value.
+ if (!SlotIndex::isSameInstr(I->start, OldIdx)) {
+ // If the live-in value isn't killed here, there is nothing to do.
+ if (!SlotIndex::isSameInstr(OldIdx, I->end))
+ return;
+ // Adjust I->end to end at NewIdx. If we are hoisting a kill above
+ // another use, we need to search for that use. Case 5 above.
+ I->end = NewIdx.getRegSlot(I->end.isEarlyClobber());
+ ++I;
+ // If OldIdx also defines a value, there couldn't have been another use.
+ if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) {
+ // No def, search for the new kill.
+ // This can never be an early clobber kill since there is no def.
+ llvm::prior(I)->end = findLastUseBefore(LI.reg).getRegSlot();
+ return;
+ }
}
- for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
- II != IE; ++II) {
- LiveInterval* LI = II->first;
- LiveRange* LR = II->second;
- if (LR->end.isDead()) {
- BR[LI->reg].Dead = LR;
- } else {
- BR[LI->reg].EC = LR;
+ // Now deal with the def at OldIdx.
+ assert(I != E && SlotIndex::isSameInstr(I->start, OldIdx) && "No def?");
+ VNInfo *DefVNI = I->valno;
+ assert(DefVNI->def == I->start && "Inconsistent def");
+ DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber());
+
+ // Check for an existing def at NewIdx.
+ LiveInterval::iterator NewI = LI.find(NewIdx.getRegSlot());
+ if (SlotIndex::isSameInstr(NewI->start, NewIdx)) {
+ assert(NewI->valno != DefVNI && "Same value defined more than once?");
+ // There is an existing def at NewIdx.
+ if (I->end.isDead()) {
+ // Case 3: Remove the dead def at OldIdx.
+ LI.removeValNo(DefVNI);
+ return;
}
+ // Case 4: Replace def at NewIdx with live def at OldIdx.
+ I->start = DefVNI->def;
+ LI.removeValNo(NewI->valno);
+ return;
}
- for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
- EI != EE; ++EI) {
- LiveInterval* LI = EI->first;
- LiveRange* LR = EI->second;
- BR[LI->reg].Def = LR;
+ // There is no existing def at NewIdx. Hoist DefVNI.
+ if (!I->end.isDead()) {
+ // Leave the end point of a live def.
+ I->start = DefVNI->def;
+ return;
}
- return BR;
- }
-
- void moveKillFlags(unsigned reg, SlotIndex OldIdx, SlotIndex newKillIdx) {
- MachineInstr* OldKillMI = LIS.getInstructionFromIndex(OldIdx);
- if (!OldKillMI->killsRegister(reg))
- return; // Bail out if we don't have kill flags on the old register.
- MachineInstr* NewKillMI = LIS.getInstructionFromIndex(newKillIdx);
- assert(OldKillMI->killsRegister(reg) && "Old 'kill' instr isn't a kill.");
- assert(!NewKillMI->killsRegister(reg) &&
- "New kill instr is already a kill.");
- OldKillMI->clearRegisterKills(reg, &TRI);
- NewKillMI->addRegisterKilled(reg, &TRI);
+ // DefVNI is a dead def. It may have been moved across other values in LI,
+ // so move I up to NewI. Slide [NewI;I) down one position.
+ std::copy_backward(NewI, I, llvm::next(I));
+ *NewI = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
}
- void updateRegMaskSlots(SlotIndex OldIdx) {
+ void updateRegMaskSlots() {
SmallVectorImpl<SlotIndex>::iterator RI =
std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(),
OldIdx);
@@ -1257,7 +1264,7 @@ private:
}
// Return the last use of reg between NewIdx and OldIdx.
- SlotIndex findLastUseBefore(unsigned Reg, SlotIndex OldIdx) {
+ SlotIndex findLastUseBefore(unsigned Reg) {
SlotIndex LastUse = NewIdx;
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
@@ -1291,233 +1298,26 @@ private:
}
return LastUse;
}
-
- void moveEnteringUpFrom(SlotIndex OldIdx, IntRangePair& P) {
- LiveInterval* LI = P.first;
- LiveRange* LR = P.second;
- bool LiveThrough = LR->end > OldIdx.getRegSlot();
- if (LiveThrough)
- return;
- SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx);
- if (LastUse != NewIdx)
- moveKillFlags(LI->reg, NewIdx, LastUse);
- LR->end = LastUse.getRegSlot(LR->end.isEarlyClobber());
- }
-
- void moveEnteringDownFrom(SlotIndex OldIdx, IntRangePair& P) {
- LiveInterval* LI = P.first;
- LiveRange* LR = P.second;
- // Extend the LiveRange if NewIdx is past the end.
- if (NewIdx > LR->end) {
- // Move kill flags if OldIdx was not originally the end
- // (otherwise LR->end points to an invalid slot).
- if (LR->end.getRegSlot() != OldIdx.getRegSlot()) {
- assert(LR->end > OldIdx && "LiveRange does not cover original slot");
- moveKillFlags(LI->reg, LR->end, NewIdx);
- }
- LR->end = NewIdx.getRegSlot(LR->end.isEarlyClobber());
- }
- }
-
- void moveAllEnteringFrom(SlotIndex OldIdx, RangeSet& Entering) {
- bool GoingUp = NewIdx < OldIdx;
-
- if (GoingUp) {
- for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
- EI != EE; ++EI)
- moveEnteringUpFrom(OldIdx, *EI);
- } else {
- for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
- EI != EE; ++EI)
- moveEnteringDownFrom(OldIdx, *EI);
- }
- }
-
- void moveInternalFrom(SlotIndex OldIdx, IntRangePair& P) {
- LiveInterval* LI = P.first;
- LiveRange* LR = P.second;
- assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() &&
- LR->end <= OldIdx.getDeadSlot() &&
- "Range should be internal to OldIdx.");
- LiveRange Tmp(*LR);
- Tmp.start = NewIdx.getRegSlot(LR->start.isEarlyClobber());
- Tmp.valno->def = Tmp.start;
- Tmp.end = LR->end.isDead() ? NewIdx.getDeadSlot() : NewIdx.getRegSlot();
- LI->removeRange(*LR);
- LI->addRange(Tmp);
- }
-
- void moveAllInternalFrom(SlotIndex OldIdx, RangeSet& Internal) {
- for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
- II != IE; ++II)
- moveInternalFrom(OldIdx, *II);
- }
-
- void moveExitingFrom(SlotIndex OldIdx, IntRangePair& P) {
- LiveRange* LR = P.second;
- assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() &&
- "Range should start in OldIdx.");
- assert(LR->end > OldIdx.getDeadSlot() && "Range should exit OldIdx.");
- SlotIndex NewStart = NewIdx.getRegSlot(LR->start.isEarlyClobber());
- LR->start = NewStart;
- LR->valno->def = NewStart;
- }
-
- void moveAllExitingFrom(SlotIndex OldIdx, RangeSet& Exiting) {
- for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
- EI != EE; ++EI)
- moveExitingFrom(OldIdx, *EI);
- }
-
- void moveEnteringUpFromInto(SlotIndex OldIdx, IntRangePair& P,
- BundleRanges& BR) {
- LiveInterval* LI = P.first;
- LiveRange* LR = P.second;
- bool LiveThrough = LR->end > OldIdx.getRegSlot();
- if (LiveThrough) {
- assert((LR->start < NewIdx || BR[LI->reg].Def == LR) &&
- "Def in bundle should be def range.");
- assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) &&
- "If bundle has use for this reg it should be LR.");
- BR[LI->reg].Use = LR;
- return;
- }
-
- SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx);
- moveKillFlags(LI->reg, OldIdx, LastUse);
-
- if (LR->start < NewIdx) {
- // Becoming a new entering range.
- assert(BR[LI->reg].Dead == 0 && BR[LI->reg].Def == 0 &&
- "Bundle shouldn't be re-defining reg mid-range.");
- assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) &&
- "Bundle shouldn't have different use range for same reg.");
- LR->end = LastUse.getRegSlot();
- BR[LI->reg].Use = LR;
- } else {
- // Becoming a new Dead-def.
- assert(LR->start == NewIdx.getRegSlot(LR->start.isEarlyClobber()) &&
- "Live range starting at unexpected slot.");
- assert(BR[LI->reg].Def == LR && "Reg should have def range.");
- assert(BR[LI->reg].Dead == 0 &&
- "Can't have def and dead def of same reg in a bundle.");
- LR->end = LastUse.getDeadSlot();
- BR[LI->reg].Dead = BR[LI->reg].Def;
- BR[LI->reg].Def = 0;
- }
- }
-
- void moveEnteringDownFromInto(SlotIndex OldIdx, IntRangePair& P,
- BundleRanges& BR) {
- LiveInterval* LI = P.first;
- LiveRange* LR = P.second;
- if (NewIdx > LR->end) {
- // Range extended to bundle. Add to bundle uses.
- // Note: Currently adds kill flags to bundle start.
- assert(BR[LI->reg].Use == 0 &&
- "Bundle already has use range for reg.");
- moveKillFlags(LI->reg, LR->end, NewIdx);
- LR->end = NewIdx.getRegSlot();
- BR[LI->reg].Use = LR;
- } else {
- assert(BR[LI->reg].Use != 0 &&
- "Bundle should already have a use range for reg.");
- }
- }
-
- void moveAllEnteringFromInto(SlotIndex OldIdx, RangeSet& Entering,
- BundleRanges& BR) {
- bool GoingUp = NewIdx < OldIdx;
-
- if (GoingUp) {
- for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
- EI != EE; ++EI)
- moveEnteringUpFromInto(OldIdx, *EI, BR);
- } else {
- for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
- EI != EE; ++EI)
- moveEnteringDownFromInto(OldIdx, *EI, BR);
- }
- }
-
- void moveInternalFromInto(SlotIndex OldIdx, IntRangePair& P,
- BundleRanges& BR) {
- // TODO: Sane rules for moving ranges into bundles.
- }
-
- void moveAllInternalFromInto(SlotIndex OldIdx, RangeSet& Internal,
- BundleRanges& BR) {
- for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
- II != IE; ++II)
- moveInternalFromInto(OldIdx, *II, BR);
- }
-
- void moveExitingFromInto(SlotIndex OldIdx, IntRangePair& P,
- BundleRanges& BR) {
- LiveInterval* LI = P.first;
- LiveRange* LR = P.second;
-
- assert(LR->start.isRegister() &&
- "Don't know how to merge exiting ECs into bundles yet.");
-
- if (LR->end > NewIdx.getDeadSlot()) {
- // This range is becoming an exiting range on the bundle.
- // If there was an old dead-def of this reg, delete it.
- if (BR[LI->reg].Dead != 0) {
- LI->removeRange(*BR[LI->reg].Dead);
- BR[LI->reg].Dead = 0;
- }
- assert(BR[LI->reg].Def == 0 &&
- "Can't have two defs for the same variable exiting a bundle.");
- LR->start = NewIdx.getRegSlot();
- LR->valno->def = LR->start;
- BR[LI->reg].Def = LR;
- } else {
- // This range is becoming internal to the bundle.
- assert(LR->end == NewIdx.getRegSlot() &&
- "Can't bundle def whose kill is before the bundle");
- if (BR[LI->reg].Dead || BR[LI->reg].Def) {
- // Already have a def for this. Just delete range.
- LI->removeRange(*LR);
- } else {
- // Make range dead, record.
- LR->end = NewIdx.getDeadSlot();
- BR[LI->reg].Dead = LR;
- assert(BR[LI->reg].Use == LR &&
- "Range becoming dead should currently be use.");
- }
- // In both cases the range is no longer a use on the bundle.
- BR[LI->reg].Use = 0;
- }
- }
-
- void moveAllExitingFromInto(SlotIndex OldIdx, RangeSet& Exiting,
- BundleRanges& BR) {
- for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
- EI != EE; ++EI)
- moveExitingFromInto(OldIdx, *EI, BR);
- }
-
};
-void LiveIntervals::handleMove(MachineInstr* MI) {
+void LiveIntervals::handleMove(MachineInstr* MI, bool UpdateFlags) {
+ assert(!MI->isBundled() && "Can't handle bundled instructions yet.");
SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
Indexes->removeMachineInstrFromMaps(MI);
- SlotIndex NewIndex = MI->isInsideBundle() ?
- Indexes->getInstructionIndex(MI) :
- Indexes->insertMachineInstrInMaps(MI);
+ SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(MI);
assert(getMBBStartIdx(MI->getParent()) <= OldIndex &&
OldIndex < getMBBEndIdx(MI->getParent()) &&
"Cannot handle moves across basic block boundaries.");
- assert(!MI->isBundled() && "Can't handle bundled instructions yet.");
- HMEditor HME(*this, *MRI, *TRI, NewIndex);
- HME.moveAllRangesFrom(MI, OldIndex);
+ HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
+ HME.updateAllRanges(MI);
}
void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI,
- MachineInstr* BundleStart) {
+ MachineInstr* BundleStart,
+ bool UpdateFlags) {
+ SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
SlotIndex NewIndex = Indexes->getInstructionIndex(BundleStart);
- HMEditor HME(*this, *MRI, *TRI, NewIndex);
- HME.moveAllRangesInto(MI, BundleStart);
+ HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
+ HME.updateAllRanges(MI);
}
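
Taken together, the hunks above simplify the mover API: handleMove() no longer special-cases bundled instructions, and both entry points funnel into a single HMEditor::updateAllRanges() call that is told the old index, the new index, and whether to rewrite kill/dead flags. A minimal sketch of the new calling convention, modeled on the ScheduleDAGMI::moveInstruction change further down; the wrapper function and its names are illustrative, not part of this patch:

#include "llvm/CodeGen/LiveIntervalAnalysis.h"

using namespace llvm;

// Move MI to InsertPos within its block, then let LiveIntervals repair the
// affected live ranges. Passing UpdateFlags=true also asks HMEditor to fix
// kill/dead flags invalidated by the move.
static void moveAndRepair(MachineBasicBlock *MBB,
                          MachineBasicBlock::iterator InsertPos,
                          MachineInstr *MI, LiveIntervals *LIS) {
  MBB->splice(InsertPos, MBB, MI);
  LIS->handleMove(MI, /*UpdateFlags=*/true);
}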
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 82710414b30..0dfb084f1e1 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -249,7 +249,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
unsigned Reg = MOI->getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
// Check if MI reads any unreserved physregs.
- if (Reg && MOI->readsReg() && !LIS.isReserved(Reg))
+ if (Reg && MOI->readsReg() && !MRI.isReserved(Reg))
ReadsPhysRegs = true;
continue;
}
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 7359bb92a15..6ea933d4304 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -503,8 +503,6 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
MRI = &mf.getRegInfo();
TRI = MF->getTarget().getRegisterInfo();
- ReservedRegisters = TRI->getReservedRegs(mf);
-
unsigned NumRegs = TRI->getNumRegs();
PhysRegDef = new MachineInstr*[NumRegs];
PhysRegUse = new MachineInstr*[NumRegs];
@@ -588,7 +586,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
unsigned MOReg = UseRegs[i];
if (TargetRegisterInfo::isVirtualRegister(MOReg))
HandleVirtRegUse(MOReg, MBB, MI);
- else if (!ReservedRegisters[MOReg])
+ else if (!MRI->isReserved(MOReg))
HandlePhysRegUse(MOReg, MI);
}
@@ -601,7 +599,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
unsigned MOReg = DefRegs[i];
if (TargetRegisterInfo::isVirtualRegister(MOReg))
HandleVirtRegDef(MOReg, MI);
- else if (!ReservedRegisters[MOReg])
+ else if (!MRI->isReserved(MOReg))
HandlePhysRegDef(MOReg, MI, Defs);
}
UpdatePhysRegDefs(MI, Defs);
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 896461fd194..0f260205df2 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -63,8 +63,6 @@ namespace {
virtual void releaseMemory() {
ScopeMap.clear();
Exps.clear();
- AllocatableRegs.clear();
- ReservedRegs.clear();
}
private:
@@ -78,8 +76,6 @@ namespace {
ScopedHTType VNT;
SmallVector<MachineInstr*, 64> Exps;
unsigned CurrVN;
- BitVector AllocatableRegs;
- BitVector ReservedRegs;
bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
bool isPhysDefTriviallyDead(unsigned Reg,
@@ -242,7 +238,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
return false;
for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
- if (AllocatableRegs.test(PhysDefs[i]) || ReservedRegs.test(PhysDefs[i]))
+ if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i]))
// Avoid extending live range of physical registers if they are
// allocatable or reserved.
return false;
@@ -635,7 +631,5 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<MachineDominatorTree>();
- AllocatableRegs = TRI->getAllocatableSet(MF);
- ReservedRegs = TRI->getReservedRegs(MF);
return PerformCSE(DT->getRootNode());
}
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index bac3aa2c155..4a793281b2c 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -16,6 +16,7 @@
#include "llvm/Pass.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -32,7 +33,7 @@ STATISTIC(NumDeletes, "Number of dead copies deleted");
namespace {
class MachineCopyPropagation : public MachineFunctionPass {
const TargetRegisterInfo *TRI;
- BitVector ReservedRegs;
+ MachineRegisterInfo *MRI;
public:
static char ID; // Pass identification, replacement for typeid
@@ -146,8 +147,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src);
if (CI != AvailCopyMap.end()) {
MachineInstr *CopyMI = CI->second;
- if (!ReservedRegs.test(Def) &&
- (!ReservedRegs.test(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
+ if (!MRI->isReserved(Def) &&
+ (!MRI->isReserved(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
isNopCopy(CopyMI, Def, Src, TRI)) {
// The two copies cancel out and the source of the first copy
// hasn't been overridden, eliminate the second one. e.g.
@@ -259,7 +260,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
DI != DE; ++DI) {
unsigned Reg = (*DI)->getOperand(0).getReg();
- if (ReservedRegs.test(Reg) || !MaskMO.clobbersPhysReg(Reg))
+ if (MRI->isReserved(Reg) || !MaskMO.clobbersPhysReg(Reg))
continue;
(*DI)->eraseFromParent();
Changed = true;
@@ -296,7 +297,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
for (SmallSetVector<MachineInstr*, 8>::iterator
DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
DI != DE; ++DI) {
- if (!ReservedRegs.test((*DI)->getOperand(0).getReg())) {
+ if (!MRI->isReserved((*DI)->getOperand(0).getReg())) {
(*DI)->eraseFromParent();
Changed = true;
++NumDeletes;
@@ -311,7 +312,7 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
TRI = MF.getTarget().getRegisterInfo();
- ReservedRegs = TRI->getReservedRegs(MF);
+ MRI = &MF.getRegInfo();
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
Changed |= CopyPropagateBlock(*I);
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 91d52118576..f11785070bb 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -550,7 +550,7 @@ unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const {
// address of a block, in which case it is the pointer size.
switch (getEntryKind()) {
case MachineJumpTableInfo::EK_BlockAddress:
- return TD.getPointerSize();
+ return TD.getPointerSize(0);
case MachineJumpTableInfo::EK_GPRel64BlockAddress:
return 8;
case MachineJumpTableInfo::EK_GPRel32BlockAddress:
@@ -570,7 +570,7 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const {
// alignment.
switch (getEntryKind()) {
case MachineJumpTableInfo::EK_BlockAddress:
- return TD.getPointerABIAlignment();
+ return TD.getPointerABIAlignment(0);
case MachineJumpTableInfo::EK_GPRel64BlockAddress:
return TD.getABIIntegerTypeAlignment(64);
case MachineJumpTableInfo::EK_GPRel32BlockAddress:
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 5fb938f3400..ae7c15be158 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -306,22 +306,18 @@ void MachineRegisterInfo::dumpUses(unsigned Reg) const {
void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) {
ReservedRegs = TRI->getReservedRegs(MF);
+ assert(ReservedRegs.size() == TRI->getNumRegs() &&
+ "Invalid ReservedRegs vector from target");
}
bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg,
const MachineFunction &MF) const {
assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
- // Check if any overlapping register is modified.
+ // Check if any overlapping register is modified, or allocatable so it may be
+ // used later.
for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI)
- if (!def_empty(*AI))
- return false;
-
- // Check if any overlapping register is allocatable so it may be used later.
- if (AllocatableRegs.empty())
- AllocatableRegs = TRI->getAllocatableSet(MF);
- for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI)
- if (AllocatableRegs.test(*AI))
+ if (!def_empty(*AI) || isAllocatable(*AI))
return false;
return true;
}
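
This hunk is the hub of the patch: MachineRegisterInfo becomes the single owner of the reserved-register set, frozen once via freezeReservedRegs() (see the SelectionDAGISel hunk below), and the simplified isConstantPhysReg() folds the old two-pass check into one alias walk. The many per-pass TRI->getReservedRegs(MF) / TRI->getAllocatableSet(MF) snapshots removed elsewhere in this patch all collapse into queries of the following shape; a sketch assuming the set has already been frozen, with illustrative names:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"

using namespace llvm;

// Enumerate physregs a pass may legally touch, using the frozen MRI state
// instead of a private BitVector snapshot.
static void visitUsableRegs(MachineFunction &MF) {
  const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) {
    if (MRI.isReserved(Reg) || !MRI.isAllocatable(Reg))
      continue; // Skip registers the allocator must leave alone.
    // ... per-register work ...
  }
}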
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 11a7d4760cb..c55e8b78988 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDAGILP.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Support/CommandLine.h"
@@ -359,7 +360,7 @@ void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
BB->splice(InsertPos, BB, MI);
// Update LiveIntervals
- LIS->handleMove(MI);
+ LIS->handleMove(MI, /*UpdateFlags=*/true);
// Recede RegionBegin if an instruction moves above the first.
if (RegionBegin == InsertPos)
@@ -451,26 +452,6 @@ updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
}
}
-// Release all DAG roots for scheduling.
-void ScheduleDAGMI::releaseRoots() {
- SmallVector<SUnit*, 16> BotRoots;
-
- for (std::vector<SUnit>::iterator
- I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
- // A SUnit is ready to top schedule if it has no predecessors.
- if (I->Preds.empty())
- SchedImpl->releaseTopNode(&(*I));
- // A SUnit is ready to bottom schedule if it has no successors.
- if (I->Succs.empty())
- BotRoots.push_back(&(*I));
- }
- // Release bottom roots in reverse order so the higher priority nodes appear
- // first. This is more natural and slightly more efficient.
- for (SmallVectorImpl<SUnit*>::const_reverse_iterator
- I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
- SchedImpl->releaseBottomNode(*I);
-}
-
/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
@@ -532,8 +513,29 @@ void ScheduleDAGMI::postprocessDAG() {
}
}
+// Release all DAG roots for scheduling.
+void ScheduleDAGMI::releaseRoots() {
+ SmallVector<SUnit*, 16> BotRoots;
+
+ for (std::vector<SUnit>::iterator
+ I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
+ // A SUnit is ready to top schedule if it has no predecessors.
+ if (I->Preds.empty())
+ SchedImpl->releaseTopNode(&(*I));
+ // A SUnit is ready to bottom schedule if it has no successors.
+ if (I->Succs.empty())
+ BotRoots.push_back(&(*I));
+ }
+ // Release bottom roots in reverse order so the higher priority nodes appear
+ // first. This is more natural and slightly more efficient.
+ for (SmallVectorImpl<SUnit*>::const_reverse_iterator
+ I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
+ SchedImpl->releaseBottomNode(*I);
+}
+
/// Identify DAG roots and set up scheduler queues.
void ScheduleDAGMI::initQueues() {
+
// Initialize the strategy before modifying the DAG.
SchedImpl->initialize(this);
@@ -544,6 +546,8 @@ void ScheduleDAGMI::initQueues() {
// Release all DAG roots for scheduling.
releaseRoots();
+ SchedImpl->registerRoots();
+
CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
CurrentBottom = RegionEnd;
}
@@ -1198,6 +1202,86 @@ ConvergingSchedRegistry("converge", "Standard converging scheduler.",
createConvergingSched);
//===----------------------------------------------------------------------===//
+// ILP Scheduler. Currently for experimental analysis of heuristics.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Order nodes by the ILP metric.
+struct ILPOrder {
+ ScheduleDAGILP *ILP;
+ bool MaximizeILP;
+
+ ILPOrder(ScheduleDAGILP *ilp, bool MaxILP): ILP(ilp), MaximizeILP(MaxILP) {}
+
+ /// \brief Apply a less-than relation on node priority.
+ bool operator()(const SUnit *A, const SUnit *B) const {
+ // Return true if A comes after B in the ready queue.
+ if (MaximizeILP)
+ return ILP->getILP(A) < ILP->getILP(B);
+ else
+ return ILP->getILP(A) > ILP->getILP(B);
+ }
+};
+
+/// \brief Schedule based on the ILP metric.
+class ILPScheduler : public MachineSchedStrategy {
+ ScheduleDAGILP ILP;
+ ILPOrder Cmp;
+
+ std::vector<SUnit*> ReadyQ;
+public:
+ ILPScheduler(bool MaximizeILP)
+ : ILP(/*BottomUp=*/true), Cmp(&ILP, MaximizeILP) {}
+
+ virtual void initialize(ScheduleDAGMI *DAG) {
+ ReadyQ.clear();
+ ILP.resize(DAG->SUnits.size());
+ }
+
+ virtual void registerRoots() {
+ for (std::vector<SUnit*>::const_iterator
+ I = ReadyQ.begin(), E = ReadyQ.end(); I != E; ++I) {
+ ILP.computeILP(*I);
+ }
+ }
+
+ /// Implement MachineSchedStrategy interface.
+ /// -----------------------------------------
+
+ virtual SUnit *pickNode(bool &IsTopNode) {
+ if (ReadyQ.empty()) return NULL;
+ pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ SUnit *SU = ReadyQ.back();
+ ReadyQ.pop_back();
+ IsTopNode = false;
+ DEBUG(dbgs() << "*** Scheduling " << *SU->getInstr()
+ << " ILP: " << ILP.getILP(SU) << '\n');
+ return SU;
+ }
+
+ virtual void schedNode(SUnit *, bool) {}
+
+ virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
+
+ virtual void releaseBottomNode(SUnit *SU) {
+ ReadyQ.push_back(SU);
+ std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ }
+};
+} // namespace
+
+static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMI(C, new ILPScheduler(true));
+}
+static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMI(C, new ILPScheduler(false));
+}
+static MachineSchedRegistry ILPMaxRegistry(
+ "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
+static MachineSchedRegistry ILPMinRegistry(
+ "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
+
+//===----------------------------------------------------------------------===//
// Machine Instruction Shuffler for Correctness Testing
//===----------------------------------------------------------------------===//
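
The new ILP schedulers register themselves through MachineSchedRegistry, so they should be selectable the same way as the other strategies, presumably via llc -enable-misched -misched=ilpmax (or ilpmin); the exact flag spelling is an assumption, not stated by this patch. A strategy outside this file would hook in the same way; a hypothetical registration mirroring ILPMaxRegistry above, with a made-up name and help string:

// Hypothetical registration sketch; reuses the in-file ILPScheduler for
// brevity. The "my-ilp" name and factory are illustrative only.
static ScheduleDAGInstrs *createMyILPScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new ILPScheduler(/*MaximizeILP=*/true));
}
static MachineSchedRegistry
MyILPRegistry("my-ilp", "Example ILP-driven strategy", createMyILPScheduler);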
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index bc383cba455..b117f8c3a20 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -49,7 +49,6 @@ namespace {
MachineDominatorTree *DT; // Machine dominator tree
MachineLoopInfo *LI;
AliasAnalysis *AA;
- BitVector AllocatableSet; // Which physregs are allocatable?
// Remember which edges have been considered for breaking.
SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8>
@@ -229,7 +228,6 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
DT = &getAnalysis<MachineDominatorTree>();
LI = &getAnalysis<MachineLoopInfo>();
AA = &getAnalysis<AliasAnalysis>();
- AllocatableSet = TRI->getAllocatableSet(MF);
bool EverMadeChange = false;
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index b3abec76bc9..9686b041329 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -850,14 +850,14 @@ static bool pushDepHeight(const DataDep &Dep,
return false;
}
-/// Assuming that DefMI was used by Trace.back(), add it to the live-in lists
-/// of all the blocks in Trace. Stop when reaching the block that contains
-/// DefMI.
+/// Assuming that the virtual register defined by DefMI:DefOp was used by
+/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
+/// when reaching the block that contains DefMI.
void MachineTraceMetrics::Ensemble::
-addLiveIns(const MachineInstr *DefMI,
+addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
ArrayRef<const MachineBasicBlock*> Trace) {
assert(!Trace.empty() && "Trace should contain at least one block");
- unsigned Reg = DefMI->getOperand(0).getReg();
+ unsigned Reg = DefMI->getOperand(DefOp).getReg();
assert(TargetRegisterInfo::isVirtualRegister(Reg));
const MachineBasicBlock *DefMBB = DefMI->getParent();
@@ -950,7 +950,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI);
if (pushDepHeight(Deps.front(), PHI, Height,
Heights, MTM.SchedModel, MTM.TII))
- addLiveIns(Deps.front().DefMI, Stack);
+ addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
}
}
}
@@ -983,7 +983,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
// Update the required height of any virtual registers read by MI.
for (unsigned i = 0, e = Deps.size(); i != e; ++i)
if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
- addLiveIns(Deps[i].DefMI, Stack);
+ addLiveIns(Deps[i].DefMI, Deps[i].DefOp, Stack);
InstrCycles &MICycles = Cycles[MI];
MICycles.Height = Cycle;
diff --git a/lib/CodeGen/MachineTraceMetrics.h b/lib/CodeGen/MachineTraceMetrics.h
index 5f3b1d23e41..460730b0405 100644
--- a/lib/CodeGen/MachineTraceMetrics.h
+++ b/lib/CodeGen/MachineTraceMetrics.h
@@ -279,7 +279,7 @@ public:
unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
void computeInstrDepths(const MachineBasicBlock*);
void computeInstrHeights(const MachineBasicBlock*);
- void addLiveIns(const MachineInstr *DefMI,
+ void addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
ArrayRef<const MachineBasicBlock*> Trace);
protected:
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 181e09ecc9e..dca68da2f3e 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -80,7 +80,6 @@ namespace {
BlockSet FunctionBlocks;
BitVector regsReserved;
- BitVector regsAllocatable;
RegSet regsLive;
RegVector regsDefined, regsDead, regsKilled;
RegMaskVector regMasks;
@@ -186,7 +185,7 @@ namespace {
}
bool isAllocatable(unsigned Reg) {
- return Reg < regsAllocatable.size() && regsAllocatable.test(Reg);
+ return Reg < TRI->getNumRegs() && MRI->isAllocatable(Reg);
}
// Analysis information if available
@@ -427,7 +426,7 @@ void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
void MachineVerifier::visitMachineFunctionBefore() {
lastIndex = SlotIndex();
- regsReserved = TRI->getReservedRegs(*MF);
+ regsReserved = MRI->getReservedRegs();
// A sub-register of a reserved register is also reserved
for (int Reg = regsReserved.find_first(); Reg>=0;
@@ -439,8 +438,6 @@ void MachineVerifier::visitMachineFunctionBefore() {
}
}
- regsAllocatable = TRI->getAllocatableSet(*MF);
-
markReachable(&MF->front());
// Build a set of the basic blocks in the function.
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 4ea21d4ff7b..abd62efc026 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -359,7 +359,7 @@ void TargetPassConfig::addIRPasses() {
// Run loop strength reduction before anything else.
if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
- addPass(createLoopStrengthReducePass(getTargetLowering()));
+ addPass(createLoopStrengthReducePass());
if (PrintLSR)
addPass(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
}
@@ -389,7 +389,7 @@ void TargetPassConfig::addPassesToHandleExceptions() {
addPass(createDwarfEHPass(TM));
break;
case ExceptionHandling::None:
- addPass(createLowerInvokePass(TM->getTargetLowering()));
+ addPass(createLowerInvokePass());
// The lower invoke pass may create unreachable code. Remove it.
addPass(createUnreachableBlockEliminationPass());
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 9099862bd31..a795ac8448f 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -527,6 +527,11 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
SeenMoveImm = true;
} else {
Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
+ // optimizeExtInstr might have created new instructions after MI
+ // and before the already incremented MII. Adjust MII so that the
+ // next iteration sees the new instructions.
+ MII = MI;
+ ++MII;
if (SeenMoveImm)
Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
}
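
The fix above is a general idiom for these per-block rewrite loops: the iterator is advanced before the current instruction is transformed, so any instructions a transformation inserts immediately after MI would otherwise be stepped over. A minimal sketch of the idiom with a stand-in transformation callback; the loop shape is illustrative:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"

using namespace llvm;

static void visitBlock(MachineBasicBlock &MBB,
                       bool (*Transform)(MachineInstr *)) {
  for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
       MII != E;) {
    MachineInstr *MI = &*MII;
    ++MII; // Advance first; Transform may modify MI and its neighbors.
    if (Transform(MI)) {
      // Transform may have inserted instructions right after MI. Re-anchor
      // the iterator on MI and step once so the next iteration sees them.
      MII = MI;
      ++MII;
    }
  }
}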
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 32c02bf0f03..d57bc7362de 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -490,7 +490,6 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
BitVector killedRegs(TRI->getNumRegs());
- BitVector ReservedRegs = TRI->getReservedRegs(MF);
StartBlockForKills(MBB);
@@ -531,7 +530,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isUse()) continue;
unsigned Reg = MO.getReg();
- if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
+ if ((Reg == 0) || MRI.isReserved(Reg)) continue;
bool kill = false;
if (!killedRegs.test(Reg)) {
@@ -566,7 +565,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
unsigned Reg = MO.getReg();
- if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
+ if ((Reg == 0) || MRI.isReserved(Reg)) continue;
LiveRegs.set(Reg);
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index f573d419ea5..e096240e04b 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -509,7 +509,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
// Ignore invalid hints.
if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
- !RC->contains(Hint) || !RegClassInfo.isAllocatable(Hint)))
+ !RC->contains(Hint) || !MRI->isAllocatable(Hint)))
Hint = 0;
// Take hint when possible.
@@ -838,7 +838,7 @@ void RAFast::AllocateBasicBlock() {
// Add live-in registers as live.
for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
E = MBB->livein_end(); I != E; ++I)
- if (RegClassInfo.isAllocatable(*I))
+ if (MRI->isAllocatable(*I))
definePhysReg(MII, *I, regReserved);
SmallVector<unsigned, 8> VirtDead;
@@ -970,7 +970,7 @@ void RAFast::AllocateBasicBlock() {
}
continue;
}
- if (!RegClassInfo.isAllocatable(Reg)) continue;
+ if (!MRI->isAllocatable(Reg)) continue;
if (MO.isUse()) {
usePhysReg(MO);
} else if (MO.isEarlyClobber()) {
@@ -1058,7 +1058,7 @@ void RAFast::AllocateBasicBlock() {
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- if (!RegClassInfo.isAllocatable(Reg)) continue;
+ if (!MRI->isAllocatable(Reg)) continue;
definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ?
regFree : regReserved);
continue;
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 984aab2a7a8..9320993d90a 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -208,8 +208,6 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
mri->setPhysRegUsed(Reg);
}
- BitVector reservedRegs = tri->getReservedRegs(*mf);
-
// Iterate over vregs.
for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end();
vregItr != vregEnd; ++vregItr) {
@@ -227,7 +225,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
ArrayRef<uint16_t> rawOrder = trc->getRawAllocationOrder(*mf);
for (unsigned i = 0; i != rawOrder.size(); ++i) {
unsigned preg = rawOrder[i];
- if (reservedRegs.test(preg))
+ if (mri->isReserved(preg))
continue;
// vregLI crosses a regmask operand that clobbers preg.
@@ -357,7 +355,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
loopInfo->getLoopDepth(mbb));
if (cp.isPhys()) {
- if (!lis->isAllocatable(dst)) {
+ if (!mf->getRegInfo().isAllocatable(dst)) {
continue;
}
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index 652bc3015a3..805d2356730 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -15,8 +15,9 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -57,10 +58,11 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
CalleeSaved = CSR;
// Different reserved registers?
- BitVector RR = TRI->getReservedRegs(*MF);
- if (RR != Reserved)
+ const BitVector &RR = MF->getRegInfo().getReservedRegs();
+ if (Reserved.size() != RR.size() || RR != Reserved) {
Update = true;
- Reserved = RR;
+ Reserved = RR;
+ }
// Invalidate cached information from previous function.
if (Update)
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 1b46256baf2..ba6b4569a8f 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -895,7 +895,7 @@ bool RegisterCoalescer::canJoinPhys(CoalescerPair &CP) {
/// Always join simple intervals that are defined by a single copy from a
/// reserved register. This doesn't increase register pressure, so it is
/// always beneficial.
- if (!RegClassInfo.isReserved(CP.getDstReg())) {
+ if (!MRI->isReserved(CP.getDstReg())) {
DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
return false;
}
@@ -1070,7 +1070,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
/// Attempt joining with a reserved physreg.
bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
assert(CP.isPhys() && "Must be a physreg copy");
- assert(RegClassInfo.isReserved(CP.getDstReg()) && "Not a reserved register");
+ assert(MRI->isReserved(CP.getDstReg()) && "Not a reserved register");
LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
<< '\n');
@@ -1241,6 +1241,9 @@ class JoinVals {
// Value in the other live range that overlaps this def, if any.
VNInfo *OtherVNI;
+ // Is this value an IMPLICIT_DEF?
+ bool IsImplicitDef;
+
// True when the live range of this value will be pruned because of an
// overlapping CR_Replace value in the other live range.
bool Pruned;
@@ -1249,7 +1252,8 @@ class JoinVals {
bool PrunedComputed;
Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0),
- RedefVNI(0), OtherVNI(0), Pruned(false), PrunedComputed(false) {}
+ RedefVNI(0), OtherVNI(0), IsImplicitDef(false), Pruned(false),
+ PrunedComputed(false) {}
bool isAnalyzed() const { return WriteLanes != 0; }
};
@@ -1385,8 +1389,10 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
}
// An IMPLICIT_DEF writes undef values.
- if (DefMI->isImplicitDef())
+ if (DefMI->isImplicitDef()) {
+ V.IsImplicitDef = true;
V.ValidLanes &= ~V.WriteLanes;
+ }
}
// Find the value in Other that overlaps VNI->def, if any.
@@ -1724,22 +1730,34 @@ void JoinVals::pruneValues(JoinVals &Other,
switch (Vals[i].Resolution) {
case CR_Keep:
break;
- case CR_Replace:
+ case CR_Replace: {
// This value takes precedence over the value in Other.LI.
LIS->pruneValue(&Other.LI, Def, &EndPoints);
- // Remove <def,read-undef> flags. This def is now a partial redef.
+ // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
+ // instructions are only inserted to provide a live-out value for PHI
+ // predecessors, so the instruction should simply go away once its value
+ // has been replaced.
+ Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
+ bool EraseImpDef = OtherV.IsImplicitDef && OtherV.Resolution == CR_Keep;
if (!Def.isBlock()) {
+ // Remove <def,read-undef> flags. This def is now a partial redef.
+ // Also remove <def,dead> flags since the joined live range will
+ // continue past this instruction.
for (MIOperands MO(Indexes->getInstructionFromIndex(Def));
MO.isValid(); ++MO)
- if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg)
- MO->setIsUndef(false);
- // This value will reach instructions below, but we need to make sure
- // the live range also reaches the instruction at Def.
- EndPoints.push_back(Def);
+ if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg) {
+ MO->setIsUndef(EraseImpDef);
+ MO->setIsDead(false);
+ }
+ // This value will reach instructions below, but we need to make sure
+ // the live range also reaches the instruction at Def.
+ if (!EraseImpDef)
+ EndPoints.push_back(Def);
}
DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def
<< ": " << Other.LI << '\n');
break;
+ }
case CR_Erase:
case CR_Merge:
if (isPrunedValue(i, Other)) {
@@ -1762,21 +1780,41 @@ void JoinVals::pruneValues(JoinVals &Other,
void JoinVals::eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
SmallVectorImpl<unsigned> &ShrinkRegs) {
for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
- if (Vals[i].Resolution != CR_Erase)
- continue;
+ // Get the def location before markUnused() below invalidates it.
SlotIndex Def = LI.getValNumInfo(i)->def;
- MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
- assert(MI && "No instruction to erase");
- if (MI->isCopy()) {
- unsigned Reg = MI->getOperand(1).getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg) &&
- Reg != CP.getSrcReg() && Reg != CP.getDstReg())
- ShrinkRegs.push_back(Reg);
+ switch (Vals[i].Resolution) {
+ case CR_Keep:
+ // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
+ // longer. The IMPLICIT_DEF instructions are only inserted by
+ // PHIElimination to guarantee that all PHI predecessors have a value.
+ if (!Vals[i].IsImplicitDef || !Vals[i].Pruned)
+ break;
+ // Remove value number i from LI. Note that this VNInfo is still present
+ // in NewVNInfo, so it will appear as an unused value number in the final
+ // joined interval.
+ LI.getValNumInfo(i)->markUnused();
+ LI.removeValNo(LI.getValNumInfo(i));
+ DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LI << '\n');
+ // FALL THROUGH.
+
+ case CR_Erase: {
+ MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
+ assert(MI && "No instruction to erase");
+ if (MI->isCopy()) {
+ unsigned Reg = MI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+ Reg != CP.getSrcReg() && Reg != CP.getDstReg())
+ ShrinkRegs.push_back(Reg);
+ }
+ ErasedInstrs.insert(MI);
+ DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ break;
+ }
+ default:
+ break;
}
- ErasedInstrs.insert(MI);
- DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
- LIS->RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
}
}
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index b267aea816c..94779770e0e 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -337,7 +337,7 @@ static void collectOperands(const MachineInstr *MI,
PhysRegOperands &PhysRegOpers,
VirtRegOperands &VirtRegOpers,
const TargetRegisterInfo *TRI,
- const RegisterClassInfo *RCI) {
+ const MachineRegisterInfo *MRI) {
for(ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI) {
const MachineOperand &MO = *OperI;
if (!MO.isReg() || !MO.getReg())
@@ -345,7 +345,7 @@ static void collectOperands(const MachineInstr *MI,
if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
VirtRegOpers.collect(MO, TRI);
- else if (RCI->isAllocatable(MO.getReg()))
+ else if (MRI->isAllocatable(MO.getReg()))
PhysRegOpers.collect(MO, TRI);
}
// Remove redundant physreg dead defs.
@@ -451,7 +451,7 @@ bool RegPressureTracker::recede() {
PhysRegOperands PhysRegOpers;
VirtRegOperands VirtRegOpers;
- collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, RCI);
+ collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, MRI);
// Boost pressure for all dead defs together.
increasePhysRegPressure(PhysRegOpers.DeadDefs);
@@ -524,7 +524,7 @@ bool RegPressureTracker::advance() {
PhysRegOperands PhysRegOpers;
VirtRegOperands VirtRegOpers;
- collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, RCI);
+ collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, MRI);
// Kill liveness at last uses.
for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) {
@@ -666,7 +666,7 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
// Account for register pressure similar to RegPressureTracker::recede().
PhysRegOperands PhysRegOpers;
VirtRegOperands VirtRegOpers;
- collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI);
+ collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, MRI);
// Boost max pressure for all dead defs together.
// Since CurrSetPressure and MaxSetPressure
@@ -752,7 +752,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
// Account for register pressure similar to RegPressureTracker::recede().
PhysRegOperands PhysRegOpers;
VirtRegOperands VirtRegOpers;
- collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI);
+ collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, MRI);
// Kill liveness at last uses. Assume allocatable physregs are single-use
// rather than checking LiveIntervals.
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index d673794e1b9..5ec6564ce39 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -92,9 +92,6 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
KillRegs.resize(NumPhysRegs);
DefRegs.resize(NumPhysRegs);
- // Create reserved registers bitvector.
- ReservedRegs = TRI->getReservedRegs(MF);
-
// Create callee-saved registers bitvector.
CalleeSavedRegs.resize(NumPhysRegs);
const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF);
@@ -225,9 +222,9 @@ void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
used = RegsAvailable;
used.flip();
if (includeReserved)
- used |= ReservedRegs;
+ used |= MRI->getReservedRegs();
else
- used.reset(ReservedRegs);
+ used.reset(MRI->getReservedRegs());
}
unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index aa45a6861ca..8dcbf83353d 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ScheduleDAGILP.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
@@ -30,6 +31,7 @@
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -933,3 +935,94 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
std::string ScheduleDAGInstrs::getDAGName() const {
return "dag." + BB->getFullName();
}
+
+namespace {
+/// \brief Manage the stack used by a reverse depth-first search over the DAG.
+class SchedDAGReverseDFS {
+ std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack;
+public:
+ bool isComplete() const { return DFSStack.empty(); }
+
+ void follow(const SUnit *SU) {
+ DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
+ }
+ void advance() { ++DFSStack.back().second; }
+
+ void backtrack() { DFSStack.pop_back(); }
+
+ const SUnit *getCurr() const { return DFSStack.back().first; }
+
+ SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
+
+ SUnit::const_pred_iterator getPredEnd() const {
+ return getCurr()->Preds.end();
+ }
+};
+} // anonymous
+
+void ScheduleDAGILP::resize(unsigned NumSUnits) {
+ ILPValues.resize(NumSUnits);
+}
+
+ILPValue ScheduleDAGILP::getILP(const SUnit *SU) {
+ return ILPValues[SU->NodeNum];
+}
+
+// A leaf node has an ILP of 1/1.
+static ILPValue initILP(const SUnit *SU) {
+ unsigned Cnt = SU->getInstr()->isTransient() ? 0 : 1;
+ return ILPValue(Cnt, 1 + SU->getDepth());
+}
+
+/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
+/// search from this root.
+void ScheduleDAGILP::computeILP(const SUnit *Root) {
+ if (!IsBottomUp)
+ llvm_unreachable("Top-down ILP metric is unimplemnted");
+
+ SchedDAGReverseDFS DFS;
+ // Mark a node visited by validating it.
+ ILPValues[Root->NodeNum] = initILP(Root);
+ DFS.follow(Root);
+ for (;;) {
+ // Traverse the leftmost path as far as possible.
+ while (DFS.getPred() != DFS.getPredEnd()) {
+ const SUnit *PredSU = DFS.getPred()->getSUnit();
+ DFS.advance();
+ // If the pred is already valid, skip it.
+ if (ILPValues[PredSU->NodeNum].isValid())
+ continue;
+ ILPValues[PredSU->NodeNum] = initILP(PredSU);
+ DFS.follow(PredSU);
+ }
+ // Visit the top of the stack in postorder and backtrack.
+ unsigned PredCount = ILPValues[DFS.getCurr()->NodeNum].InstrCount;
+ DFS.backtrack();
+ if (DFS.isComplete())
+ break;
+ // Add the recently finished predecessor's bottom-up descendant count.
+ ILPValues[DFS.getCurr()->NodeNum].InstrCount += PredCount;
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ILPValue::print(raw_ostream &OS) const {
+ if (!isValid()) {
+ OS << "BADILP";
+ return;
+ }
+ OS << InstrCount << " / " << Cycles << " = "
+ << format("%g", ((double)InstrCount / Cycles));
+}
+
+void ILPValue::dump() const {
+ dbgs() << *this << '\n';
+}
+
+namespace llvm {
+
+raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
+ Val.print(OS);
+ return OS;
+}
+
+} // namespace llvm
+#endif // !NDEBUG || LLVM_ENABLE_DUMP
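
For intuition about the metric: getILP() reports InstrCount / Cycles, where the DFS accumulates into each node the instruction count of its entire bottom-up subDAG (already-visited predecessors are counted once), and Cycles is one more than the node's depth, i.e. the length of the critical path reaching it. For a root fed by two independent two-instruction chains, InstrCount is 5 and the depth is 2, so the value is 5/3 ≈ 1.67; a single five-instruction chain instead scores 5/5 = 1. The ilpmax strategy registered above therefore favors wide, parallel subtrees, while ilpmin favors serial chains.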
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 80f495309d1..2ec129f7308 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1587,26 +1587,71 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
break;
case TargetLowering::Expand: {
ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+ ISD::CondCode InvCC = ISD::SETCC_INVALID;
unsigned Opc = 0;
switch (CCCode) {
default: llvm_unreachable("Don't know how to expand this condition!");
- case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break;
- case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break;
- case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break;
- case ISD::SETOLT: CC1 = ISD::SETLT; CC2 = ISD::SETO; Opc = ISD::AND; break;
- case ISD::SETOLE: CC1 = ISD::SETLE; CC2 = ISD::SETO; Opc = ISD::AND; break;
- case ISD::SETONE: CC1 = ISD::SETNE; CC2 = ISD::SETO; Opc = ISD::AND; break;
- case ISD::SETUEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETUO; Opc = ISD::OR; break;
- case ISD::SETUGT: CC1 = ISD::SETGT; CC2 = ISD::SETUO; Opc = ISD::OR; break;
- case ISD::SETUGE: CC1 = ISD::SETGE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
- case ISD::SETULT: CC1 = ISD::SETLT; CC2 = ISD::SETUO; Opc = ISD::OR; break;
- case ISD::SETULE: CC1 = ISD::SETLE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
- case ISD::SETUNE: CC1 = ISD::SETNE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
- // FIXME: Implement more expansions.
- }
-
- SDValue SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
- SDValue SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+ case ISD::SETO:
+ assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT)
+ == TargetLowering::Legal
+ && "If SETO is expanded, SETOEQ must be legal!");
+ CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break;
+ case ISD::SETUO:
+ assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT)
+ == TargetLowering::Legal
+ && "If SETUO is expanded, SETUNE must be legal!");
+ CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; break;
+ case ISD::SETOEQ:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ case ISD::SETUEQ:
+ case ISD::SETUNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ // If the comparison is floating point, assign and break; otherwise fall
+ // through.
+ if (!OpVT.isInteger()) {
+ // We can use the 4th bit to tell if we are the unordered
+ // or ordered version of the opcode.
+ CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+ Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
+ CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
+ break;
+ }
+ // Fall through for unsigned integer comparisons.
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETLT:
+ case ISD::SETNE:
+ case ISD::SETEQ:
+ InvCC = ISD::getSetCCSwappedOperands(CCCode);
+ if (TLI.getCondCodeAction(InvCC, OpVT) == TargetLowering::Expand) {
+ // We only support expanding via the swapped-operand form of the
+ // comparison; no other way of handling these cases is implemented.
+ llvm_unreachable("Don't know how to expand this condition!");
+ }
+ LHS = DAG.getSetCC(dl, VT, RHS, LHS, InvCC);
+ RHS = SDValue();
+ CC = SDValue();
+ return;
+ }
+
+ SDValue SetCC1, SetCC2;
+ if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
+ // If we aren't the ordered or unordered operation,
+ // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
+ SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+ } else {
+ // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1);
+ SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2);
+ }
LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
RHS = SDValue();
CC = SDValue();
@@ -3108,6 +3153,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp3 = Node->getOperand(1);
if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
(isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+ // If div is legal, it's better to do the normal expansion
+ !TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) &&
useDivRem(Node, isSigned, false))) {
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
} else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
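
The rewritten SETCC expansion replaces the per-predicate table with bit manipulation on the ISD::CondCode encoding. This relies on the enum layout in ISD: the ordered floating-point predicates occupy the low values, bit 3 (0x8) marks their unordered counterparts, and bit 4 (0x10) selects the plain integer forms, as the in-line comment about "the 4th bit" hints. Under that assumption, the mapping the hunk performs is, in isolation:

#include "llvm/CodeGen/ISDOpcodes.h"

using namespace llvm;

// Decompose an FP predicate into an integer-style predicate plus an
// ordered/unordered check. Example: SETULT (0xC) has bit 3 set, giving
// CC2 = SETUO, Opc = OR, and CC1 = (0xC & 0x7) | 0x10 = SETLT, i.e.
// x <u y  ==>  (x < y) OR (x unordered y).
static void decomposeFPSetCC(ISD::CondCode CCCode, ISD::CondCode &CC1,
                             ISD::CondCode &CC2, unsigned &Opc) {
  CC2 = (CCCode & 0x8) ? ISD::SETUO : ISD::SETO;
  Opc = (CCCode & 0x8) ? ISD::OR : ISD::AND;
  CC1 = ISD::CondCode((CCCode & 0x7) | 0x10);
}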
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index e3938968b20..92dc5a9831b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1245,32 +1245,30 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
SDValue Res = SDValue();
- if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType())
- == TargetLowering::Custom)
- Res = TLI.LowerOperation(SDValue(N, 0), DAG);
-
- if (Res.getNode() == 0) {
- switch (N->getOpcode()) {
- default:
- #ifndef NDEBUG
- dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
- N->dump(&DAG); dbgs() << "\n";
- #endif
- llvm_unreachable("Do not know how to expand this operator's operand!");
-
- case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
- case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
- case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
-
- case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break;
- case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
- case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
- case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
- case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break;
- case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break;
- case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
- OpNo); break;
- }
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to expand this operator's operand!");
+
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+
+ case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break;
+ case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
+ case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
+ case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
+ case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break;
+ case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
}
// If the result is null, the sub-method took care of registering results etc.
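
This restructuring, and the matching WidenVectorOperand change below, converge on one pattern: give the target a shot through CustomLowerNode() before entering the opcode switch, and bail out early if it handled the node. A sketch of the shared shape; ExpandSomeOperand is a hypothetical stand-in for the concrete legalizer methods this patch touches:

bool DAGTypeLegalizer::ExpandSomeOperand(SDNode *N, unsigned OpNo) {
  // CustomLowerNode registers replacement values itself when it succeeds,
  // so there is nothing further for the generic code to do.
  if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(),
                      /*LegalizeResult=*/false))
    return false;

  SDValue Res = SDValue();
  switch (N->getOpcode()) {
    // ... per-opcode expansion, setting Res ...
  }
  // Register Res and report whether the caller must revisit, as before.
  return false;
}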
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 37f0e60087d..20b7ce6b15b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -634,7 +634,7 @@ private:
SDValue WidenVecRes_InregOp(SDNode *N);
// Widen Vector Operand.
- bool WidenVectorOperand(SDNode *N, unsigned ResNo);
+ bool WidenVectorOperand(SDNode *N, unsigned OpNo);
SDValue WidenVecOp_BITCAST(SDNode *N);
SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index bb54fd24e21..6bcb3b25e98 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -124,6 +124,10 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// there are only two nodes left, i.e. Lo and Hi.
SDValue LHS = Vals[Slot];
SDValue RHS = Vals[Slot + 1];
+
+ if (TLI.isBigEndian())
+ std::swap(LHS, RHS);
+
Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl,
EVT::getIntegerVT(
*DAG.getContext(),
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index eca4d99098a..d51a6eb192e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2082,16 +2082,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
//===----------------------------------------------------------------------===//
// Widen Vector Operand
//===----------------------------------------------------------------------===//
-bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
- DEBUG(dbgs() << "Widen node operand " << ResNo << ": ";
+bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Widen node operand " << OpNo << ": ";
N->dump(&DAG);
dbgs() << "\n");
SDValue Res = SDValue();
+ // See if the target wants to custom widen this node.
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- dbgs() << "WidenVectorOperand op #" << ResNo << ": ";
+ dbgs() << "WidenVectorOperand op #" << OpNo << ": ";
N->dump(&DAG);
dbgs() << "\n";
#endif
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 79cfcdfe0ea..183416f3fd2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3449,9 +3449,12 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
IsZeroVal, MemcpyStrSrc,
DAG.getMachineFunction());
+ Type *vtType = VT.isExtended() ? VT.getTypeForEVT(*DAG.getContext()) : NULL;
+ unsigned AS = (vtType && vtType->isPointerTy()) ?
+ cast<PointerType>(vtType)->getAddressSpace() : 0;
if (VT == MVT::Other) {
- if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment() ||
+ if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment(AS) ||
TLI.allowsUnalignedMemoryAccesses(VT)) {
VT = TLI.getPointerTy();
} else {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 8fc9c70f995..c314fa5b511 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -474,6 +474,11 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MRI.replaceRegWith(From, To);
}
+ // Freeze the set of reserved registers now that MachineFrameInfo has been
+ // set up. All the information required by getReservedRegs() should be
+ // available now.
+ MRI.freezeReservedRegs(*MF);
+
// Release function-specific state. SDB and CurDAG are already cleared
// at this point.
FuncInfo->clear();
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index 8ed66f70443..4439192fe2f 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -563,6 +563,8 @@ TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData,
/// Return the default expected latency for a def based on its opcode.
unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel,
const MachineInstr *DefMI) const {
+ if (DefMI->isTransient())
+ return 0;
if (DefMI->mayLoad())
return SchedModel->LoadLatency;
if (isHighLatencyDef(DefMI->getOpcode()))
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 8f5d770f665..bf26a6d5920 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -77,9 +77,9 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
Flags,
SectionKind::getDataRel(),
0, Label->getName());
- unsigned Size = TM.getDataLayout()->getPointerSize();
+ unsigned Size = TM.getDataLayout()->getPointerSize(0);
Streamer.SwitchSection(Sec);
- Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment());
+ Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment(0));
Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
const MCExpr *E = MCConstantExpr::Create(Size, getContext());
Streamer.EmitELFSize(Label, E);
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index 4e753c6ecb4..7a6e2604d77 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -50,10 +50,12 @@ unsigned TargetSchedModel::getNumMicroOps(MachineInstr *MI) const {
int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass());
return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, MI);
}
- if (hasInstrSchedModel())
- return resolveSchedClass(MI)->NumMicroOps;
-
- return 1;
+ if (hasInstrSchedModel()) {
+ const MCSchedClassDesc *SCDesc = resolveSchedClass(MI);
+ if (SCDesc->isValid())
+ return SCDesc->NumMicroOps;
+ }
+ return MI->isTransient() ? 0 : 1;
}
/// If we can determine the operand latency from the def only, without machine
@@ -199,7 +201,7 @@ unsigned TargetSchedModel::computeOperandLatency(
report_fatal_error(ss.str());
}
#endif
- return 1;
+ return DefMI->isTransient() ? 0 : 1;
}
unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const {
@@ -209,16 +211,18 @@ unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const {
return TII->getInstrLatency(&InstrItins, MI);
if (hasInstrSchedModel()) {
- unsigned Latency = 0;
const MCSchedClassDesc *SCDesc = resolveSchedClass(MI);
- for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries;
- DefIdx != DefEnd; ++DefIdx) {
- // Lookup the definition's write latency in SubtargetInfo.
- const MCWriteLatencyEntry *WLEntry =
- STI->getWriteLatencyEntry(SCDesc, DefIdx);
- Latency = std::max(Latency, WLEntry->Cycles);
+ if (SCDesc->isValid()) {
+ unsigned Latency = 0;
+ for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries;
+ DefIdx != DefEnd; ++DefIdx) {
+ // Look up the definition's write latency in SubtargetInfo.
+ const MCWriteLatencyEntry *WLEntry =
+ STI->getWriteLatencyEntry(SCDesc, DefIdx);
+ Latency = std::max(Latency, WLEntry->Cycles);
+ }
+ return Latency;
}
- return Latency;
}
return TII->defaultDefLatency(&SchedModel, MI);
}
@@ -251,10 +255,12 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
// an unbuffered resource. If so, it is treated like an in-order CPU.
if (hasInstrSchedModel()) {
const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
- for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc),
- *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) {
- if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->IsBuffered)
- return 1;
+ if (SCDesc->isValid()) {
+ for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc),
+ *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) {
+ if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->IsBuffered)
+ return 1;
+ }
}
}
return 0;
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index a69a8169d30..bb93bdc0bc2 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -257,9 +257,6 @@ void VirtRegRewriter::rewrite() {
SmallVector<unsigned, 8> SuperDeads;
SmallVector<unsigned, 8> SuperDefs;
SmallVector<unsigned, 8> SuperKills;
-#ifndef NDEBUG
- BitVector Reserved = TRI->getReservedRegs(*MF);
-#endif
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
@@ -283,7 +280,7 @@ void VirtRegRewriter::rewrite() {
unsigned PhysReg = VRM->getPhys(VirtReg);
assert(PhysReg != VirtRegMap::NO_PHYS_REG &&
"Instruction uses unmapped VirtReg");
- assert(!Reserved.test(PhysReg) && "Reserved register assignment");
+ assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");
// Preserve semantics of sub-register operands.
if (MO.getSubReg()) {
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index c5c46815a28..94a2542e7ad 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -17,6 +17,7 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ADT/SmallString.h"
@@ -267,7 +268,7 @@ public:
void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE,
const std::vector<std::string> &InputArgv) {
clear(); // Free the old contents.
- unsigned PtrSize = EE->getDataLayout()->getPointerSize();
+ unsigned PtrSize = EE->getDataLayout()->getPointerSize(0);
Array = new char[(InputArgv.size()+1)*PtrSize];
DEBUG(dbgs() << "JIT: ARGV = " << (void*)Array << "\n");
@@ -342,7 +343,7 @@ void ExecutionEngine::runStaticConstructorsDestructors(bool isDtors) {
#ifndef NDEBUG
/// isTargetNullPtr - Return whether the target pointer stored at Loc is null.
static bool isTargetNullPtr(ExecutionEngine *EE, void *Loc) {
- unsigned PtrSize = EE->getDataLayout()->getPointerSize();
+ unsigned PtrSize = EE->getDataLayout()->getPointerSize(0);
for (unsigned i = 0; i < PtrSize; ++i)
if (*(i + (uint8_t*)Loc))
return false;
@@ -644,13 +645,16 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
}
case Instruction::PtrToInt: {
GenericValue GV = getConstantValue(Op0);
- uint32_t PtrWidth = TD->getPointerSizeInBits();
+ unsigned AS = cast<PointerType>(CE->getOperand(1)->getType())
+ ->getAddressSpace();
+ uint32_t PtrWidth = TD->getPointerSizeInBits(AS);
GV.IntVal = APInt(PtrWidth, uintptr_t(GV.PointerVal));
return GV;
}
case Instruction::IntToPtr: {
GenericValue GV = getConstantValue(Op0);
- uint32_t PtrWidth = TD->getPointerSizeInBits();
+ unsigned AS = cast<PointerType>(CE->getType())->getAddressSpace();
+ uint32_t PtrWidth = TD->getPointerSizeInBits(AS);
if (PtrWidth != GV.IntVal.getBitWidth())
GV.IntVal = GV.IntVal.zextOrTrunc(PtrWidth);
assert(GV.IntVal.getBitWidth() <= 64 && "Bad pointer width");
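
Every ExecutionEngine change in this group switches from the old no-argument DataLayout pointer queries to the address-space-aware overloads: host-facing code such as the argv builder pins address space 0, while the constant folder derives the space from the pointer type itself. A short sketch of the distinction, assuming the 2012 header layout used above:

    #include "llvm/DataLayout.h"

    // Slots for host-visible argv pointers always live in address space 0.
    unsigned argvSlotBytes(const llvm::DataLayout &DL) {
      return DL.getPointerSize(0);
    }

    // A ptrtoint result is sized by the pointer's own address space.
    unsigned ptrToIntBits(const llvm::DataLayout &DL, unsigned AddrSpace) {
      return DL.getPointerSizeInBits(AddrSpace);
    }
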
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 5202b091654..326bf79c589 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -1054,7 +1054,8 @@ GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, Type *DstTy,
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
assert(DstTy->isPointerTy() && "Invalid PtrToInt instruction");
- uint32_t PtrSize = TD.getPointerSizeInBits();
+ unsigned AS = cast<PointerType>(DstTy)->getAddressSpace();
+ uint32_t PtrSize = TD.getPointerSizeInBits(AS);
if (PtrSize != Src.IntVal.getBitWidth())
Src.IntVal = Src.IntVal.zextOrTrunc(PtrSize);
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index e16e2d112a9..f58adbe1e1a 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -376,7 +376,7 @@ GenericValue lle_X_sprintf(FunctionType *FT,
case 'x': case 'X':
if (HowLong >= 1) {
if (HowLong == 1 &&
- TheInterpreter->getDataLayout()->getPointerSizeInBits() == 64 &&
+ TheInterpreter->getDataLayout()->getPointerSizeInBits(0) == 64 &&
sizeof(long) < sizeof(int64_t)) {
// Make sure we use %lld with a 64-bit argument because we might be
// compiling LLI on a 32-bit compiler.
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
index 19c197903a6..bcd5b263654 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -14,7 +14,9 @@
#include "JIT.h"
#include "JITDwarfEmitter.h"
+#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -66,7 +68,7 @@ unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F,
void
JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr,
const std::vector<MachineMove> &Moves) const {
- unsigned PointerSize = TD->getPointerSize();
+ unsigned PointerSize = TD->getPointerSize(0);
int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ?
PointerSize : -PointerSize;
MCSymbol *BaseLabel = 0;
@@ -378,7 +380,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
for (unsigned i = 0, e = CallSites.size(); i < e; ++i)
SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action);
- unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize();
+ unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize(0);
unsigned TypeOffset = sizeof(int8_t) + // Call site format
// Call-site table length
@@ -454,12 +456,12 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
const GlobalVariable *GV = TypeInfos[M - 1];
if (GV) {
- if (TD->getPointerSize() == sizeof(int32_t))
+ if (TD->getPointerSize(GV->getType()->getAddressSpace()) == sizeof(int32_t))
JCE->emitInt32((intptr_t)Jit.getOrEmitGlobalVariable(GV));
else
JCE->emitInt64((intptr_t)Jit.getOrEmitGlobalVariable(GV));
} else {
- if (TD->getPointerSize() == sizeof(int32_t))
+ if (TD->getPointerSize(0) == sizeof(int32_t))
JCE->emitInt32(0);
else
JCE->emitInt64(0);
@@ -481,7 +483,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
unsigned char*
JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const {
- unsigned PointerSize = TD->getPointerSize();
+ unsigned PointerSize = TD->getPointerSize(0);
int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ?
PointerSize : -PointerSize;
@@ -541,7 +543,7 @@ JITDwarfEmitter::EmitEHFrame(const Function* Personality,
unsigned char* StartFunction,
unsigned char* EndFunction,
unsigned char* ExceptionTable) const {
- unsigned PointerSize = TD->getPointerSize();
+ unsigned PointerSize = TD->getPointerSize(0);
// EH frame header.
unsigned char* StartEHPtr = (unsigned char*)JCE->getCurrentPCValue();
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 1198a7be372..ecafda7286f 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -384,11 +384,6 @@ namespace {
delete MemMgr;
}
- /// classof - Methods for support type inquiry through isa, cast, and
- /// dyn_cast:
- ///
- static inline bool classof(const MachineCodeEmitter*) { return true; }
-
JITResolver &getJITResolver() { return Resolver; }
virtual void startFunction(MachineFunction &F);
@@ -1265,15 +1260,13 @@ void *JIT::getPointerToFunctionOrStub(Function *F) {
return Addr;
// Get a stub if the target supports it.
- assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
- JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
+ JITEmitter *JE = static_cast<JITEmitter*>(getCodeEmitter());
return JE->getJITResolver().getLazyFunctionStub(F);
}
void JIT::updateFunctionStub(Function *F) {
// Get the empty stub we generated earlier.
- assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
- JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
+ JITEmitter *JE = static_cast<JITEmitter*>(getCodeEmitter());
void *Stub = JE->getJITResolver().getLazyFunctionStub(F);
void *Addr = getPointerToGlobalIfAvailable(F);
assert(Addr != Stub && "Function must have non-stub address to be updated.");
@@ -1294,6 +1287,5 @@ void JIT::freeMachineCodeForFunction(Function *F) {
updateGlobalMapping(F, 0);
// Free the actual memory for the function body and related stuff.
- assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
- cast<JITEmitter>(JCE)->deallocateMemForFunction(F);
+ static_cast<JITEmitter*>(JCE)->deallocateMemForFunction(F);
}
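
With the always-true classof overload deleted, JITEmitter no longer participates in isa<>/cast<> at all; callers that construct the emitter themselves state that knowledge with static_cast instead of a checked cast. A compact sketch of the trade-off (hypothetical names):

    struct Emitter {
      virtual ~Emitter() {}
    };
    struct OwnedEmitter : Emitter {
      void deallocate() { /* release per-function buffers */ }
    };

    void freeFunctionMemory(Emitter *E) {
      // The owning JIT created E itself, so its dynamic type is an
      // invariant of the class; a checked cast would verify nothing new.
      static_cast<OwnedEmitter*>(E)->deallocate();
    }
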
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index eb69693359d..c1f8baed1a4 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -378,17 +378,17 @@ void RuntimeDyldImpl::reassignSectionAddress(unsigned SectionID,
void RuntimeDyldImpl::resolveRelocationEntry(const RelocationEntry &RE,
uint64_t Value) {
- // Ignore relocations for sections that were not loaded
- if (Sections[RE.SectionID].Address != 0) {
- uint8_t *Target = Sections[RE.SectionID].Address + RE.Offset;
- DEBUG(dbgs() << "\tSectionID: " << RE.SectionID
- << " + " << RE.Offset << " (" << format("%p", Target) << ")"
- << " RelType: " << RE.RelType
- << " Addend: " << RE.Addend
- << "\n");
-
- resolveRelocation(Target, Sections[RE.SectionID].LoadAddress + RE.Offset,
- Value, RE.RelType, RE.Addend);
+ // Ignore relocations for sections that were not loaded
+ if (Sections[RE.SectionID].Address != 0) {
+ uint8_t *Target = Sections[RE.SectionID].Address + RE.Offset;
+ DEBUG(dbgs() << "\tSectionID: " << RE.SectionID
+ << " + " << RE.Offset << " (" << format("%p", Target) << ")"
+ << " RelType: " << RE.RelType
+ << " Addend: " << RE.Addend
+ << "\n");
+
+ resolveRelocation(Target, Sections[RE.SectionID].LoadAddress + RE.Offset,
+ Value, RE.RelType, RE.Addend);
}
}
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index efefacf632d..08aba64e460 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -59,9 +59,6 @@ public:
const ELFObjectFile<target_endianness, is64Bits> *v) {
return v->isDyldType();
}
- static inline bool classof(const DyldELFObject *v) {
- return true;
- }
};
template<support::endianness target_endianness, bool is64Bits>
@@ -416,7 +413,13 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
if (si == Obj.end_sections())
llvm_unreachable("Symbol section not found, bad object file format!");
DEBUG(dbgs() << "\t\tThis is section symbol\n");
- Value.SectionID = findOrEmitSection(Obj, (*si), true, ObjSectionToID);
+ // Default to 'true' in case isText fails (though it never does).
+ bool isCode = true;
+ si->isText(isCode);
+ Value.SectionID = findOrEmitSection(Obj,
+ (*si),
+ isCode,
+ ObjSectionToID);
Value.Addend = Addend;
break;
}
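
The relocation code above now classifies the symbol's section instead of assuming it holds code. A hedged sketch of the probe: object::SectionRef::isText() reports through an out-parameter and returns an error_code, which the hunk deliberately ignores after seeding a safe default.

    #include "llvm/Object/ObjectFile.h"

    static bool sectionIsCode(const llvm::object::SectionRef &Sec) {
      bool IsText = true;  // conservative default should the query fail
      Sec.isText(IsText);  // error_code discarded, as in the hunk above
      return IsText;
    }
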
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index b0bc2900ecb..17a6323d0e7 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -251,6 +251,7 @@ public:
virtual void EmitPad(int64_t Offset);
virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool);
+ virtual void EmitTCEntry(const MCSymbol &S);
virtual void EmitInstruction(const MCInst &Inst);
@@ -1299,6 +1300,14 @@ void MCAsmStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
EmitEOL();
}
+void MCAsmStreamer::EmitTCEntry(const MCSymbol &S) {
+ OS << "\t.tc ";
+ OS << S.getName();
+ OS << "[TC],";
+ OS << S.getName();
+ EmitEOL();
+}
+
void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
assert(getCurrentSection() && "Cannot emit contents before setting section!");
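
For a TOC symbol foo, the EmitTCEntry override above prints the PowerPC64 directive

    .tc foo[TC],foo

while the MCELFStreamer override in the next file emits an 8-byte symbol value that the PPC64 object writer turns into an R_PPC64_TOC relocation; the base MCStreamer implementation further down reports the method as unsupported.
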
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index 8107005481d..14fbc1ec839 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -103,6 +103,8 @@ public:
virtual void EmitFileDirective(StringRef Filename);
+ virtual void EmitTCEntry(const MCSymbol &S);
+
virtual void FinishImpl();
private:
@@ -469,6 +471,12 @@ void MCELFStreamer::FinishImpl() {
this->MCObjectStreamer::FinishImpl();
}
+void MCELFStreamer::EmitTCEntry(const MCSymbol &S)
+{
+ // Creates an R_PPC64_TOC relocation
+ MCObjectStreamer::EmitSymbolValue(&S, 8, 0);
+}
+
MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB,
raw_ostream &OS, MCCodeEmitter *CE,
bool RelaxAll, bool NoExecStack) {
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index c2fff3c5207..0406ff8d446 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -133,6 +133,15 @@ private:
/// IsDarwin - is Darwin compatibility enabled?
bool IsDarwin;
+ /// ParsingInlineAsm - Are we parsing MS-style inline assembly?
+ bool ParsingInlineAsm;
+
+ /// ParsedOperands - The parsed operands from the last parsed statement.
+ SmallVector<MCParsedAsmOperand*, 8> ParsedOperands;
+
+ /// Opcode - The opcode from the last parsed instruction.
+ unsigned Opcode;
+
public:
AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
const MCAsmInfo &MAI);
@@ -171,6 +180,21 @@ public:
virtual const AsmToken &Lex();
+ bool ParseStatement();
+ void setParsingInlineAsm(bool V) { ParsingInlineAsm = V; }
+ unsigned getNumParsedOperands() { return ParsedOperands.size(); }
+ MCParsedAsmOperand &getParsedOperand(unsigned OpNum) {
+ assert(ParsedOperands.size() > OpNum);
+ return *ParsedOperands[OpNum];
+ }
+ void freeParsedOperands() {
+ for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i)
+ delete ParsedOperands[i];
+ ParsedOperands.clear();
+ }
+ bool isInstruction() { return Opcode != (unsigned)~0x0; }
+ unsigned getOpcode() { return Opcode; }
+
bool ParseExpression(const MCExpr *&Res);
virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc);
virtual bool ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc);
@@ -181,7 +205,6 @@ public:
private:
void CheckForValidSection();
- bool ParseStatement();
void EatToEndOfLine();
bool ParseCppHashLineFilenameComment(const SMLoc &L);
@@ -412,7 +435,8 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx,
: Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
GenericParser(new GenericAsmParser), PlatformParser(0),
CurBuffer(0), MacrosEnabled(true), CppHashLineNumber(0),
- AssemblerDialect(~0U), IsDarwin(false) {
+ AssemblerDialect(~0U), IsDarwin(false), ParsingInlineAsm(false),
+ Opcode(~0x0) {
// Save the old handler.
SavedDiagHandler = SrcMgr.getDiagHandler();
SavedDiagContext = SrcMgr.getDiagContext();
@@ -604,7 +628,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
}
void AsmParser::CheckForValidSection() {
- if (!getStreamer().getCurrentSection()) {
+ if (!ParsingInlineAsm && !getStreamer().getCurrentSection()) {
TokError("expected section directive before assembly directive");
Out.SwitchSection(Ctx.getMachOSection(
"__TEXT", "__text",
@@ -1310,12 +1334,11 @@ bool AsmParser::ParseStatement() {
CheckForValidSection();
// Canonicalize the opcode to lower case.
- SmallString<128> Opcode;
+ SmallString<128> OpcodeStr;
for (unsigned i = 0, e = IDVal.size(); i != e; ++i)
- Opcode.push_back(tolower(IDVal[i]));
+ OpcodeStr.push_back(tolower(IDVal[i]));
- SmallVector<MCParsedAsmOperand*, 8> ParsedOperands;
- bool HadError = getTargetParser().ParseInstruction(Opcode.str(), IDLoc,
+ bool HadError = getTargetParser().ParseInstruction(OpcodeStr.str(), IDLoc,
ParsedOperands);
// Dump the parsed representation, if requested.
@@ -1346,13 +1369,18 @@ bool AsmParser::ParseStatement() {
}
// If parsing succeeded, match the instruction.
- if (!HadError)
- HadError = getTargetParser().MatchAndEmitInstruction(IDLoc, ParsedOperands,
- Out);
-
- // Free any parsed operands.
- for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i)
- delete ParsedOperands[i];
+ if (!HadError) {
+ unsigned ErrorInfo;
+ HadError = getTargetParser().MatchAndEmitInstruction(IDLoc, Opcode,
+ ParsedOperands, Out,
+ ErrorInfo,
+ ParsingInlineAsm);
+ }
+
+ // Free any parsed operands. If parsing MS-style inline assembly, it is the
+ // responsibility of the caller (i.e., clang) to free the parsed operands.
+ if (!ParsingInlineAsm)
+ freeParsedOperands();
// Don't skip the rest of the line; the instruction parser is responsible for
// that.
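
Taken together, the new members form a small driver API for MS-style inline assembly: the front end flips the parser into inline-asm mode, parses one statement, inspects the opcode and operands, and assumes ownership of the operands. A hedged sketch of the intended call pattern, using only the hooks declared above (the concrete parser class stays internal to AsmParser.cpp):

    void handleOneInlineAsmStatement(AsmParser &Parser) {
      Parser.setParsingInlineAsm(true);
      if (!Parser.ParseStatement() && Parser.isInstruction()) {
        unsigned Opcode = Parser.getOpcode();
        (void)Opcode;  // e.g. look up clobber information for this opcode
        for (unsigned i = 0, e = Parser.getNumParsedOperands(); i != e; ++i) {
          MCParsedAsmOperand &Op = Parser.getParsedOperand(i);
          (void)Op;    // e.g. record register and memory references
        }
      }
      // In inline-asm mode ParseStatement skips freeing; the caller must.
      Parser.freeParsedOperands();
    }
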
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 0bac24dc3a7..afece0ba551 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -561,6 +561,10 @@ void MCStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool) {
abort();
}
+void MCStreamer::EmitTCEntry(const MCSymbol &S) {
+ llvm_unreachable("Unsupported method");
+}
+
/// EmitRawText - If this file is backed by an assembly streamer, this dumps
/// the specified string in the output .s file. This capability is
/// indicated by the hasRawTextSupport() predicate.
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index abfaecc2790..2cc7a584624 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -98,6 +98,7 @@ const char *Triple::getVendorTypeName(VendorType Kind) {
case BGP: return "bgp";
case BGQ: return "bgq";
case Freescale: return "fsl";
+ case IBM: return "ibm";
}
llvm_unreachable("Invalid VendorType!");
@@ -128,6 +129,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
case NativeClient: return "nacl";
case CNK: return "cnk";
case Bitrig: return "bitrig";
+ case AIX: return "aix";
}
llvm_unreachable("Invalid OSType");
@@ -278,6 +280,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) {
.Case("bgp", Triple::BGP)
.Case("bgq", Triple::BGQ)
.Case("fsl", Triple::Freescale)
+ .Case("ibm", Triple::IBM)
.Default(Triple::UnknownVendor);
}
@@ -304,6 +307,7 @@ static Triple::OSType parseOS(StringRef OSName) {
.StartsWith("nacl", Triple::NativeClient)
.StartsWith("cnk", Triple::CNK)
.StartsWith("bitrig", Triple::Bitrig)
+ .StartsWith("aix", Triple::AIX)
.Default(Triple::UnknownOS);
}
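
With the IBM vendor and AIX OS enumerators wired into both directions of the mapping, a classic AIX target triple now round-trips through parsing and printing. A small usage sketch:

    #include "llvm/ADT/Triple.h"

    static bool isPpc64Aix(const llvm::Triple &T) {
      return T.getVendor() == llvm::Triple::IBM &&
             T.getOS() == llvm::Triple::AIX;
    }
    // e.g. isPpc64Aix(llvm::Triple("powerpc64-ibm-aix")) is true.
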
diff --git a/lib/TableGen/CMakeLists.txt b/lib/TableGen/CMakeLists.txt
index 28240f4a673..4f64eb4ff24 100644
--- a/lib/TableGen/CMakeLists.txt
+++ b/lib/TableGen/CMakeLists.txt
@@ -1,5 +1,3 @@
-## FIXME: This only requires RTTI because tblgen uses it. Fix that.
-set(LLVM_REQUIRES_RTTI 1)
set(LLVM_REQUIRES_EH 1)
add_llvm_library(LLVMTableGen
diff --git a/lib/TableGen/Makefile b/lib/TableGen/Makefile
index 44724389e1d..732d8a197ee 100644
--- a/lib/TableGen/Makefile
+++ b/lib/TableGen/Makefile
@@ -11,8 +11,6 @@ LEVEL = ../..
LIBRARYNAME = LLVMTableGen
BUILD_ARCHIVE = 1
-## FIXME: This only requires RTTI because tblgen uses it. Fix that.
-REQUIRES_RTTI = 1
REQUIRES_EH = 1
include $(LEVEL)/Makefile.common
diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
index 83f2fff9312..c7b2de2b0fe 100644
--- a/lib/TableGen/Record.cpp
+++ b/lib/TableGen/Record.cpp
@@ -200,7 +200,7 @@ Init *IntRecTy::convertValue(BitInit *BI) {
Init *IntRecTy::convertValue(BitsInit *BI) {
int64_t Result = 0;
for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i)
- if (BitInit *Bit = dynamic_cast<BitInit*>(BI->getBit(i))) {
+ if (BitInit *Bit = dyn_cast<BitInit>(BI->getBit(i))) {
Result |= Bit->getValue() << i;
} else {
return 0;
@@ -615,7 +615,7 @@ ListInit::convertInitListSlice(const std::vector<unsigned> &Elements) const {
Record *ListInit::getElementAsRecord(unsigned i) const {
assert(i < Values.size() && "List element index out of range!");
- DefInit *DI = dynamic_cast<DefInit*>(Values[i]);
+ DefInit *DI = dyn_cast<DefInit>(Values[i]);
if (DI == 0) throw "Expected record in list!";
return DI->getDef();
}
@@ -650,7 +650,7 @@ Init *ListInit::resolveListElementReference(Record &R, const RecordVal *IRV,
// If the element is set to some value, or if we are resolving a reference
// to a specific variable and that variable is explicitly unset, then
// replace the VarListElementInit with it.
- if (IRV || !dynamic_cast<UnsetInit*>(E))
+ if (IRV || !isa<UnsetInit>(E))
return E;
return 0;
}
@@ -667,13 +667,13 @@ std::string ListInit::getAsString() const {
Init *OpInit::resolveListElementReference(Record &R, const RecordVal *IRV,
unsigned Elt) const {
Init *Resolved = resolveReferences(R, IRV);
- OpInit *OResolved = dynamic_cast<OpInit *>(Resolved);
+ OpInit *OResolved = dyn_cast<OpInit>(Resolved);
if (OResolved) {
Resolved = OResolved->Fold(&R, 0);
}
if (Resolved != this) {
- TypedInit *Typed = dynamic_cast<TypedInit *>(Resolved);
+ TypedInit *Typed = dyn_cast<TypedInit>(Resolved);
assert(Typed && "Expected typed init for list reference");
if (Typed) {
Init *New = Typed->resolveListElementReference(R, IRV, Elt);
@@ -709,23 +709,16 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
switch (getOpcode()) {
case CAST: {
if (getType()->getAsString() == "string") {
- StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
- if (LHSs) {
+ if (StringInit *LHSs = dyn_cast<StringInit>(LHS))
return LHSs;
- }
- DefInit *LHSd = dynamic_cast<DefInit*>(LHS);
- if (LHSd) {
+ if (DefInit *LHSd = dyn_cast<DefInit>(LHS))
return StringInit::get(LHSd->getDef()->getName());
- }
- IntInit *LHSi = dynamic_cast<IntInit*>(LHS);
- if (LHSi) {
+ if (IntInit *LHSi = dyn_cast<IntInit>(LHS))
return StringInit::get(LHSi->getAsString());
- }
} else {
- StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
- if (LHSs) {
+ if (StringInit *LHSs = dyn_cast<StringInit>(LHS)) {
std::string Name = LHSs->getValue();
// From TGParser::ParseIDValue
@@ -773,8 +766,7 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
break;
}
case HEAD: {
- ListInit *LHSl = dynamic_cast<ListInit*>(LHS);
- if (LHSl) {
+ if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) {
if (LHSl->getSize() == 0) {
assert(0 && "Empty list in car");
return 0;
@@ -784,8 +776,7 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
break;
}
case TAIL: {
- ListInit *LHSl = dynamic_cast<ListInit*>(LHS);
- if (LHSl) {
+ if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) {
if (LHSl->getSize() == 0) {
assert(0 && "Empty list in cdr");
return 0;
@@ -802,16 +793,14 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
break;
}
case EMPTY: {
- ListInit *LHSl = dynamic_cast<ListInit*>(LHS);
- if (LHSl) {
+ if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) {
if (LHSl->getSize() == 0) {
return IntInit::get(1);
} else {
return IntInit::get(0);
}
}
- StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
- if (LHSs) {
+ if (StringInit *LHSs = dyn_cast<StringInit>(LHS)) {
if (LHSs->getValue().empty()) {
return IntInit::get(1);
} else {
@@ -865,11 +854,11 @@ BinOpInit *BinOpInit::get(BinaryOp opc, Init *lhs,
Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
switch (getOpcode()) {
case CONCAT: {
- DagInit *LHSs = dynamic_cast<DagInit*>(LHS);
- DagInit *RHSs = dynamic_cast<DagInit*>(RHS);
+ DagInit *LHSs = dyn_cast<DagInit>(LHS);
+ DagInit *RHSs = dyn_cast<DagInit>(RHS);
if (LHSs && RHSs) {
- DefInit *LOp = dynamic_cast<DefInit*>(LHSs->getOperator());
- DefInit *ROp = dynamic_cast<DefInit*>(RHSs->getOperator());
+ DefInit *LOp = dyn_cast<DefInit>(LHSs->getOperator());
+ DefInit *ROp = dyn_cast<DefInit>(RHSs->getOperator());
if (LOp == 0 || ROp == 0 || LOp->getDef() != ROp->getDef())
throw "Concated Dag operators do not match!";
std::vector<Init*> Args;
@@ -887,8 +876,8 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
break;
}
case STRCONCAT: {
- StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
- StringInit *RHSs = dynamic_cast<StringInit*>(RHS);
+ StringInit *LHSs = dyn_cast<StringInit>(LHS);
+ StringInit *RHSs = dyn_cast<StringInit>(RHS);
if (LHSs && RHSs)
return StringInit::get(LHSs->getValue() + RHSs->getValue());
break;
@@ -897,15 +886,15 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
// try to fold eq comparison for 'bit' and 'int', otherwise fall back
// to string objects.
IntInit *L =
- dynamic_cast<IntInit*>(LHS->convertInitializerTo(IntRecTy::get()));
+ dyn_cast_or_null<IntInit>(LHS->convertInitializerTo(IntRecTy::get()));
IntInit *R =
- dynamic_cast<IntInit*>(RHS->convertInitializerTo(IntRecTy::get()));
+ dyn_cast_or_null<IntInit>(RHS->convertInitializerTo(IntRecTy::get()));
if (L && R)
return IntInit::get(L->getValue() == R->getValue());
- StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
- StringInit *RHSs = dynamic_cast<StringInit*>(RHS);
+ StringInit *LHSs = dyn_cast<StringInit>(LHS);
+ StringInit *RHSs = dyn_cast<StringInit>(RHS);
// Make sure we've resolved
if (LHSs && RHSs)
@@ -916,8 +905,8 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
case SHL:
case SRA:
case SRL: {
- IntInit *LHSi = dynamic_cast<IntInit*>(LHS);
- IntInit *RHSi = dynamic_cast<IntInit*>(RHS);
+ IntInit *LHSi = dyn_cast<IntInit>(LHS);
+ IntInit *RHSi = dyn_cast<IntInit>(RHS);
if (LHSi && RHSi) {
int64_t LHSv = LHSi->getValue(), RHSv = RHSi->getValue();
int64_t Result;
@@ -990,7 +979,7 @@ static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg,
MultiClass *CurMultiClass) {
std::vector<Init *> NewOperands;
- TypedInit *TArg = dynamic_cast<TypedInit*>(Arg);
+ TypedInit *TArg = dyn_cast<TypedInit>(Arg);
// If this is a dag, recurse
if (TArg && TArg->getType()->getAsString() == "dag") {
@@ -1004,7 +993,7 @@ static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg,
}
for (int i = 0; i < RHSo->getNumOperands(); ++i) {
- OpInit *RHSoo = dynamic_cast<OpInit*>(RHSo->getOperand(i));
+ OpInit *RHSoo = dyn_cast<OpInit>(RHSo->getOperand(i));
if (RHSoo) {
Init *Result = EvaluateOperation(RHSoo, LHS, Arg,
@@ -1032,16 +1021,16 @@ static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg,
static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
Record *CurRec, MultiClass *CurMultiClass) {
- DagInit *MHSd = dynamic_cast<DagInit*>(MHS);
- ListInit *MHSl = dynamic_cast<ListInit*>(MHS);
+ DagInit *MHSd = dyn_cast<DagInit>(MHS);
+ ListInit *MHSl = dyn_cast<ListInit>(MHS);
- OpInit *RHSo = dynamic_cast<OpInit*>(RHS);
+ OpInit *RHSo = dyn_cast<OpInit>(RHS);
if (!RHSo) {
throw TGError(CurRec->getLoc(), "!foreach requires an operator\n");
}
- TypedInit *LHSt = dynamic_cast<TypedInit*>(LHS);
+ TypedInit *LHSt = dyn_cast<TypedInit>(LHS);
if (!LHSt) {
throw TGError(CurRec->getLoc(), "!foreach requires typed variable\n");
@@ -1110,17 +1099,17 @@ static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
Init *TernOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
switch (getOpcode()) {
case SUBST: {
- DefInit *LHSd = dynamic_cast<DefInit*>(LHS);
- VarInit *LHSv = dynamic_cast<VarInit*>(LHS);
- StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
+ DefInit *LHSd = dyn_cast<DefInit>(LHS);
+ VarInit *LHSv = dyn_cast<VarInit>(LHS);
+ StringInit *LHSs = dyn_cast<StringInit>(LHS);
- DefInit *MHSd = dynamic_cast<DefInit*>(MHS);
- VarInit *MHSv = dynamic_cast<VarInit*>(MHS);
- StringInit *MHSs = dynamic_cast<StringInit*>(MHS);
+ DefInit *MHSd = dyn_cast<DefInit>(MHS);
+ VarInit *MHSv = dyn_cast<VarInit>(MHS);
+ StringInit *MHSs = dyn_cast<StringInit>(MHS);
- DefInit *RHSd = dynamic_cast<DefInit*>(RHS);
- VarInit *RHSv = dynamic_cast<VarInit*>(RHS);
- StringInit *RHSs = dynamic_cast<StringInit*>(RHS);
+ DefInit *RHSd = dyn_cast<DefInit>(RHS);
+ VarInit *RHSv = dyn_cast<VarInit>(RHS);
+ StringInit *RHSs = dyn_cast<StringInit>(RHS);
if ((LHSd && MHSd && RHSd)
|| (LHSv && MHSv && RHSv)
@@ -1168,9 +1157,9 @@ Init *TernOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
}
case IF: {
- IntInit *LHSi = dynamic_cast<IntInit*>(LHS);
+ IntInit *LHSi = dyn_cast<IntInit>(LHS);
if (Init *I = LHS->convertInitializerTo(IntRecTy::get()))
- LHSi = dynamic_cast<IntInit*>(I);
+ LHSi = dyn_cast<IntInit>(I);
if (LHSi) {
if (LHSi->getValue()) {
return MHS;
@@ -1190,9 +1179,9 @@ Init *TernOpInit::resolveReferences(Record &R,
Init *lhs = LHS->resolveReferences(R, RV);
if (Opc == IF && lhs != LHS) {
- IntInit *Value = dynamic_cast<IntInit*>(lhs);
+ IntInit *Value = dyn_cast<IntInit>(lhs);
if (Init *I = lhs->convertInitializerTo(IntRecTy::get()))
- Value = dynamic_cast<IntInit*>(I);
+ Value = dyn_cast<IntInit>(I);
if (Value != 0) {
// Short-circuit
if (Value->getValue()) {
@@ -1285,8 +1274,7 @@ VarInit *VarInit::get(Init *VN, RecTy *T) {
}
const std::string &VarInit::getName() const {
- StringInit *NameString =
- dynamic_cast<StringInit *>(getNameInit());
+ StringInit *NameString = dyn_cast<StringInit>(getNameInit());
assert(NameString && "VarInit name is not a string!");
return NameString->getValue();
}
@@ -1305,9 +1293,9 @@ Init *VarInit::resolveListElementReference(Record &R,
RecordVal *RV = R.getValue(getNameInit());
assert(RV && "Reference to a non-existent variable?");
- ListInit *LI = dynamic_cast<ListInit*>(RV->getValue());
+ ListInit *LI = dyn_cast<ListInit>(RV->getValue());
if (!LI) {
- TypedInit *VI = dynamic_cast<TypedInit*>(RV->getValue());
+ TypedInit *VI = dyn_cast<TypedInit>(RV->getValue());
assert(VI && "Invalid list element!");
return VarListElementInit::get(VI, Elt);
}
@@ -1318,7 +1306,7 @@ Init *VarInit::resolveListElementReference(Record &R,
// If the element is set to some value, or if we are resolving a reference
// to a specific variable and that variable is explicitly unset, then
// replace the VarListElementInit with it.
- if (IRV || !dynamic_cast<UnsetInit*>(E))
+ if (IRV || !isa<UnsetInit>(E))
return E;
return 0;
}
@@ -1335,7 +1323,7 @@ Init *VarInit::getFieldInit(Record &R, const RecordVal *RV,
const std::string &FieldName) const {
if (isa<RecordRecTy>(getType()))
if (const RecordVal *Val = R.getValue(VarName)) {
- if (RV != Val && (RV || dynamic_cast<UnsetInit*>(Val->getValue())))
+ if (RV != Val && (RV || isa<UnsetInit>(Val->getValue())))
return 0;
Init *TheInit = Val->getValue();
assert(TheInit != this && "Infinite loop detected!");
@@ -1354,7 +1342,7 @@ Init *VarInit::getFieldInit(Record &R, const RecordVal *RV,
///
Init *VarInit::resolveReferences(Record &R, const RecordVal *RV) const {
if (RecordVal *Val = R.getValue(VarName))
- if (RV == Val || (RV == 0 && !dynamic_cast<UnsetInit*>(Val->getValue())))
+ if (RV == Val || (RV == 0 && !isa<UnsetInit>(Val->getValue())))
return Val->getValue();
return const_cast<VarInit *>(this);
}
@@ -1422,8 +1410,7 @@ Init *VarListElementInit:: resolveListElementReference(Record &R,
Init *Result = TI->resolveListElementReference(R, RV, Element);
if (Result) {
- TypedInit *TInit = dynamic_cast<TypedInit *>(Result);
- if (TInit) {
+ if (TypedInit *TInit = dyn_cast<TypedInit>(Result)) {
Init *Result2 = TInit->resolveListElementReference(R, RV, Elt);
if (Result2) return Result2;
return new VarListElementInit(TInit, Elt);
@@ -1475,14 +1462,14 @@ Init *FieldInit::getBit(unsigned Bit) const {
Init *FieldInit::resolveListElementReference(Record &R, const RecordVal *RV,
unsigned Elt) const {
if (Init *ListVal = Rec->getFieldInit(R, RV, FieldName))
- if (ListInit *LI = dynamic_cast<ListInit*>(ListVal)) {
+ if (ListInit *LI = dyn_cast<ListInit>(ListVal)) {
if (Elt >= LI->getSize()) return 0;
Init *E = LI->getElement(Elt);
// If the element is set to some value, or if we are resolving a
// reference to a specific variable and that variable is explicitly
// unset, then replace the VarListElementInit with it.
- if (RV || !dynamic_cast<UnsetInit*>(E))
+ if (RV || !isa<UnsetInit>(E))
return E;
}
return 0;
@@ -1611,7 +1598,7 @@ RecordVal::RecordVal(const std::string &N, RecTy *T, unsigned P)
}
const std::string &RecordVal::getName() const {
- StringInit *NameString = dynamic_cast<StringInit *>(Name);
+ StringInit *NameString = dyn_cast<StringInit>(Name);
assert(NameString && "RecordVal name is not a string!");
return NameString->getValue();
}
@@ -1641,7 +1628,7 @@ void Record::init() {
void Record::checkName() {
// Ensure the record name has string type.
- const TypedInit *TypedName = dynamic_cast<const TypedInit *>(Name);
+ const TypedInit *TypedName = dyn_cast<const TypedInit>(Name);
assert(TypedName && "Record name is not typed!");
RecTy *Type = TypedName->getType();
if (!isa<StringRecTy>(Type))
@@ -1655,8 +1642,7 @@ DefInit *Record::getDefInit() {
}
const std::string &Record::getName() const {
- const StringInit *NameString =
- dynamic_cast<const StringInit *>(Name);
+ const StringInit *NameString = dyn_cast<StringInit>(Name);
assert(NameString && "Record name is not a string!");
return NameString->getValue();
}
@@ -1773,7 +1759,7 @@ std::string Record::getValueAsString(StringRef FieldName) const {
throw "Record `" + getName() + "' does not have a field named `" +
FieldName.str() + "'!\n";
- if (StringInit *SI = dynamic_cast<StringInit*>(R->getValue()))
+ if (StringInit *SI = dyn_cast<StringInit>(R->getValue()))
return SI->getValue();
throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have a string initializer!";
@@ -1789,7 +1775,7 @@ BitsInit *Record::getValueAsBitsInit(StringRef FieldName) const {
throw "Record `" + getName() + "' does not have a field named `" +
FieldName.str() + "'!\n";
- if (BitsInit *BI = dynamic_cast<BitsInit*>(R->getValue()))
+ if (BitsInit *BI = dyn_cast<BitsInit>(R->getValue()))
return BI;
throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have a BitsInit initializer!";
@@ -1805,7 +1791,7 @@ ListInit *Record::getValueAsListInit(StringRef FieldName) const {
throw "Record `" + getName() + "' does not have a field named `" +
FieldName.str() + "'!\n";
- if (ListInit *LI = dynamic_cast<ListInit*>(R->getValue()))
+ if (ListInit *LI = dyn_cast<ListInit>(R->getValue()))
return LI;
throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have a list initializer!";
@@ -1820,7 +1806,7 @@ Record::getValueAsListOfDefs(StringRef FieldName) const {
ListInit *List = getValueAsListInit(FieldName);
std::vector<Record*> Defs;
for (unsigned i = 0; i < List->getSize(); i++) {
- if (DefInit *DI = dynamic_cast<DefInit*>(List->getElement(i))) {
+ if (DefInit *DI = dyn_cast<DefInit>(List->getElement(i))) {
Defs.push_back(DI->getDef());
} else {
throw "Record `" + getName() + "', field `" + FieldName.str() +
@@ -1840,7 +1826,7 @@ int64_t Record::getValueAsInt(StringRef FieldName) const {
throw "Record `" + getName() + "' does not have a field named `" +
FieldName.str() + "'!\n";
- if (IntInit *II = dynamic_cast<IntInit*>(R->getValue()))
+ if (IntInit *II = dyn_cast<IntInit>(R->getValue()))
return II->getValue();
throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have an int initializer!";
@@ -1855,7 +1841,7 @@ Record::getValueAsListOfInts(StringRef FieldName) const {
ListInit *List = getValueAsListInit(FieldName);
std::vector<int64_t> Ints;
for (unsigned i = 0; i < List->getSize(); i++) {
- if (IntInit *II = dynamic_cast<IntInit*>(List->getElement(i))) {
+ if (IntInit *II = dyn_cast<IntInit>(List->getElement(i))) {
Ints.push_back(II->getValue());
} else {
throw "Record `" + getName() + "', field `" + FieldName.str() +
@@ -1874,7 +1860,7 @@ Record::getValueAsListOfStrings(StringRef FieldName) const {
ListInit *List = getValueAsListInit(FieldName);
std::vector<std::string> Strings;
for (unsigned i = 0; i < List->getSize(); i++) {
- if (StringInit *II = dynamic_cast<StringInit*>(List->getElement(i))) {
+ if (StringInit *II = dyn_cast<StringInit>(List->getElement(i))) {
Strings.push_back(II->getValue());
} else {
throw "Record `" + getName() + "', field `" + FieldName.str() +
@@ -1894,7 +1880,7 @@ Record *Record::getValueAsDef(StringRef FieldName) const {
throw "Record `" + getName() + "' does not have a field named `" +
FieldName.str() + "'!\n";
- if (DefInit *DI = dynamic_cast<DefInit*>(R->getValue()))
+ if (DefInit *DI = dyn_cast<DefInit>(R->getValue()))
return DI->getDef();
throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have a def initializer!";
@@ -1910,7 +1896,7 @@ bool Record::getValueAsBit(StringRef FieldName) const {
throw "Record `" + getName() + "' does not have a field named `" +
FieldName.str() + "'!\n";
- if (BitInit *BI = dynamic_cast<BitInit*>(R->getValue()))
+ if (BitInit *BI = dyn_cast<BitInit>(R->getValue()))
return BI->getValue();
throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have a bit initializer!";
@@ -1927,7 +1913,7 @@ bool Record::getValueAsBitOrUnset(StringRef FieldName, bool &Unset) const {
return false;
}
Unset = false;
- if (BitInit *BI = dynamic_cast<BitInit*>(R->getValue()))
+ if (BitInit *BI = dyn_cast<BitInit>(R->getValue()))
return BI->getValue();
throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have a bit initializer!";
@@ -1943,7 +1929,7 @@ DagInit *Record::getValueAsDag(StringRef FieldName) const {
throw "Record `" + getName() + "' does not have a field named `" +
FieldName.str() + "'!\n";
- if (DagInit *DI = dynamic_cast<DagInit*>(R->getValue()))
+ if (DagInit *DI = dyn_cast<DagInit>(R->getValue()))
return DI;
throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have a dag initializer!";
@@ -2004,7 +1990,7 @@ RecordKeeper::getAllDerivedDefinitions(const std::string &ClassName) const {
/// to CurRec's name.
Init *llvm::QualifyName(Record &CurRec, MultiClass *CurMultiClass,
Init *Name, const std::string &Scoper) {
- RecTy *Type = dynamic_cast<TypedInit *>(Name)->getType();
+ RecTy *Type = dyn_cast<TypedInit>(Name)->getType();
BinOpInit *NewName =
BinOpInit::get(BinOpInit::STRCONCAT,
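
Every Record.cpp hunk swaps compiler RTTI (dynamic_cast) for LLVM-style RTTI (dyn_cast/isa), which is what allowed the TableGen build files above to stop requiring -frtti. The mechanism in a minimal hedged sketch, with hypothetical node classes rather than the real Init hierarchy:

    #include "llvm/Support/Casting.h"

    struct Node {
      enum NodeKind { NK_String, NK_Int };
      NodeKind Kind;
      Node(NodeKind K) : Kind(K) {}
      NodeKind getKind() const { return Kind; }
    };

    struct StringNode : Node {
      StringNode() : Node(NK_String) {}
      // dyn_cast<StringNode>(N) consults this instead of compiler RTTI.
      static bool classof(const Node *N) { return N->getKind() == NK_String; }
    };

    // Usage: if (StringNode *S = llvm::dyn_cast<StringNode>(N)) { ... }
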
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
index 0ed75f014ee..b1f9f724efd 100644
--- a/lib/TableGen/TGParser.cpp
+++ b/lib/TableGen/TGParser.cpp
@@ -93,7 +93,7 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
// Do not allow assignments like 'X = X'. This will just cause infinite loops
// in the resolution machinery.
if (BitList.empty())
- if (VarInit *VI = dynamic_cast<VarInit*>(V))
+ if (VarInit *VI = dyn_cast<VarInit>(V))
if (VI->getNameInit() == ValName)
return false;
@@ -102,7 +102,7 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
// initializer.
//
if (!BitList.empty()) {
- BitsInit *CurVal = dynamic_cast<BitsInit*>(RV->getValue());
+ BitsInit *CurVal = dyn_cast<BitsInit>(RV->getValue());
if (CurVal == 0)
return Error(Loc, "Value '" + ValName->getAsUnquotedString()
+ "' is not a bits type");
@@ -114,7 +114,7 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
}
// We should have a BitsInit type now.
- BitsInit *BInit = dynamic_cast<BitsInit*>(BI);
+ BitsInit *BInit = dyn_cast<BitsInit>(BI);
assert(BInit != 0);
SmallVector<Init *, 16> NewBits(CurVal->getNumBits());
@@ -310,7 +310,7 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){
if (IterVals.size() != Loops.size()) {
assert(IterVals.size() < Loops.size());
ForeachLoop &CurLoop = Loops[IterVals.size()];
- ListInit *List = dynamic_cast<ListInit *>(CurLoop.ListValue);
+ ListInit *List = dyn_cast<ListInit>(CurLoop.ListValue);
if (List == 0) {
Error(Loc, "Loop list is not a list");
return true;
@@ -335,7 +335,7 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){
// Set the iterator values now.
for (unsigned i = 0, e = IterVals.size(); i != e; ++i) {
VarInit *IterVar = IterVals[i].IterVar;
- TypedInit *IVal = dynamic_cast<TypedInit *>(IterVals[i].IterValue);
+ TypedInit *IVal = dyn_cast<TypedInit>(IterVals[i].IterValue);
if (IVal == 0) {
Error(Loc, "foreach iterator value is untyped");
return true;
@@ -406,8 +406,7 @@ Init *TGParser::ParseObjectName(MultiClass *CurMultiClass) {
RecTy *Type = 0;
if (CurRec) {
- const TypedInit *CurRecName =
- dynamic_cast<const TypedInit *>(CurRec->getNameInit());
+ const TypedInit *CurRecName = dyn_cast<TypedInit>(CurRec->getNameInit());
if (!CurRecName) {
TokError("Record name is not typed!");
return 0;
@@ -780,7 +779,7 @@ Init *TGParser::ParseIDValue(Record *CurRec,
for (LoopVector::iterator i = Loops.begin(), iend = Loops.end();
i != iend;
++i) {
- VarInit *IterVar = dynamic_cast<VarInit *>(i->IterVar);
+ VarInit *IterVar = dyn_cast<VarInit>(i->IterVar);
if (IterVar && IterVar->getName() == Name)
return IterVar;
}
@@ -855,9 +854,9 @@ Init *TGParser::ParseOperation(Record *CurRec) {
if (Code == UnOpInit::HEAD
|| Code == UnOpInit::TAIL
|| Code == UnOpInit::EMPTY) {
- ListInit *LHSl = dynamic_cast<ListInit*>(LHS);
- StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
- TypedInit *LHSt = dynamic_cast<TypedInit*>(LHS);
+ ListInit *LHSl = dyn_cast<ListInit>(LHS);
+ StringInit *LHSs = dyn_cast<StringInit>(LHS);
+ TypedInit *LHSt = dyn_cast<TypedInit>(LHS);
if (LHSl == 0 && LHSs == 0 && LHSt == 0) {
TokError("expected list or string type argument in unary operator");
return 0;
@@ -884,7 +883,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
}
if (LHSl) {
Init *Item = LHSl->getElement(0);
- TypedInit *Itemt = dynamic_cast<TypedInit*>(Item);
+ TypedInit *Itemt = dyn_cast<TypedInit>(Item);
if (Itemt == 0) {
TokError("untyped list element in unary operator");
return 0;
@@ -1046,24 +1045,24 @@ Init *TGParser::ParseOperation(Record *CurRec) {
RecTy *MHSTy = 0;
RecTy *RHSTy = 0;
- if (TypedInit *MHSt = dynamic_cast<TypedInit*>(MHS))
+ if (TypedInit *MHSt = dyn_cast<TypedInit>(MHS))
MHSTy = MHSt->getType();
- if (BitsInit *MHSbits = dynamic_cast<BitsInit*>(MHS))
+ if (BitsInit *MHSbits = dyn_cast<BitsInit>(MHS))
MHSTy = BitsRecTy::get(MHSbits->getNumBits());
- if (dynamic_cast<BitInit*>(MHS))
+ if (isa<BitInit>(MHS))
MHSTy = BitRecTy::get();
- if (TypedInit *RHSt = dynamic_cast<TypedInit*>(RHS))
+ if (TypedInit *RHSt = dyn_cast<TypedInit>(RHS))
RHSTy = RHSt->getType();
- if (BitsInit *RHSbits = dynamic_cast<BitsInit*>(RHS))
+ if (BitsInit *RHSbits = dyn_cast<BitsInit>(RHS))
RHSTy = BitsRecTy::get(RHSbits->getNumBits());
- if (dynamic_cast<BitInit*>(RHS))
+ if (isa<BitInit>(RHS))
RHSTy = BitRecTy::get();
// For UnsetInit, it's typed from the other hand.
- if (dynamic_cast<UnsetInit*>(MHS))
+ if (isa<UnsetInit>(MHS))
MHSTy = RHSTy;
- if (dynamic_cast<UnsetInit*>(RHS))
+ if (isa<UnsetInit>(RHS))
RHSTy = MHSTy;
if (!MHSTy || !RHSTy) {
@@ -1082,7 +1081,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
break;
}
case tgtok::XForEach: {
- TypedInit *MHSt = dynamic_cast<TypedInit *>(MHS);
+ TypedInit *MHSt = dyn_cast<TypedInit>(MHS);
if (MHSt == 0) {
TokError("could not get type for !foreach");
return 0;
@@ -1091,7 +1090,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
break;
}
case tgtok::XSubst: {
- TypedInit *RHSt = dynamic_cast<TypedInit *>(RHS);
+ TypedInit *RHSt = dyn_cast<TypedInit>(RHS);
if (RHSt == 0) {
TokError("could not get type for !subst");
return 0;
@@ -1315,7 +1314,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
for (std::vector<Init *>::iterator i = Vals.begin(), ie = Vals.end();
i != ie;
++i) {
- TypedInit *TArg = dynamic_cast<TypedInit*>(*i);
+ TypedInit *TArg = dyn_cast<TypedInit>(*i);
if (TArg == 0) {
TokError("Untyped list element");
return 0;
@@ -1498,7 +1497,7 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
// Create a !strconcat() operation, first casting each operand to
// a string if necessary.
- TypedInit *LHS = dynamic_cast<TypedInit *>(Result);
+ TypedInit *LHS = dyn_cast<TypedInit>(Result);
if (!LHS) {
Error(PasteLoc, "LHS of paste is not typed!");
return 0;
@@ -1525,7 +1524,7 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
default:
Init *RHSResult = ParseValue(CurRec, ItemType, ParseNameMode);
- RHS = dynamic_cast<TypedInit *>(RHSResult);
+ RHS = dyn_cast<TypedInit>(RHSResult);
if (!RHS) {
Error(PasteLoc, "RHS of paste is not typed!");
return 0;
@@ -1716,7 +1715,7 @@ VarInit *TGParser::ParseForeachDeclaration(ListInit *&ForeachListValue) {
default: TokError("Unknown token when expecting a range list"); return 0;
case tgtok::l_square: { // '[' ValueList ']'
Init *List = ParseSimpleValue(0, 0, ParseForeachMode);
- ForeachListValue = dynamic_cast<ListInit*>(List);
+ ForeachListValue = dyn_cast<ListInit>(List);
if (ForeachListValue == 0) {
TokError("Expected a Value list");
return 0;
@@ -2257,7 +2256,7 @@ InstantiateMulticlassDef(MultiClass &MC,
Init *DefName = DefProto->getNameInit();
- StringInit *DefNameString = dynamic_cast<StringInit *>(DefName);
+ StringInit *DefNameString = dyn_cast<StringInit>(DefName);
if (DefNameString != 0) {
// We have a fully expanded string so there are no operators to
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 6b98d446b00..ae531c4ea88 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -102,8 +102,6 @@ public:
virtual void print(raw_ostream &O) const;
void print(raw_ostream *O) const { if (O) print(*O); }
void dump() const;
-
- static bool classof(const ARMConstantPoolValue *) { return true; }
};
inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) {
@@ -158,7 +156,6 @@ public:
static bool classof(const ARMConstantPoolValue *APV) {
return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA();
}
- static bool classof(const ARMConstantPoolConstant *) { return true; }
};
/// ARMConstantPoolSymbol - ARM-specific constantpool values for external
@@ -192,7 +189,6 @@ public:
static bool classof(const ARMConstantPoolValue *ACPV) {
return ACPV->isExtSymbol();
}
- static bool classof(const ARMConstantPoolSymbol *) { return true; }
};
/// ARMConstantPoolMBB - ARM-specific constantpool value of a machine basic
@@ -225,7 +221,6 @@ public:
static bool classof(const ARMConstantPoolValue *ACPV) {
return ACPV->isMachineBasicBlock();
}
- static bool classof(const ARMConstantPoolMBB *) { return true; }
};
} // End llvm namespace
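
These deletions are the other half of the RTTI cleanup: Support/Casting.h no longer needs the degenerate, always-true classof overload for a class's own type, so only the discriminating check against the base class remains. In sketch form (hypothetical names):

    struct PoolValue {
      bool ExtSymbol;
      bool isExtSymbol() const { return ExtSymbol; }
    };

    struct PoolSymbol : PoolValue {
      static bool classof(const PoolValue *V) { return V->isExtSymbol(); }
      // No classof(const PoolSymbol *) overload is required any more.
    };
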
diff --git a/lib/Target/ARM/ARMELFWriterInfo.cpp b/lib/Target/ARM/ARMELFWriterInfo.cpp
index d88bf0c8fa1..7bca0edf915 100644
--- a/lib/Target/ARM/ARMELFWriterInfo.cpp
+++ b/lib/Target/ARM/ARMELFWriterInfo.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
ARMELFWriterInfo::ARMELFWriterInfo(TargetMachine &TM)
- : TargetELFWriterInfo(TM.getDataLayout()->getPointerSizeInBits() == 64,
+ : TargetELFWriterInfo(TM.getDataLayout()->getPointerSizeInBits(0) == 64,
TM.getDataLayout()->isLittleEndian()) {
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 051aab05cbd..b2eb5784879 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -122,6 +122,7 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::SELECT, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT.isInteger()) {
setOperationAction(ISD::SHL, VT, Custom);
@@ -1655,22 +1656,31 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
/// and then confiscate the rest of the parameter registers to ensure
/// this.
void
-ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
+ARMTargetLowering::HandleByVal(
+ CCState *State, unsigned &size, unsigned Align) const {
unsigned reg = State->AllocateReg(GPRArgRegs, 4);
assert((State->getCallOrPrologue() == Prologue ||
State->getCallOrPrologue() == Call) &&
"unhandled ParmContext");
if ((!State->isFirstByValRegValid()) &&
(ARM::R0 <= reg) && (reg <= ARM::R3)) {
- State->setFirstByValReg(reg);
- // At a call site, a byval parameter that is split between
- // registers and memory needs its size truncated here. In a
- // function prologue, such byval parameters are reassembled in
- // memory, and are not truncated.
- if (State->getCallOrPrologue() == Call) {
- unsigned excess = 4 * (ARM::R4 - reg);
- assert(size >= excess && "expected larger existing stack allocation");
- size -= excess;
+ if (Subtarget->isAAPCS_ABI() && Align > 4) {
+ unsigned AlignInRegs = Align / 4;
+ unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
+ for (unsigned i = 0; i < Waste; ++i)
+ reg = State->AllocateReg(GPRArgRegs, 4);
+ }
+ if (reg != 0) {
+ State->setFirstByValReg(reg);
+ // At a call site, a byval parameter that is split between
+ // registers and memory needs its size truncated here. In a
+ // function prologue, such byval parameters are reassembled in
+ // memory, and are not truncated.
+ if (State->getCallOrPrologue() == Call) {
+ unsigned excess = 4 * (ARM::R4 - reg);
+ assert(size >= excess && "expected larger existing stack allocation");
+ size -= excess;
+ }
}
}
// Confiscate any remaining parameter registers to preclude their
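
A worked example for the AAPCS alignment loop above: an 8-byte-aligned byval gives AlignInRegs = 2, so if AllocateReg first returns r1, then Waste = (r4 - r1) % 2 = 3 % 2 = 1 and one register is burned, leaving the argument to start in the even-numbered r2 and keeping its in-register copy as aligned as its stack layout.
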
@@ -1803,6 +1813,14 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
}
}
+ // If the caller's vararg or byval argument has been split between registers
+ // and the stack, do not perform a tail call, since part of the argument is
+ // in the caller's local frame.
+ const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
+ getInfo<ARMFunctionInfo>();
+ if (AFI_Caller->getVarArgsRegSaveSize())
+ return false;
+
// If the callee takes no arguments then go on to check the results of the
// call.
if (!Outs.empty()) {
@@ -4221,9 +4239,26 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// If we are VDUPing a value that comes directly from a vector, that will
// cause an unnecessary move to and from a GPR, where instead we could
// just use VDUPLANE.
- if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT)
- N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+ if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ // We need to create a new undef vector to use for the VDUPLANE if the
+ // size of the vector from which we get the value is different from the
+ // size of the vector that we need to create. We will insert the element
+ // such that the register coalescer will remove unnecessary copies.
+ if (VT != Value->getOperand(0).getValueType()) {
+ ConstantSDNode *constIndex;
+ constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1));
+ assert(constIndex && "The index is not a constant!");
+ unsigned index = constIndex->getAPIntValue().getLimitedValue() %
+ VT.getVectorNumElements();
+ N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
+ Value, DAG.getConstant(index, MVT::i32)),
+ DAG.getConstant(index, MVT::i32));
+ } else {
+ N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
Value->getOperand(0), Value->getOperand(1));
+ }
+ }
else
N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
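
The modulo in the VDUPLANE path above wraps the extract index into the range of the vector being built; for instance, duplicating lane 2 of a v4f32 into a v2f32 build re-inserts the value at lane 2 % 2 = 0 and VDUPLANEs from there, letting the register coalescer fold away the copies.
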
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index b5020c97108..9acab0b0834 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -480,7 +480,7 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
/// HandleByVal - Target-specific cleanup for ByVal support.
- virtual void HandleByVal(CCState *, unsigned &) const;
+ virtual void HandleByVal(CCState *, unsigned &, unsigned) const;
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index de655f1a0ee..ede4def2b73 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -4500,12 +4500,25 @@ def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
(v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
(VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
Requires<[HasNEON]>;
+def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
+ (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
+ (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
(and DPR:$Vm, (vnotd DPR:$Vd)))),
(VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
Requires<[HasNEON]>;
+def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
+ (and DPR:$Vm, (vnotd DPR:$Vd)))),
+ (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+
def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
(ins QPR:$src1, QPR:$Vn, QPR:$Vm),
N3RegFrm, IIC_VCNTiQ,
@@ -4525,11 +4538,23 @@ def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
(v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
(VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
Requires<[HasNEON]>;
+def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
+ (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
+ (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
(and QPR:$Vm, (vnotq QPR:$Vd)))),
(VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
Requires<[HasNEON]>;
+def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
+ (and QPR:$Vm, (vnotq QPR:$Vd)))),
+ (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 6fdf873a8f0..c51ae24c50e 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -71,7 +71,8 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
ELFWriterInfo(*this),
TLInfo(*this),
TSInfo(*this),
- FrameLowering(Subtarget) {
+ FrameLowering(Subtarget),
+ STTI(&TLInfo) {
if (!Subtarget.hasARMOps())
report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
"support ARM mode execution!");
@@ -104,7 +105,8 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
TSInfo(*this),
FrameLowering(Subtarget.hasThumb2()
? new ARMFrameLowering(Subtarget)
- : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
+ : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)),
+ STTI(&TLInfo) {
}
namespace {
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index f91e5bbd477..7a65a7f062d 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -25,6 +25,7 @@
#include "Thumb1FrameLowering.h"
#include "Thumb2InstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetTransformImpl.h"
#include "llvm/DataLayout.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/ADT/OwningPtr.h"
@@ -67,6 +68,8 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
ARMTargetLowering TLInfo;
ARMSelectionDAGInfo TSInfo;
ARMFrameLowering FrameLowering;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
ARMTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
@@ -88,7 +91,12 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
virtual const ARMFrameLowering *getFrameLowering() const {
return &FrameLowering;
}
-
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo() const {
+ return &STTI;
+ }
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo() const {
+ return &VTTI;
+ }
virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const DataLayout *getDataLayout() const { return &DL; }
virtual const ARMELFWriterInfo *getELFWriterInfo() const {
@@ -110,6 +118,8 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
ARMSelectionDAGInfo TSInfo;
// Either Thumb1FrameLowering or ARMFrameLowering.
OwningPtr<ARMFrameLowering> FrameLowering;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
@@ -138,6 +148,12 @@ public:
virtual const ARMFrameLowering *getFrameLowering() const {
return FrameLowering.get();
}
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo() const {
+ return &STTI;
+ }
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo() const {
+ return &VTTI;
+ }
virtual const DataLayout *getDataLayout() const { return &DL; }
virtual const ARMELFWriterInfo *getELFWriterInfo() const {
return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
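
The same pattern repeats across the target machines in this commit: each owns Scalar/Vector TargetTransformImpl members seeded with its TargetLowering and publishes them through these two virtual getters. A hedged sketch of how a pass is expected to consume them; the query method named here is illustrative of the 2012 interface, not guaranteed:

    // const ScalarTargetTransformInfo *STTI =
    //     TM.getScalarTargetTransformInfo();
    // if (STTI && STTI->isLegalICmpImmediate(Imm))  // hypothetical query
    //   ...fold the comparison without materializing Imm...
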
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 93e5eca6252..0eec8622e97 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -259,9 +259,10 @@ public:
unsigned checkTargetMatchPredicate(MCInst &Inst);
- bool MatchAndEmitInstruction(SMLoc IDLoc,
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out);
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm);
};
} // end anonymous namespace
@@ -7474,17 +7475,14 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
static const char *getSubtargetFeatureName(unsigned Val);
bool ARMAsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc,
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out) {
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
MCInst Inst;
- unsigned Kind;
- unsigned ErrorInfo;
unsigned MatchResult;
- MatchInstMapAndConstraints MapAndConstraints;
- MatchResult = MatchInstructionImpl(Operands, Kind, Inst,
- MapAndConstraints, ErrorInfo,
- /*matchingInlineAsm*/ false);
+ MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
+ MatchingInlineAsm);
switch (MatchResult) {
default: break;
case Match_Success:
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index 6bddc42b373..b404e6c6e01 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -67,9 +67,6 @@ public:
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
}
-
- static bool classof(const ARMMCExpr *) { return true; }
-
};
} // end namespace llvm
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index 096ef001ed3..48df199437b 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -11,6 +11,7 @@ add_llvm_library(LLVMTarget
TargetMachineC.cpp
TargetRegisterInfo.cpp
TargetSubtargetInfo.cpp
+ TargetTransformImpl.cpp
)
foreach(t ${LLVM_TARGETS_TO_BUILD})
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index a37ad7f85ae..e92ad01e1d5 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -43,7 +43,8 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT,
FrameLowering(Subtarget),
TLInfo(*this),
TSInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()) {
+ InstrItins(Subtarget.getInstrItineraryData()),
+ STTI(&TLInfo) {
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index 58699a30d26..7f53ea6fbeb 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -20,6 +20,7 @@
#include "SPUSelectionDAGInfo.h"
#include "SPUFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetTransformImpl.h"
#include "llvm/DataLayout.h"
namespace llvm {
@@ -34,6 +35,8 @@ class SPUTargetMachine : public LLVMTargetMachine {
SPUTargetLowering TLInfo;
SPUSelectionDAGInfo TSInfo;
InstrItineraryData InstrItins;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
SPUTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -77,6 +80,12 @@ public:
virtual const InstrItineraryData *getInstrItineraryData() const {
return &InstrItins;
}
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo() const {
+ return &STTI;
+ }
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo() const {
+ return &VTTI;
+ }
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 096e2bc13b0..61fb4e98ecd 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -474,9 +474,9 @@ void CppWriter::printAttributes(const AttrListPtr &PAL,
Out << "AttributeWithIndex PAWI;"; nl(Out);
for (unsigned i = 0; i < PAL.getNumSlots(); ++i) {
unsigned index = PAL.getSlot(i).Index;
- Attributes::Builder attrs(PAL.getSlot(i).Attrs);
+ AttrBuilder attrs(PAL.getSlot(i).Attrs);
Out << "PAWI.Index = " << index << "U;\n";
- Out << " Attributes::Builder B;\n";
+ Out << " AttrBuilder B;\n";
#define HANDLE_ATTR(X) \
if (attrs.hasAttribute(Attributes::X)) \
@@ -509,13 +509,11 @@ void CppWriter::printAttributes(const AttrListPtr &PAL,
HANDLE_ATTR(NonLazyBind);
#undef HANDLE_ATTR
if (attrs.hasAttribute(Attributes::StackAlignment))
- Out << "B.addStackAlignmentAttr(Attribute::constructStackAlignmentFromInt("
- << attrs.getStackAlignment()
- << "))";
+ Out << "B.addStackAlignmentAttr(" << attrs.getStackAlignment() << ")";
nl(Out);
attrs.removeAttribute(Attributes::StackAlignment);
assert(!attrs.hasAttributes() && "Unhandled attribute!");
- Out << "PAWI.Attrs = Attributes::get(B);";
+ Out << "PAWI.Attrs = Attributes::get(mod->getContext(), B);";
nl(Out);
Out << "Attrs.push_back(PAWI);";
nl(Out);
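
The printer now emits the renamed AttrBuilder and the LLVMContext-taking form of Attributes::get. A minimal sketch of the resulting idiom, assuming a Module *M; the NoUnwind attribute and the alignment of 16 are arbitrary examples:

    AttrBuilder B;
    B.addAttribute(Attributes::NoUnwind); // example attribute
    B.addStackAlignmentAttr(16);          // plain integer, no wrapper call
    Attributes A = Attributes::get(M->getContext(), B);
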
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index d198a3f45b5..353542a8097 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -74,7 +74,8 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
Subtarget(TT, CPU, FS), InstrInfo(Subtarget), TLInfo(*this),
TSInfo(*this),
FrameLowering(Subtarget),
- InstrItins(&Subtarget.getInstrItineraryData()) {
+ InstrItins(&Subtarget.getInstrItineraryData()),
+ STTI(&TLInfo) {
setMCUseCFI(false);
}
@@ -87,7 +88,7 @@ bool HexagonTargetMachine::addPassesForOptimizations(PassManagerBase &PM) {
PM.add(createDeadCodeEliminationPass());
PM.add(createConstantPropagationPass());
PM.add(createLoopUnrollPass());
- PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ PM.add(createLoopStrengthReducePass());
return true;
}
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index ade5b3e9c1f..7a4215c119a 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -21,6 +21,7 @@
#include "HexagonFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/DataLayout.h"
+#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -34,6 +35,8 @@ class HexagonTargetMachine : public LLVMTargetMachine {
HexagonSelectionDAGInfo TSInfo;
HexagonFrameLowering FrameLowering;
const InstrItineraryData* InstrItins;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU,
@@ -68,6 +71,14 @@ public:
return &TSInfo;
}
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo() const {
+ return &STTI;
+ }
+
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo() const {
+ return &VTTI;
+ }
+
virtual const DataLayout *getDataLayout() const { return &DL; }
static unsigned getModuleMatchQuality(const Module &M);
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
index d1e18b24c39..9e28a3d7d09 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -44,9 +44,10 @@ class MBlazeAsmParser : public MCTargetAsmParser {
bool ParseDirectiveWord(unsigned Size, SMLoc L);
- bool MatchAndEmitInstruction(SMLoc IDLoc,
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out);
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm);
/// @name Auto-generated Match Functions
/// {
@@ -312,15 +313,13 @@ static unsigned MatchRegisterName(StringRef Name);
/// }
//
bool MBlazeAsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc,
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out) {
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
MCInst Inst;
- unsigned Kind;
- unsigned ErrorInfo;
- MatchInstMapAndConstraints MapAndConstraints;
- switch (MatchInstructionImpl(Operands, Kind, Inst, MapAndConstraints,
- ErrorInfo, /*matchingInlineAsm*/ false)) {
+ switch (MatchInstructionImpl(Operands, Inst, ErrorInfo,
+ MatchingInlineAsm)) {
default: break;
case Match_Success:
Out.EmitInstruction(Inst);
diff --git a/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp b/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
index 4ca30ba81f7..6b575099e59 100644
--- a/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
MBlazeELFWriterInfo::MBlazeELFWriterInfo(TargetMachine &TM)
- : TargetELFWriterInfo(TM.getDataLayout()->getPointerSizeInBits() == 64,
+ : TargetELFWriterInfo(TM.getDataLayout()->getPointerSizeInBits(0) == 64,
TM.getDataLayout()->isLittleEndian()) {
}
diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
index 91aaf940e62..1c2e3b26613 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
@@ -83,7 +83,7 @@ bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const {
#undef GET_INTRINSIC_OVERLOAD_TABLE
}
-/// This defines the "getAttributes(ID id)" method.
+/// This defines the "getAttributes(LLVMContext &C, ID id)" method.
#define GET_INTRINSIC_ATTRIBUTES
#include "MBlazeGenIntrinsics.inc"
#undef GET_INTRINSIC_ATTRIBUTES
@@ -104,7 +104,8 @@ Function *MBlazeIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
Type **Tys,
unsigned numTy) const {
assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded");
- AttrListPtr AList = getAttributes((mblazeIntrinsic::ID) IntrID);
+ AttrListPtr AList = getAttributes(M->getContext(),
+ (mblazeIntrinsic::ID) IntrID);
return cast<Function>(M->getOrInsertFunction(getName(IntrID),
getType(M->getContext(), IntrID),
AList));
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index 1f2cf6d9d2f..cb5f46062d9 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -42,7 +42,7 @@ MBlazeTargetMachine(const Target &T, StringRef TT,
InstrInfo(*this),
FrameLowering(Subtarget),
TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()) {
+ InstrItins(Subtarget.getInstrItineraryData()), STTI(&TLInfo) {
}
namespace {
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
index d949e54f0d8..34648b9b9ae 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -25,6 +25,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
class formatted_raw_ostream;
@@ -39,6 +40,8 @@ namespace llvm {
MBlazeIntrinsicInfo IntrinsicInfo;
MBlazeELFWriterInfo ELFWriterInfo;
InstrItineraryData InstrItins;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
MBlazeTargetMachine(const Target &T, StringRef TT,
@@ -77,6 +80,10 @@ namespace llvm {
virtual const MBlazeELFWriterInfo *getELFWriterInfo() const {
return &ELFWriterInfo;
}
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo() const
+ { return &STTI; }
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo() const
+ { return &VTTI; }
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index fc677aec38e..113378a5f31 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -881,7 +881,7 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
- uint64_t SlotSize = TD->getPointerSize();
+ uint64_t SlotSize = TD->getPointerSize(0);
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
true);
FuncInfo->setRAIndex(ReturnAddrIndex);
@@ -901,7 +901,7 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset =
- DAG.getConstant(TD->getPointerSize(), MVT::i16);
+ DAG.getConstant(TD->getPointerSize(0), MVT::i16);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
FrameAddr, Offset),
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index da5899b86d5..29ea6812162 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -36,7 +36,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T,
// FIXME: Check DataLayout string.
DL("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
InstrInfo(*this), TLInfo(*this), TSInfo(*this),
- FrameLowering(Subtarget) { }
+ FrameLowering(Subtarget), STTI(&TLInfo) { }
namespace {
/// MSP430 Code Generator Pass Configuration Options.
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index ba3cef1f2ad..186172ede42 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -24,6 +24,7 @@
#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -36,6 +37,8 @@ class MSP430TargetMachine : public LLVMTargetMachine {
MSP430TargetLowering TLInfo;
MSP430SelectionDAGInfo TSInfo;
MSP430FrameLowering FrameLowering;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
MSP430TargetMachine(const Target &T, StringRef TT,
@@ -61,7 +64,12 @@ public:
virtual const MSP430SelectionDAGInfo* getSelectionDAGInfo() const {
return &TSInfo;
}
-
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo() const {
+ return &STTI;
+ }
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo() const {
+ return &VTTI;
+ }
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
}; // MSP430TargetMachine.
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index c2980ffeea8..00649d2f187 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -67,9 +67,10 @@ class MipsAsmParser : public MCTargetAsmParser {
#define GET_ASSEMBLER_HEADER
#include "MipsGenAsmMatcher.inc"
- bool MatchAndEmitInstruction(SMLoc IDLoc,
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out);
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm);
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
@@ -452,16 +453,13 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
}
bool MipsAsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc,
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out) {
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
MCInst Inst;
- unsigned Kind;
- unsigned ErrorInfo;
- MatchInstMapAndConstraints MapAndConstraints;
- unsigned MatchResult = MatchInstructionImpl(Operands, Kind, Inst,
- MapAndConstraints, ErrorInfo,
- /*matchingInlineAsm*/ false);
+ unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
+ MatchingInlineAsm);
switch (MatchResult) {
default: break;
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 127c5b89e8d..8991433005d 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -58,12 +58,22 @@ void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
- unsigned Opc = 0, ZeroReg = 0;
+ unsigned Opc = 0;
+
+ if (Mips::CPU16RegsRegClass.contains(DestReg) &&
+ Mips::CPURegsRegClass.contains(SrcReg))
+ Opc = Mips::MoveR3216;
+ else if (Mips::CPURegsRegClass.contains(DestReg) &&
+ Mips::CPU16RegsRegClass.contains(SrcReg))
+ Opc = Mips::Move32R16;
+ else if ((SrcReg == Mips::HI) &&
+ (Mips::CPU16RegsRegClass.contains(DestReg)))
+ Opc = Mips::Mfhi16, SrcReg = 0;
+ else if ((SrcReg == Mips::LO) &&
+ (Mips::CPU16RegsRegClass.contains(DestReg)))
+ Opc = Mips::Mflo16, SrcReg = 0;
- if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg.
- if (Mips::CPURegsRegClass.contains(SrcReg))
- Opc = Mips::Move32R16;
- }
assert(Opc && "Cannot copy registers");
@@ -72,9 +82,6 @@ void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (DestReg)
MIB.addReg(DestReg, RegState::Define);
- if (ZeroReg)
- MIB.addReg(ZeroReg);
-
if (SrcReg)
MIB.addReg(SrcReg, getKillRegState(KillSrc));
}
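
With the rewritten chain above, copyPhysReg handles moves between the MIPS16 and MIPS32 register classes as well as reads of HI and LO; for the latter, SrcReg is cleared because Mfhi16/Mflo16 take no explicit source operand. A sketch of the HI case with illustrative registers:

    // Copying HI into a MIPS16 register now selects Mfhi16; since SrcReg is
    // cleared, the built MI carries only the destination operand.
    TII->copyPhysReg(MBB, I, DL, Mips::V0, Mips::HI, /*KillSrc=*/false);
    // => BuildMI(MBB, I, DL, get(Mips::Mfhi16)).addReg(Mips::V0, RegState::Define)
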
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index e1c90466fbf..eba201a0ea9 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -118,6 +118,14 @@ class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
FRR16<f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry),
!strconcat(asmstr, "\t$rx, $ry"), [], itin> {
}
+
+//
+// Maybe refactor this, but we need $zero as a dummy first operand.
+//
+class FRR16_div_ins<bits<5> f, string asmstr, InstrItinClass itin> :
+ FRR16<f, (outs ), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$$zero, $rx, $ry"), [], itin> ;
+
class FRR16_M_ins<bits<5> f, string asmstr,
InstrItinClass itin> :
FRR16<f, (outs CPU16Regs:$rx), (ins),
@@ -196,6 +204,24 @@ def AdduRxRyRz16: FRRR16_ins<01, "addu", IIAlu>, ArithLogic16Defs<1>;
// To do a bitwise logical AND.
def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>;
+//
+// Format: DIV rx, ry MIPS16e
+// Purpose: Divide Word
+// To divide 32-bit signed integers.
+//
+def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> {
+ let Defs = [HI, LO];
+}
+
+//
+// Format: DIVU rx, ry MIPS16e
+// Purpose: Divide Unsigned Word
+// To divide 32-bit unsigned integers.
+//
+def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> {
+ let Defs = [HI, LO];
+}
+
//
// Format: JR ra MIPS16e
@@ -551,5 +577,20 @@ def RetRA16 : MipsPseudo16<(outs), (ins), "", [(MipsRet)]>;
// Small immediates
def: Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>;
+//
+// MipsDivRem
+//
+def: Mips16Pat
+ <(MipsDivRem CPU16Regs:$rx, CPU16Regs:$ry),
+ (DivRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>;
+
+//
+// MipsDivRemU
+//
+def: Mips16Pat
+ <(MipsDivRemU CPU16Regs:$rx, CPU16Regs:$ry),
+ (DivuRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>;
+
def: Mips16Pat<(add CPU16Regs:$hi, (MipsLo tglobaladdr:$lo)),
(AddiuRxRxImmX16 CPU16Regs:$hi, tglobaladdr:$lo)>;
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index bd472d6f67e..99a9f25abd2 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -315,3 +315,33 @@ def : MipsPat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>;
// Instruction aliases
//===----------------------------------------------------------------------===//
def : InstAlias<"move $dst,$src", (DADD CPU64Regs:$dst,CPU64Regs:$src,ZERO_64)>;
+
+/// Move between CPU and coprocessor registers
+let DecoderNamespace = "Mips64" in {
+def MFC0_3OP64 : MFC3OP<0x10, 0, (outs CPU64Regs:$rt),
+ (ins CPU64Regs:$rd, uimm16:$sel),"mfc0\t$rt, $rd, $sel">;
+def MTC0_3OP64 : MFC3OP<0x10, 4, (outs CPU64Regs:$rd, uimm16:$sel),
+ (ins CPU64Regs:$rt),"mtc0\t$rt, $rd, $sel">;
+def MFC2_3OP64 : MFC3OP<0x12, 0, (outs CPU64Regs:$rt),
+ (ins CPU64Regs:$rd, uimm16:$sel),"mfc2\t$rt, $rd, $sel">;
+def MTC2_3OP64 : MFC3OP<0x12, 4, (outs CPU64Regs:$rd, uimm16:$sel),
+ (ins CPU64Regs:$rt),"mtc2\t$rt, $rd, $sel">;
+def DMFC0_3OP64 : MFC3OP<0x10, 1, (outs CPU64Regs:$rt),
+ (ins CPU64Regs:$rd, uimm16:$sel),"dmfc0\t$rt, $rd, $sel">;
+def DMTC0_3OP64 : MFC3OP<0x10, 5, (outs CPU64Regs:$rd, uimm16:$sel),
+ (ins CPU64Regs:$rt),"dmtc0\t$rt, $rd, $sel">;
+def DMFC2_3OP64 : MFC3OP<0x12, 1, (outs CPU64Regs:$rt),
+ (ins CPU64Regs:$rd, uimm16:$sel),"dmfc2\t$rt, $rd, $sel">;
+def DMTC2_3OP64 : MFC3OP<0x12, 5, (outs CPU64Regs:$rd, uimm16:$sel),
+ (ins CPU64Regs:$rt),"dmtc2\t$rt, $rd, $sel">;
+}
+// Two operand (implicit 0 selector) versions:
+def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
+def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
+def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
+def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
+def : InstAlias<"dmfc0 $rt, $rd", (DMFC0_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
+def : InstAlias<"dmtc0 $rt, $rd", (DMTC0_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
+def : InstAlias<"dmfc2 $rt, $rd", (DMFC2_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
+def : InstAlias<"dmtc2 $rt, $rd", (DMTC2_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
+
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 9c196dd82f3..4c3981d9f68 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -53,7 +53,7 @@ MipsTargetMachine(const Target &T, StringRef TT,
InstrInfo(MipsInstrInfo::create(*this)),
FrameLowering(MipsFrameLowering::create(*this, Subtarget)),
TLInfo(*this), TSInfo(*this), JITInfo(),
- ELFWriterInfo(false, isLittle) {
+ ELFWriterInfo(false, isLittle), STTI(&TLInfo) {
}
void MipsebTargetMachine::anchor() { }
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 3a01828dd1d..60822d0c055 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -24,6 +24,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
class formatted_raw_ostream;
@@ -38,6 +39,8 @@ class MipsTargetMachine : public LLVMTargetMachine {
MipsSelectionDAGInfo TSInfo;
MipsJITInfo JITInfo;
MipsELFWriterInfo ELFWriterInfo;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
MipsTargetMachine(const Target &T, StringRef TT,
@@ -74,6 +77,12 @@ public:
virtual const MipsELFWriterInfo *getELFWriterInfo() const {
return &ELFWriterInfo;
}
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo() const {
+ return &STTI;
+ }
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo() const {
+ return &VTTI;
+ }
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index d3dfb35e261..c46094569e9 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -126,8 +126,10 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
return Base;
// Truncate/sext the offset to the pointer size.
- if (TD.getPointerSizeInBits() != 64) {
- int SExtAmount = 64-TD.getPointerSizeInBits();
+ unsigned AS = PtrVal->getType()->isPointerTy() ?
+ cast<PointerType>(PtrVal->getType())->getAddressSpace() : 0;
+ if (TD.getPointerSizeInBits(AS) != 64) {
+ int SExtAmount = 64-TD.getPointerSizeInBits(AS);
Offset = (Offset << SExtAmount) >> SExtAmount;
}
@@ -1378,7 +1380,7 @@ getOpenCLAlignment(const DataLayout *TD,
const FunctionType *FTy = dyn_cast<FunctionType>(Ty);
if (FTy)
- return TD->getPointerPrefAlignment();
+ return TD->getPointerPrefAlignment(0);
return TD->getPrefTypeAlignment(Ty);
}
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index dbfc660687e..7519b4a0831 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -72,7 +72,8 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T,
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS, is64bit),
DL(Subtarget.getDataLayout()),
- InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit)
+ InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this, is64bit),
+ STTI(&TLInfo)
/*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {
}
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index d58a0768581..11bc9d4fa69 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -25,6 +25,7 @@
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -44,6 +45,9 @@ class NVPTXTargetMachine : public LLVMTargetMachine {
// Hold Strings that can be free'd all together with NVPTXTargetMachine
ManagedStringPool ManagedStrPool;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
+
//bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
// bool DisableVerify, MCContext *&OutCtx);
@@ -72,6 +76,12 @@ public:
virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const {
return &TSInfo;
}
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo() const {
+ return &STTI;
+ }
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo() const {
+ return &VTTI;
+ }
//virtual bool addInstSelector(PassManagerBase &PM,
// CodeGenOpt::Level OptLevel);
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 914a9b0dcea..d8abd9fba07 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -439,7 +439,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
const DataLayout *TD = TM.getDataLayout();
- bool isPPC64 = TD->getPointerSizeInBits() == 64;
+ bool isPPC64 = TD->getPointerSizeInBits(0) == 64;
if (isPPC64 && !TOC.empty()) {
const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".toc",
@@ -451,8 +451,8 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
for (DenseMap<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
E = TOC.end(); I != E; ++I) {
OutStreamer.EmitLabel(I->second);
- OutStreamer.EmitRawText("\t.tc " + Twine(I->first->getName()) +
- "[TC]," + I->first->getName());
+ MCSymbol *S = OutContext.GetOrCreateSymbol(I->first->getName());
+ OutStreamer.EmitTCEntry(*S);
}
}
@@ -545,7 +545,7 @@ static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
void PPCDarwinAsmPrinter::
EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
- bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+ bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits(0) == 64;
const TargetLoweringObjectFileMachO &TLOFMacho =
static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
@@ -640,7 +640,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
- bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+ bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits(0) == 64;
// Darwin/PPC always uses mach-o.
const TargetLoweringObjectFileMachO &TLOFMacho =
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index b1c02e57f88..caf7bf2be79 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -50,6 +50,11 @@ static const uint16_t VRRegNo[] = {
/// to manipulate the VRSAVE register, even though it uses vector registers.
/// This can happen when the only registers used are known to be live in or out
/// of the function. Remove all of the VRSAVE related code from the function.
+/// FIXME: The removal of the code results in a compile failure at -O0 when the
+/// function contains a function call, as the GPR containing original VRSAVE
+/// contents is spilled and reloaded around the call. Without the prolog code,
+/// the spill instruction refers to an undefined register. This code needs
+/// to account for all uses of that GPR.
static void RemoveVRSaveCode(MachineInstr *MI) {
MachineBasicBlock *Entry = MI->getParent();
MachineFunction *MF = Entry->getParent();
@@ -283,12 +288,13 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
// Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
// process it.
- for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
- if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
- HandleVRSaveUpdate(MBBI, TII);
- break;
+ if (!Subtarget.isSVR4ABI())
+ for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
+ if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
+ HandleVRSaveUpdate(MBBI, TII);
+ break;
+ }
}
- }
// Move MBBI back to the beginning of the function.
MBBI = MBB.begin();
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index b52452ce89b..6195441cfc0 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -53,7 +53,9 @@ namespace {
GlobalBaseReg = 0;
SelectionDAGISel::runOnMachineFunction(MF);
- InsertVRSaveCode(MF);
+ if (!PPCSubTarget.isSVR4ABI())
+ InsertVRSaveCode(MF);
+
return true;
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 64bbcdfa94e..c18250a78f7 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2035,9 +2035,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
ObjSize = Flags.getByValSize();
ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
// All aggregates smaller than 8 bytes must be passed right-justified.
- if (ObjSize==1 || ObjSize==2) {
- CurArgOffset = CurArgOffset + (4 - ObjSize);
- }
+ if (ObjSize < PtrByteSize)
+ CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
// The value of the object is its address.
int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
@@ -2087,7 +2086,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
++GPR_idx;
ArgOffset += PtrByteSize;
} else {
- ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
+ ArgOffset += ArgSize - j;
break;
}
}
@@ -2142,6 +2141,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
++FPR_idx;
} else {
needsLoad = true;
+ ArgSize = PtrByteSize;
}
ArgOffset += 8;
@@ -3638,12 +3638,13 @@ PPCTargetLowering::LowerCall_Darwin_Or_64SVR4(SDValue Chain, SDValue Callee,
ArgOffset += PtrByteSize;
} else {
- SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType());
+ SDValue Const = DAG.getConstant(PtrByteSize - Size,
+ PtrOff.getValueType());
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
CallSeqStart.getNode()->getOperand(0),
Flags, DAG, dl);
- // This must go outside the CALLSEQ_START..END.
+ // The MEMCPY must go outside the CALLSEQ_START..END.
SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
CallSeqStart.getNode()->getOperand(1));
DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
@@ -3652,6 +3653,25 @@ PPCTargetLowering::LowerCall_Darwin_Or_64SVR4(SDValue Chain, SDValue Callee,
ArgOffset += PtrByteSize;
}
continue;
+ } else if (isSVR4ABI && GPR_idx == NumGPRs && Size < 8) {
+ // Case: Size is 3, 5, 6, or 7 for SVR4 and we're out of registers.
+ // This is the same case as 1, 2, and 4 for SVR4 with no registers.
+ // FIXME: Separate into 64-bit SVR4 and Darwin versions of this
+ // function, and combine the duplicated code chunks.
+ SDValue Const = DAG.getConstant(PtrByteSize - Size,
+ PtrOff.getValueType());
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, dl);
+ // The MEMCPY must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+ NewCallSeqStart.getNode());
+ Chain = CallSeqStart = NewCallSeqStart;
+ ArgOffset += PtrByteSize;
+ continue;
}
// Copy entire object into memory. There are cases where gcc-generated
// code assumes it is there, even if it could be put entirely into
@@ -3786,6 +3806,13 @@ PPCTargetLowering::LowerCall_Darwin_Or_64SVR4(SDValue Chain, SDValue Callee,
++GPR_idx;
}
} else {
+ // Single-precision floating-point values are mapped to the
+ // second (rightmost) word of the stack doubleword.
+ if (Arg.getValueType() == MVT::f32 && isPPC64 && isSVR4ABI) {
+ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
+ }
+
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
isPPC64, isTailCall, false, MemOpChains,
TailCallArguments, dl);
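
The 64-bit SVR4 fixes above generalize by-val right-justification from 1- and 2-byte aggregates to everything smaller than the 8-byte pointer slot, and compute the padding from the slot size rather than a hardcoded 4. A hypothetical example of an argument that now gets the adjusted offset:

    // sizeof(S) == 3 < PtrByteSize (8), so the by-val argument is placed
    // right-justified in its doubleword: CurArgOffset += 8 - 3 == 5.
    struct S { char a, b, c; };
    void callee(S small_byval);
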
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index d2df6645bb0..d9d68446f53 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -570,12 +570,15 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
// STVX VAL, 0, R0
//
// FIXME: We use R0 here, because it isn't available for RA.
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
+ bool Is64Bit = TM.getSubtargetImpl()->isPPC64();
+ unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
+ unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0),
FrameIdx, 0, 0));
NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX))
.addReg(SrcReg, getKillRegState(isKill))
- .addReg(PPC::R0)
- .addReg(PPC::R0));
+ .addReg(GPR0)
+ .addReg(GPR0));
} else {
llvm_unreachable("Unknown regclass!");
}
@@ -707,10 +710,13 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
// Dest = LVX 0, R0
//
// FIXME: We use R0 here, because it isn't available for RA.
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
+ bool Is64Bit = TM.getSubtargetImpl()->isPPC64();
+ unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
+ unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0),
FrameIdx, 0, 0));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(PPC::R0)
- .addReg(PPC::R0));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(GPR0)
+ .addReg(GPR0));
} else {
llvm_unreachable("Unknown regclass!");
}
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 459c3589d3f..d1232114732 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -498,7 +498,7 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
} else if (CRSpillFrameIdx) {
FrameIdx = CRSpillFrameIdx;
} else {
- MachineFrameInfo *MFI = ((MachineFunction &)MF).getFrameInfo();
+ MachineFrameInfo *MFI = (const_cast<MachineFunction &>(MF)).getFrameInfo();
FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
CRSpillFrameIdx = FrameIdx;
}
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 5f39b8d2c29..b8613834753 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -43,7 +43,8 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
DL(Subtarget.getDataLayoutString()), InstrInfo(*this),
FrameLowering(Subtarget), JITInfo(*this, is64Bit),
TLInfo(*this), TSInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()) {
+ InstrItins(Subtarget.getInstrItineraryData()),
+ STTI(&TLInfo) {
// The binutils for the BG/P are too old for CFI.
if (Subtarget.isBGP())
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 02d69fd15d1..c168433a71b 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -21,6 +21,7 @@
#include "PPCISelLowering.h"
#include "PPCSelectionDAGInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetTransformImpl.h"
#include "llvm/DataLayout.h"
namespace llvm {
@@ -36,6 +37,8 @@ class PPCTargetMachine : public LLVMTargetMachine {
PPCTargetLowering TLInfo;
PPCSelectionDAGInfo TSInfo;
InstrItineraryData InstrItins;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
PPCTargetMachine(const Target &T, StringRef TT,
@@ -63,6 +66,12 @@ public:
virtual const InstrItineraryData *getInstrItineraryData() const {
return &InstrItins;
}
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo() const {
+ return &STTI;
+ }
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo() const {
+ return &VTTI;
+ }
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 8b7559c2f9e..1d8cc771ddf 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -36,7 +36,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
DL(Subtarget.getDataLayout()),
InstrInfo(Subtarget),
TLInfo(*this), TSInfo(*this),
- FrameLowering(Subtarget) {
+ FrameLowering(Subtarget), STTI(&TLInfo) {
}
namespace {
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index c9f2d68eb19..0fbe2d7cda3 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -22,6 +22,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -32,6 +33,8 @@ class SparcTargetMachine : public LLVMTargetMachine {
SparcTargetLowering TLInfo;
SparcSelectionDAGInfo TSInfo;
SparcFrameLowering FrameLowering;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
SparcTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -52,6 +55,12 @@ public:
virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const {
return &TSInfo;
}
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo() const {
+ return &STTI;
+ }
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo() const {
+ return &VTTI;
+ }
virtual const DataLayout *getDataLayout() const { return &DL; }
// Pass Pipeline Configuration
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index b0b5c875b82..393178a4692 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -26,6 +26,7 @@ using namespace llvm;
void llvm::initializeTarget(PassRegistry &Registry) {
initializeDataLayoutPass(Registry);
initializeTargetLibraryInfoPass(Registry);
+ initializeTargetTransformInfoPass(Registry);
}
void LLVMInitializeTarget(LLVMPassRegistryRef R) {
@@ -55,13 +56,21 @@ LLVMByteOrdering LLVMByteOrder(LLVMTargetDataRef TD) {
}
unsigned LLVMPointerSize(LLVMTargetDataRef TD) {
- return unwrap(TD)->getPointerSize();
+ return unwrap(TD)->getPointerSize(0);
+}
+
+unsigned LLVMPointerSizeForAS(LLVMTargetDataRef TD, unsigned AS) {
+ return unwrap(TD)->getPointerSize(AS);
}
LLVMTypeRef LLVMIntPtrType(LLVMTargetDataRef TD) {
return wrap(unwrap(TD)->getIntPtrType(getGlobalContext()));
}
+LLVMTypeRef LLVMIntPtrTypeForAS(LLVMTargetDataRef TD, unsigned AS) {
+ return wrap(unwrap(TD)->getIntPtrType(getGlobalContext(), AS));
+}
+
unsigned long long LLVMSizeOfTypeInBits(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
return unwrap(TD)->getTypeSizeInBits(unwrap(Ty));
}
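
The C API gains address-space-aware variants next to the existing entry points. A usage sketch; the data layout string and address space 1 are arbitrary examples:

    LLVMTargetDataRef TD = LLVMCreateTargetData("e-p:64:64:64-i32:32:32");
    unsigned AS0Size = LLVMPointerSize(TD);          /* address space 0 */
    unsigned AS1Size = LLVMPointerSizeForAS(TD, 1);  /* explicit address space */
    LLVMTypeRef IntPtrTy = LLVMIntPtrTypeForAS(TD, 1);
    LLVMDisposeTargetData(TD);
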
diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp
new file mode 100644
index 00000000000..1cb5edab9d0
--- /dev/null
+++ b/lib/Target/TargetTransformImpl.cpp
@@ -0,0 +1,43 @@
+//===-- llvm/Target/TargetTransformImpl.cpp - Target Loop Trans Info -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetTransformImpl.h"
+#include "llvm/Target/TargetLowering.h"
+
+using namespace llvm;
+
+bool ScalarTargetTransformImpl::isLegalAddImmediate(int64_t imm) const {
+ return TLI->isLegalAddImmediate(imm);
+}
+
+bool ScalarTargetTransformImpl::isLegalICmpImmediate(int64_t imm) const {
+ return TLI->isLegalICmpImmediate(imm);
+}
+
+bool ScalarTargetTransformImpl::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+ return TLI->isLegalAddressingMode(AM, Ty);
+}
+
+bool ScalarTargetTransformImpl::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ return TLI->isTruncateFree(Ty1, Ty2);
+}
+
+bool ScalarTargetTransformImpl::isTypeLegal(Type *Ty) const {
+ EVT T = TLI->getValueType(Ty);
+ return TLI->isTypeLegal(T);
+}
+
+unsigned ScalarTargetTransformImpl::getJumpBufAlignment() const {
+ return TLI->getJumpBufAlignment();
+}
+
+unsigned ScalarTargetTransformImpl::getJumpBufSize() const {
+ return TLI->getJumpBufSize();
+}
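
These thin delegations to TargetLowering are what the getScalarTargetTransformInfo()/getVectorTargetTransformInfo() hooks added throughout this patch hand back. A sketch of a consumer, assuming targets without an implementation return a null pointer:

    bool canFoldICmpImm(const TargetMachine &TM, int64_t Imm) {
      const ScalarTargetTransformInfo *STTI = TM.getScalarTargetTransformInfo();
      return STTI && STTI->isLegalICmpImmediate(Imm);
    }
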
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index df34359a661..454664e3ed1 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -40,8 +40,8 @@ private:
bool Error(SMLoc L, const Twine &Msg,
ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(),
- bool matchingInlineAsm = false) {
- if (matchingInlineAsm) return true;
+ bool MatchingInlineAsm = false) {
+ if (MatchingInlineAsm) return true;
return Parser.Error(L, Msg, Ranges);
}
@@ -63,14 +63,10 @@ private:
bool processInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
- bool MatchAndEmitInstruction(SMLoc IDLoc,
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out);
- bool MatchInstruction(SMLoc IDLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &Kind, unsigned &Opcode,
- MatchInstMapAndConstraintsImpl &MapAndConstraints,
- unsigned &OrigErrorInfo, bool matchingInlineAsm = false);
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm);
/// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi)
/// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode.
@@ -756,6 +752,7 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
if (getParser().ParseExpression(Disp, End)) return 0;
+ End = Parser.getTok().getLoc();
return X86Operand::CreateMem(Disp, Start, End, Size);
}
@@ -1520,29 +1517,18 @@ processInstruction(MCInst &Inst,
}
bool X86AsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc,
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out) {
- unsigned Kind;
- unsigned Opcode;
- unsigned ErrorInfo;
- MatchInstMapAndConstraints MapAndConstraints;
- bool Error = MatchInstruction(IDLoc, Operands, Out, Kind, Opcode,
- MapAndConstraints, ErrorInfo);
- return Error;
-}
-
-bool X86AsmParser::
-MatchInstruction(SMLoc IDLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &Kind, unsigned &Opcode,
- SmallVectorImpl<std::pair< unsigned, std::string > > &MapAndConstraints,
- unsigned &OrigErrorInfo, bool matchingInlineAsm) {
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
assert(!Operands.empty() && "Unexpected empty operand list!");
X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
assert(Op->isToken() && "Leading operand should always be a mnemonic!");
ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>();
+ // Clear the opcode.
+ Opcode = ~0x0;
+
// First, handle aliases that expand to multiple instructions.
// FIXME: This should be replaced with a real .td file alias mechanism.
// Also, MatchInstructionImpl should actually *do* the EmitInstruction
@@ -1554,7 +1540,7 @@ MatchInstruction(SMLoc IDLoc,
MCInst Inst;
Inst.setOpcode(X86::WAIT);
Inst.setLoc(IDLoc);
- if (!matchingInlineAsm)
+ if (!MatchingInlineAsm)
Out.EmitInstruction(Inst);
const char *Repl =
@@ -1577,26 +1563,26 @@ MatchInstruction(SMLoc IDLoc,
MCInst Inst;
// First, try a direct match.
- switch (MatchInstructionImpl(Operands, Kind, Inst, MapAndConstraints,
- OrigErrorInfo, matchingInlineAsm,
+ switch (MatchInstructionImpl(Operands, Inst,
+ ErrorInfo, MatchingInlineAsm,
isParsingIntelSyntax())) {
default: break;
case Match_Success:
// Some instructions need post-processing to, for example, tweak which
// encoding is selected. Loop on it while changes happen so the
// individual transformations can chain off each other.
- if (!matchingInlineAsm)
+ if (!MatchingInlineAsm)
while (processInstruction(Inst, Operands))
;
Inst.setLoc(IDLoc);
- if (!matchingInlineAsm)
+ if (!MatchingInlineAsm)
Out.EmitInstruction(Inst);
Opcode = Inst.getOpcode();
return false;
case Match_MissingFeature:
Error(IDLoc, "instruction requires a CPU feature not currently enabled",
- EmptyRanges, matchingInlineAsm);
+ EmptyRanges, MatchingInlineAsm);
return true;
case Match_InvalidOperand:
WasOriginallyInvalidOperand = true;
@@ -1629,24 +1615,18 @@ MatchInstruction(SMLoc IDLoc,
Tmp[Base.size()] = Suffixes[0];
unsigned ErrorInfoIgnore;
unsigned Match1, Match2, Match3, Match4;
- unsigned tKind;
- MatchInstMapAndConstraints tMapAndConstraints[4];
- Match1 = MatchInstructionImpl(Operands, tKind, Inst, tMapAndConstraints[0],
- ErrorInfoIgnore, isParsingIntelSyntax());
- if (Match1 == Match_Success) Kind = tKind;
+ Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+ isParsingIntelSyntax());
Tmp[Base.size()] = Suffixes[1];
- Match2 = MatchInstructionImpl(Operands, tKind, Inst, tMapAndConstraints[1],
- ErrorInfoIgnore, isParsingIntelSyntax());
- if (Match2 == Match_Success) Kind = tKind;
+ Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+ isParsingIntelSyntax());
Tmp[Base.size()] = Suffixes[2];
- Match3 = MatchInstructionImpl(Operands, tKind, Inst, tMapAndConstraints[2],
- ErrorInfoIgnore, isParsingIntelSyntax());
- if (Match3 == Match_Success) Kind = tKind;
+ Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+ isParsingIntelSyntax());
Tmp[Base.size()] = Suffixes[3];
- Match4 = MatchInstructionImpl(Operands, tKind, Inst, tMapAndConstraints[3],
- ErrorInfoIgnore, isParsingIntelSyntax());
- if (Match4 == Match_Success) Kind = tKind;
+ Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+ isParsingIntelSyntax());
// Restore the old token.
Op->setTokenValue(Base);
@@ -1659,10 +1639,9 @@ MatchInstruction(SMLoc IDLoc,
(Match3 == Match_Success) + (Match4 == Match_Success);
if (NumSuccessfulMatches == 1) {
Inst.setLoc(IDLoc);
- if (!matchingInlineAsm)
+ if (!MatchingInlineAsm)
Out.EmitInstruction(Inst);
Opcode = Inst.getOpcode();
- // FIXME: Handle the map and constraints.
return false;
}
@@ -1689,7 +1668,7 @@ MatchInstruction(SMLoc IDLoc,
OS << "'" << Base << MatchChars[i] << "'";
}
OS << ")";
- Error(IDLoc, OS.str(), EmptyRanges, matchingInlineAsm);
+ Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
return true;
}
@@ -1700,28 +1679,28 @@ MatchInstruction(SMLoc IDLoc,
if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
(Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
if (!WasOriginallyInvalidOperand) {
- ArrayRef<SMRange> Ranges = matchingInlineAsm ? EmptyRanges :
+ ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
Op->getLocRange();
return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
- Ranges, matchingInlineAsm);
+ Ranges, MatchingInlineAsm);
}
// Recover location info for the operand if we know which was the problem.
- if (OrigErrorInfo != ~0U) {
- if (OrigErrorInfo >= Operands.size())
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction",
- EmptyRanges, matchingInlineAsm);
+ EmptyRanges, MatchingInlineAsm);
- X86Operand *Operand = (X86Operand*)Operands[OrigErrorInfo];
+ X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
if (Operand->getStartLoc().isValid()) {
SMRange OperandRange = Operand->getLocRange();
return Error(Operand->getStartLoc(), "invalid operand for instruction",
- OperandRange, matchingInlineAsm);
+ OperandRange, MatchingInlineAsm);
}
}
return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
- matchingInlineAsm);
+ MatchingInlineAsm);
}
// If one instruction matched with a missing feature, report this as a
@@ -1729,7 +1708,7 @@ MatchInstruction(SMLoc IDLoc,
if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
(Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
Error(IDLoc, "instruction requires a CPU feature not currently enabled",
- EmptyRanges, matchingInlineAsm);
+ EmptyRanges, MatchingInlineAsm);
return true;
}
@@ -1738,13 +1717,13 @@ MatchInstruction(SMLoc IDLoc,
if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
(Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
Error(IDLoc, "invalid operand for instruction", EmptyRanges,
- matchingInlineAsm);
+ MatchingInlineAsm);
return true;
}
// If all of these were an outright failure, report it in a useless way.
Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
- EmptyRanges, matchingInlineAsm);
+ EmptyRanges, MatchingInlineAsm);
return true;
}
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 3809f3d3853..0ca1209449f 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -307,7 +307,9 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
};
// This CPU doesn't support long nops. If needed, add more.
- if (CPU == "geode") {
+ // FIXME: Can we get this from the subtarget somehow?
+ if (CPU == "generic" || CPU == "i386" || CPU == "i486" || CPU == "i586" ||
+ CPU == "pentium" || CPU == "pentium-mmx" || CPU == "geode") {
for (uint64_t i = 0; i < Count; ++i)
OW->Write8(0x90);
return true;
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index c704ca17013..6b8385db6c6 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "X86AsmPrinter.h"
-#include "X86MCInstLower.h"
#include "X86.h"
#include "X86COFFMachineModuleInfo.h"
#include "X86MachineFunctionInfo.h"
@@ -693,7 +692,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
OutStreamer.EmitLabel(Stubs[i].first);
OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
- TD->getPointerSize(), 0);
+ TD->getPointerSize(0), 0);
}
Stubs.clear();
}
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 8acef9dc7ba..e7f817e3a98 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -317,7 +317,7 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
bool HasFP = hasFP(MF);
// Calculate amount of bytes used for return address storing.
- int stackGrowth = -TD->getPointerSize();
+ int stackGrowth = -TD->getPointerSize(0);
// FIXME: This is dirty hack. The code itself is pretty mess right now.
// It should be rewritten from scratch and generalized sometimes.
@@ -717,7 +717,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
std::vector<MachineMove> &Moves = MMI.getFrameMoves();
const DataLayout *TD = MF.getTarget().getDataLayout();
uint64_t NumBytes = 0;
- int stackGrowth = -TD->getPointerSize();
+ int stackGrowth = -TD->getPointerSize(0);
if (HasFP) {
// Calculate required stack adjustment.
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 5a19f8ab981..0efeef20f2b 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1302,7 +1302,9 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
// that are not a MemSDNode, and thus don't have proper addrspace info.
Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
- Parent->getOpcode() != X86ISD::TLSCALL) { // Fixme
+ Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
+ Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
+ Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
unsigned AddrSpace =
cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
// AddrSpace 256 -> GS, 257 -> FS.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 7e43e5432d8..2f09e9e6ff1 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -457,6 +457,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SETCC , MVT::i64 , Custom);
}
setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
+ // NOTE: EH_SJLJ_SETJMP/_LONGJMP as supported here is NOT intended to
+ // implement SjLj exception handling; it is a lightweight setjmp/longjmp
+ // replacement for continuations, user-level threading, and the like. As a
+ // result, no other SjLj exception interfaces are implemented, so please
+ // don't build your own exception handling on top of them.
+ // LLVM/Clang supports zero-cost DWARF exception handling.
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
// Darwin ABI issue.
setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
@@ -939,6 +947,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
+
setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal);
}
@@ -2649,7 +2660,7 @@ X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
unsigned StackAlignment = TFI.getStackAlignment();
uint64_t AlignMask = StackAlignment - 1;
int64_t Offset = StackSize;
- uint64_t SlotSize = TD->getPointerSize();
+ uint64_t SlotSize = TD->getPointerSize(0);
if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
// Number smaller than 12 so just add the difference.
Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
@@ -3017,7 +3028,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
- uint64_t SlotSize = TD->getPointerSize();
+ uint64_t SlotSize = TD->getPointerSize(0);
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
false);
FuncInfo->setRAIndex(ReturnAddrIndex);
@@ -5161,86 +5172,6 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-// LowerVectorFpExtend - Recognize the scalarized FP_EXTEND from v2f32 to v2f64
-// and convert it into X86ISD::VFPEXT due to the current ISD::FP_EXTEND has the
-// constraint of matching input/output vector elements.
-SDValue
-X86TargetLowering::LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const {
- DebugLoc DL = Op.getDebugLoc();
- SDNode *N = Op.getNode();
- EVT VT = Op.getValueType();
- unsigned NumElts = Op.getNumOperands();
-
- // Check supported types and sub-targets.
- //
- // Only v2f32 -> v2f64 needs special handling.
- if (VT != MVT::v2f64 || !Subtarget->hasSSE2())
- return SDValue();
-
- SDValue VecIn;
- EVT VecInVT;
- SmallVector<int, 8> Mask;
- EVT SrcVT = MVT::Other;
-
- // Check the patterns could be translated into X86vfpext.
- for (unsigned i = 0; i < NumElts; ++i) {
- SDValue In = N->getOperand(i);
- unsigned Opcode = In.getOpcode();
-
- // Skip if the element is undefined.
- if (Opcode == ISD::UNDEF) {
- Mask.push_back(-1);
- continue;
- }
-
- // Quit if one of the elements is not defined from 'fpext'.
- if (Opcode != ISD::FP_EXTEND)
- return SDValue();
-
- // Check how the source of 'fpext' is defined.
- SDValue L2In = In.getOperand(0);
- EVT L2InVT = L2In.getValueType();
-
- // Check the original type
- if (SrcVT == MVT::Other)
- SrcVT = L2InVT;
- else if (SrcVT != L2InVT) // Quit if non-homogenous typed.
- return SDValue();
-
- // Check whether the value being 'fpext'ed is extracted from the same
- // source.
- Opcode = L2In.getOpcode();
-
- // Quit if it's not extracted with a constant index.
- if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
- !isa<ConstantSDNode>(L2In.getOperand(1)))
- return SDValue();
-
- SDValue ExtractedFromVec = L2In.getOperand(0);
-
- if (VecIn.getNode() == 0) {
- VecIn = ExtractedFromVec;
- VecInVT = ExtractedFromVec.getValueType();
- } else if (VecIn != ExtractedFromVec) // Quit if built from more than 1 vec.
- return SDValue();
-
- Mask.push_back(cast<ConstantSDNode>(L2In.getOperand(1))->getZExtValue());
- }
-
- // Quit if all operands of BUILD_VECTOR are undefined.
- if (!VecIn.getNode())
- return SDValue();
-
- // Fill the remaining mask as undef.
- for (unsigned i = NumElts; i < VecInVT.getVectorNumElements(); ++i)
- Mask.push_back(-1);
-
- return DAG.getNode(X86ISD::VFPEXT, DL, VT,
- DAG.getVectorShuffle(VecInVT, DL,
- VecIn, DAG.getUNDEF(VecInVT),
- &Mask[0]));
-}
-
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
@@ -5273,10 +5204,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (Broadcast.getNode())
return Broadcast;
- SDValue FpExt = LowerVectorFpExtend(Op, DAG);
- if (FpExt.getNode())
- return FpExt;
-
unsigned EVTBits = ExtVT.getSizeInBits();
unsigned NumZero = 0;
@@ -7724,7 +7651,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
IDX = DAG.getLoad(getPointerTy(), dl, Chain, IDX, MachinePointerInfo(),
false, false, false, 0);
- SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize()),
+ SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize(0)),
getPointerTy());
IDX = DAG.getNode(ISD::SHL, dl, getPointerTy(), IDX, Scale);
@@ -8215,6 +8142,20 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
return FIST;
}
+SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ SDValue In = Op.getOperand(0);
+ EVT SVT = In.getValueType();
+
+ assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
+
+ return DAG.getNode(X86ISD::VFPEXT, DL, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32,
+ In, DAG.getUNDEF(SVT)));
+}
+
SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
@@ -9215,6 +9156,21 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
}
+ // X86 doesn't have an i8 cmov. If both operands are the result of a truncate,
+ // widen the cmov and push the truncate through. This avoids introducing a new
+ // branch during isel and doesn't add any extensions.
+ if (Op.getValueType() == MVT::i8 &&
+ Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) {
+ SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0);
+ if (T1.getValueType() == T2.getValueType() &&
+ // Blacklist CopyFromReg to avoid partial register stalls.
+ T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode() != ISD::CopyFromReg) {
+ SDVTList VTs = DAG.getVTList(T1.getValueType(), MVT::Glue);
+ SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VTs, T2, T1, CC, Cond);
+ return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov);
+ }
+ }
+
// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
// condition is true.
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
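The i8-CMOV rewrite above is sound because truncation distributes over select: choosing between two truncated values equals truncating the chosen wide value. A standalone check (illustrative, not LLVM code):

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    int main() {
      for (uint32_t A : {0x1234u, 0xABCDu})
        for (uint32_t B : {0x00FFu, 0x8001u})
          for (int C = 0; C <= 1; ++C) {
            uint8_t Narrow = C ? uint8_t(A) : uint8_t(B); // select of truncs
            uint8_t Wide   = uint8_t(C ? A : B);          // trunc of wide select
            assert(Narrow == Wide);
          }
      return 0;
    }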
@@ -10345,7 +10301,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset =
- DAG.getConstant(TD->getPointerSize(),
+ DAG.getConstant(TD->getPointerSize(0),
Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
@@ -10377,7 +10333,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
SelectionDAG &DAG) const {
- return DAG.getIntPtrConstant(2*TD->getPointerSize());
+ return DAG.getIntPtrConstant(2*TD->getPointerSize(0));
}
SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
@@ -10392,7 +10348,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX);
SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame,
- DAG.getIntPtrConstant(TD->getPointerSize()));
+ DAG.getIntPtrConstant(TD->getPointerSize(0)));
StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset);
Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
false, false, 0);
@@ -10403,6 +10359,21 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
Chain, DAG.getRegister(StoreAddrReg, getPointerTy()));
}
+SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(X86ISD::EH_SJLJ_SETJMP, DL,
+ DAG.getVTList(MVT::i32, MVT::Other),
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue X86TargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(X86ISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
+ Op.getOperand(0), Op.getOperand(1));
+}
+
static SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
return Op.getOperand(0);
}
@@ -11407,6 +11378,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
+ case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG);
case ISD::FABS: return LowerFABS(Op, DAG);
case ISD::FNEG: return LowerFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
@@ -11426,6 +11398,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
+ case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
@@ -11535,6 +11509,11 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
return;
}
+ case ISD::FP_ROUND: {
+ SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
+ Results.push_back(V);
+ return;
+ }
case ISD::READCYCLECOUNTER: {
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue TheChain = N->getOperand(0);
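The FP_ROUND case above replaces a node with an illegal v2f32 result by an X86ISD::VFPROUND producing v4f32; this matches cvtpd2ps, which narrows two f64 lanes into the low half of an XMM register and zeroes the rest. The intrinsic-level equivalent (a hedged illustration, not part of the patch):

    #include <emmintrin.h>
    #include <cstdio>

    int main() {
      __m128d Src = _mm_setr_pd(1.5, -2.5);
      __m128 Dst = _mm_cvtpd_ps(Src);            // the X86ISD::VFPROUND shape
      float Out[4];
      _mm_storeu_ps(Out, Dst);
      std::printf("%g %g %g %g\n", Out[0], Out[1], Out[2], Out[3]); // 1.5 -2.5 0 0
      return 0;
    }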
@@ -11713,6 +11692,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR";
case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
+ case X86ISD::EH_SJLJ_SETJMP: return "X86ISD::EH_SJLJ_SETJMP";
+ case X86ISD::EH_SJLJ_LONGJMP: return "X86ISD::EH_SJLJ_LONGJMP";
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
@@ -11729,6 +11710,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VSEXT_MOVL: return "X86ISD::VSEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
+ case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
case X86ISD::VSHL: return "X86ISD::VSHL";
@@ -12389,12 +12371,9 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
// Hi
MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EDX);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
- if (i == X86::AddrDisp) {
+ if (i == X86::AddrDisp)
MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32)
- // Don't forget to transfer the target flag.
- MachineOperand &MO = MIB->getOperand(MIB->getNumOperands()-1);
- MO.setTargetFlags(MI->getOperand(MemOpndSlot + i).getTargetFlags());
- } else
+ else
MIB.addOperand(MI->getOperand(MemOpndSlot + i));
}
MIB.setMemRefs(MMOBegin, MMOEnd);
@@ -13261,6 +13240,173 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
}
MachineBasicBlock *
+X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = MBB;
+ ++I;
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ unsigned DstReg;
+ unsigned MemOpndSlot = 0;
+
+ unsigned CurOp = 0;
+
+ DstReg = MI->getOperand(CurOp++).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ assert(RC->hasType(MVT::i32) && "Invalid destination!");
+ unsigned mainDstReg = MRI.createVirtualRegister(RC);
+ unsigned restoreDstReg = MRI.createVirtualRegister(RC);
+
+ MemOpndSlot = CurOp;
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+
+ // For v = setjmp(buf), we generate
+ //
+ // thisMBB:
+ // buf[Label_Offset] = restoreMBB
+ // SjLjSetup restoreMBB
+ //
+ // mainMBB:
+ // v_main = 0
+ //
+ // sinkMBB:
+ // v = phi(main, restore)
+ //
+ // restoreMBB:
+ // v_restore = 1
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
+ MF->push_back(restoreMBB);
+
+ MachineInstrBuilder MIB;
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // thisMBB:
+ unsigned PtrImmStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
+ const int64_t Label_Offset = 1 * PVT.getStoreSize();
+
+ // Store IP
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrImmStoreOpc));
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ if (i == X86::AddrDisp)
+ MIB.addDisp(MI->getOperand(MemOpndSlot + i), Label_Offset);
+ else
+ MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+ }
+ MIB.addMBB(restoreMBB);
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Setup
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup))
+ .addMBB(restoreMBB);
+ MIB.addRegMask(RegInfo->getNoPreservedMask());
+ thisMBB->addSuccessor(mainMBB);
+ thisMBB->addSuccessor(restoreMBB);
+
+ // mainMBB:
+ // EAX = 0
+ BuildMI(mainMBB, DL, TII->get(X86::MOV32r0), mainDstReg);
+ mainMBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(X86::PHI), DstReg)
+ .addReg(mainDstReg).addMBB(mainMBB)
+ .addReg(restoreDstReg).addMBB(restoreMBB);
+
+ // restoreMBB:
+ BuildMI(restoreMBB, DL, TII->get(X86::MOV32ri), restoreDstReg).addImm(1);
+ BuildMI(restoreMBB, DL, TII->get(X86::JMP_4)).addMBB(sinkMBB);
+ restoreMBB->addSuccessor(sinkMBB);
+
+ MI->eraseFromParent();
+ return sinkMBB;
+}
+
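emitEHSjLjSetJmp above materializes 0 in mainMBB for the direct path and 1 in restoreMBB for the resumed path, merged by the PHI in sinkMBB; the observable contract is the familiar one from libc (a portable sketch only; the lowering itself never calls into libc):

    #include <csetjmp>
    #include <cstdio>

    static std::jmp_buf Buf;

    static void thrower() { std::longjmp(Buf, 1); } // the LONGJMP half

    int main() {
      if (setjmp(Buf) == 0) {      // 0: fell through mainMBB
        std::puts("direct path");
        thrower();                 // does not return; control reappears above
      } else {
        std::puts("resumed");      // 1: entered via restoreMBB
      }
      return 0;
    }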
+MachineBasicBlock *
+X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+
+ const TargetRegisterClass *RC =
+ (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
+ unsigned Tmp = MRI.createVirtualRegister(RC);
+ // Since FP is only updated here but NOT referenced, it's treated as a GPR.
+ unsigned FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP;
+ unsigned SP = RegInfo->getStackRegister();
+
+ MachineInstrBuilder MIB;
+
+ const int64_t Label_Offset = 1 * PVT.getStoreSize();
+ const int64_t SP_Offset = 2 * PVT.getStoreSize();
+
+ unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
+ unsigned IJmpOpc = (PVT == MVT::i64) ? X86::JMP64r : X86::JMP32r;
+
+ // Reload FP
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), FP);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MI->getOperand(i));
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Reload IP
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), Tmp);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ if (i == X86::AddrDisp)
+ MIB.addDisp(MI->getOperand(i), Label_Offset);
+ else
+ MIB.addOperand(MI->getOperand(i));
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Reload SP
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), SP);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ if (i == X86::AddrDisp)
+ MIB.addDisp(MI->getOperand(i), SP_Offset);
+ else
+ MIB.addOperand(MI->getOperand(i));
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Jump
+ BuildMI(*MBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp);
+
+ MI->eraseFromParent();
+ return MBB;
+}
+
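The displacements used by the longjmp expansion above imply a pointer-sized-slot layout for the buffer: slot 0 holds the frame pointer, slot 1 (Label_Offset) the resume address that emitEHSjLjSetJmp stored, and slot 2 (SP_Offset) the stack pointer; the FP/SP slots are filled in before this code runs. A sketch with invented field names:

    #include <cstddef>

    struct SjLjBuffer {      // one pointer-sized slot each
      void *FramePointer;    // slot 0: reloaded into RBP/EBP by longjmp
      void *ResumeIP;        // slot 1: address of restoreMBB, written by setjmp
      void *StackPointer;    // slot 2: reloaded into RSP/ESP by longjmp
    };

    static_assert(sizeof(SjLjBuffer) == 3 * sizeof(void *),
                  "three pointer-sized slots");

    int main() { return 0; }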
+MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
@@ -13475,6 +13621,14 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::VAARG_64:
return EmitVAARG64WithCustomInserter(MI, BB);
+
+ case X86::EH_SjLj_SetJmp32:
+ case X86::EH_SjLj_SetJmp64:
+ return emitEHSjLjSetJmp(MI, BB);
+
+ case X86::EH_SjLj_LongJmp32:
+ case X86::EH_SjLj_LongJmp64:
+ return emitEHSjLjLongJmp(MI, BB);
}
}
@@ -14478,6 +14632,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
CC = X86::GetOppositeBranchCondition(CC);
std::swap(TrueC, FalseC);
+ std::swap(TrueOp, FalseOp);
}
// Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0.
@@ -14560,6 +14715,46 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
}
}
}
+
+ // Handle these cases:
+ // (select (x != c), e, c) -> (select (x != c), e, x),
+ // (select (x == c), c, e) -> (select (x == c), x, e)
+ // where c is an integer constant, and the "select" is the combination
+ // of CMOV and CMP.
+ //
+ // The rationale for this change is that a conditional-move from a constant
+ // needs two instructions, whereas a conditional-move from a register needs
+ // only one instruction.
+ //
+ // CAVEAT: Replacing a constant with a symbolic value may obscure some
+ // instruction-combining opportunities, so this optimization needs to be
+ // postponed as late as possible.
+ //
+ if (!DCI.isBeforeLegalize() && !DCI.isBeforeLegalizeOps()) {
+ // The DCI.xxxx conditions are checked to postpone this optimization as
+ // late as possible.
+
+ ConstantSDNode *CmpAgainst = 0;
+ if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) &&
+ (CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) &&
+ dyn_cast<ConstantSDNode>(Cond.getOperand(0)) == 0) {
+
+ if (CC == X86::COND_NE &&
+ CmpAgainst == dyn_cast<ConstantSDNode>(FalseOp)) {
+ CC = X86::GetOppositeBranchCondition(CC);
+ std::swap(TrueOp, FalseOp);
+ }
+
+ if (CC == X86::COND_E &&
+ CmpAgainst == dyn_cast<ConstantSDNode>(TrueOp)) {
+ SDValue Ops[] = { FalseOp, Cond.getOperand(0),
+ DAG.getConstant(CC, MVT::i8), Cond };
+ return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops,
+ array_lengthof(Ops));
+ }
+ }
+ }
+
return SDValue();
}
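The constant-to-register replacement above is value-preserving because on the path where the select yields c, the guarding compare guarantees x == c. A standalone check (illustrative, not LLVM code):

    #include <cassert>

    static int selectConst(int X, int C, int E) { return X != C ? E : C; }
    static int selectReg(int X, int C, int E) { return X != C ? E : X; }

    int main() {
      for (int X = -4; X <= 4; ++X)
        assert(selectConst(X, 1, 7) == selectReg(X, 1, 7));
      return 0;
    }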
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 1cae7ed2681..40e966ad676 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -217,6 +217,12 @@ namespace llvm {
// EH_RETURN - Exception Handling helpers.
EH_RETURN,
+ // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
+ EH_SJLJ_SETJMP,
+
+ // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
+ EH_SJLJ_LONGJMP,
+
/// TC_RETURN - Tail call return.
/// operand #0 chain
/// operand #1 callee (register or absolute)
@@ -233,6 +239,9 @@ namespace llvm {
// VFPEXT - Vector FP extend.
VFPEXT,
+ // VFPROUND - Vector FP round.
+ VFPROUND,
+
// VSHL, VSRL - 128-bit vector logical left / right shift
VSHLDQ, VSRLDQ,
@@ -788,6 +797,7 @@ namespace llvm {
SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
@@ -806,6 +816,8 @@ namespace llvm {
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
@@ -818,8 +830,6 @@ namespace llvm {
SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const;
-
virtual SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -904,6 +914,12 @@ namespace llvm {
MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent, for use with the given x86 condition code.
SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const;
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index f27b6f7f53a..9e6f27988f7 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -165,6 +165,33 @@ def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
}
+let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in {
+ def EH_SjLj_SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf),
+ "#EH_SJLJ_SETJMP32",
+ [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In32BitMode]>;
+ def EH_SjLj_SetJmp64 : I<0, Pseudo, (outs GR32:$dst), (ins i64mem:$buf),
+ "#EH_SJLJ_SETJMP64",
+ [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In64BitMode]>;
+ let isTerminator = 1 in {
+ def EH_SjLj_LongJmp32 : I<0, Pseudo, (outs), (ins i32mem:$buf),
+ "#EH_SJLJ_LONGJMP32",
+ [(X86eh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In32BitMode]>;
+ def EH_SjLj_LongJmp64 : I<0, Pseudo, (outs), (ins i64mem:$buf),
+ "#EH_SJLJ_LONGJMP64",
+ [(X86eh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In64BitMode]>;
+ }
+}
+
+let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in {
+ def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst),
+ "#EH_SjLj_Setup\t$dst", []>;
+}
+
//===----------------------------------------------------------------------===//
// Pseudo instructions used by segmented stacks.
//
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 90354354367..46281efa571 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -93,6 +93,9 @@ def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
def X86vfpext : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisFP<0>, SDTCisFP<1>]>>;
+def X86vfpround : SDNode<"X86ISD::VFPROUND",
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisFP<0>, SDTCisFP<1>]>>;
def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>;
def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>;
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 5c6084fe008..2f637685b3f 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -216,6 +216,14 @@ def X86tlsbaseaddr : SDNode<"X86ISD::TLSBASEADDR", SDT_X86TLSBASEADDR,
def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
[SDNPHasChain]>;
+def X86eh_sjlj_setjmp : SDNode<"X86ISD::EH_SJLJ_SETJMP",
+ SDTypeProfile<1, 1, [SDTCisInt<0>,
+ SDTCisPtrTy<1>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+def X86eh_sjlj_longjmp : SDNode<"X86ISD::EH_SJLJ_LONGJMP",
+ SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+
def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 2aa4f3f4dbb..cc1291a8a0f 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2125,6 +2125,10 @@ let Predicates = [HasAVX] in {
(VCVTDQ2PSYrm addr:$src)>;
// Match fround and fextend for 128/256-bit conversions
+ def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
+ (VCVTPD2PSrr VR128:$src)>;
+ def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))),
+ (VCVTPD2PSXrm addr:$src)>;
def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
(VCVTPD2PSYrr VR256:$src)>;
def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
@@ -2139,7 +2143,12 @@ let Predicates = [HasAVX] in {
}
let Predicates = [UseSSE2] in {
- // Match fextend for 128 conversions
+ // Match fround and fextend for 128 conversions
+ def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
+ (CVTPD2PSrr VR128:$src)>;
+ def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))),
+ (CVTPD2PSrm addr:$src)>;
+
def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
(CVTPS2PDrr VR128:$src)>;
}
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 6e4db73c3a9..c44549c30ac 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#include "X86MCInstLower.h"
#include "X86AsmPrinter.h"
#include "X86COFFMachineModuleInfo.h"
#include "InstPrinter/X86ATTInstPrinter.h"
@@ -29,6 +28,31 @@
#include "llvm/ADT/SmallString.h"
using namespace llvm;
+namespace {
+
+/// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst.
+class X86MCInstLower {
+ MCContext &Ctx;
+ Mangler *Mang;
+ const MachineFunction &MF;
+ const TargetMachine &TM;
+ const MCAsmInfo &MAI;
+ X86AsmPrinter &AsmPrinter;
+public:
+ X86MCInstLower(Mangler *mang, const MachineFunction &MF,
+ X86AsmPrinter &asmprinter);
+
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+ MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
+ MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+private:
+ MachineModuleInfoMachO &getMachOMMI() const;
+};
+
+} // end anonymous namespace
+
X86MCInstLower::X86MCInstLower(Mangler *mang, const MachineFunction &mf,
X86AsmPrinter &asmprinter)
: Ctx(mf.getContext()), Mang(mang), MF(mf), TM(mf.getTarget()),
diff --git a/lib/Target/X86/X86MCInstLower.h b/lib/Target/X86/X86MCInstLower.h
deleted file mode 100644
index b4d4cfd301a..00000000000
--- a/lib/Target/X86/X86MCInstLower.h
+++ /dev/null
@@ -1,52 +0,0 @@
-//===-- X86MCInstLower.h - Lower MachineInstr to MCInst ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef X86_MCINSTLOWER_H
-#define X86_MCINSTLOWER_H
-
-#include "llvm/Support/Compiler.h"
-
-namespace llvm {
- class MCAsmInfo;
- class MCContext;
- class MCInst;
- class MCOperand;
- class MCSymbol;
- class MachineInstr;
- class MachineFunction;
- class MachineModuleInfoMachO;
- class MachineOperand;
- class Mangler;
- class TargetMachine;
- class X86AsmPrinter;
-
-/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
-class LLVM_LIBRARY_VISIBILITY X86MCInstLower {
- MCContext &Ctx;
- Mangler *Mang;
- const MachineFunction &MF;
- const TargetMachine &TM;
- const MCAsmInfo &MAI;
- X86AsmPrinter &AsmPrinter;
-public:
- X86MCInstLower(Mangler *mang, const MachineFunction &MF,
- X86AsmPrinter &asmprinter);
-
- void Lower(const MachineInstr *MI, MCInst &OutMI) const;
-
- MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
- MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
-
-private:
- MachineModuleInfoMachO &getMachOMMI() const;
-};
-
-}
-
-#endif
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index c840ea21a89..4bcf6b1f19e 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -261,6 +261,11 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
return CSR_64_RegMask;
}
+const uint32_t*
+X86RegisterInfo::getNoPreservedMask() const {
+ return CSR_NoRegs_RegMask;
+}
+
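getNoPreservedMask lets SjLjSetup declare that no register survives it, since longjmp may land in restoreMBB with arbitrary register state. How such a mask is consumed, modeled standalone (one bit per physical register; a set bit means the register is preserved):

    #include <cassert>
    #include <cstdint>

    static bool isPreserved(const uint32_t *Mask, unsigned Reg) {
      return (Mask[Reg / 32] >> (Reg % 32)) & 1;
    }

    int main() {
      const uint32_t NoRegs[4] = {0, 0, 0, 0}; // stand-in for CSR_NoRegs_RegMask
      for (unsigned R = 0; R < 128; ++R)
        assert(!isPreserved(NoRegs, R));       // nothing is call-preserved
      return 0;
    }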
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 0287fa22062..7932ede8dd6 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -100,6 +100,7 @@ public:
/// callee-save registers on this target.
const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
const uint32_t *getCallPreservedMask(CallingConv::ID) const;
+ const uint32_t *getNoPreservedMask() const;
/// getReservedRegs - Returns a bitset indexed by physical register number
/// indicating if a register is a special register that has particular uses and
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index f8cced885d1..655ede79ba3 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -48,7 +48,8 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
InstrInfo(*this),
TSInfo(*this),
TLInfo(*this),
- JITInfo(*this) {
+ JITInfo(*this),
+ STTI(&TLInfo) {
}
void X86_64TargetMachine::anchor() { }
@@ -64,7 +65,8 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
InstrInfo(*this),
TSInfo(*this),
TLInfo(*this),
- JITInfo(*this) {
+ JITInfo(*this),
+ STTI(&TLInfo) {
}
/// X86TargetMachine ctor - Create an X86 target.
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 5301299c1f3..4bad695b4c4 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -25,6 +25,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -85,6 +86,8 @@ class X86_32TargetMachine : public X86TargetMachine {
X86SelectionDAGInfo TSInfo;
X86TargetLowering TLInfo;
X86JITInfo JITInfo;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
X86_32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -103,6 +106,12 @@ public:
virtual X86JITInfo *getJITInfo() {
return &JITInfo;
}
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo() const {
+ return &STTI;
+ }
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo() const {
+ return &VTTI;
+ }
};
/// X86_64TargetMachine - X86 64-bit target machine.
@@ -114,6 +123,8 @@ class X86_64TargetMachine : public X86TargetMachine {
X86SelectionDAGInfo TSInfo;
X86TargetLowering TLInfo;
X86JITInfo JITInfo;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
X86_64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -132,6 +143,12 @@ public:
virtual X86JITInfo *getJITInfo() {
return &JITInfo;
}
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo() const {
+ return &STTI;
+ }
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo() const {
+ return &VTTI;
+ }
};
} // End llvm namespace
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index c71d978ad81..0b7e3e10d4b 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -32,7 +32,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
InstrInfo(),
FrameLowering(Subtarget),
TLInfo(*this),
- TSInfo(*this) {
+ TSInfo(*this), STTI(&TLInfo) {
}
namespace {
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index f7fec29f544..c60c6a37f95 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -20,6 +20,7 @@
#include "XCoreISelLowering.h"
#include "XCoreSelectionDAGInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetTransformImpl.h"
#include "llvm/DataLayout.h"
namespace llvm {
@@ -31,6 +32,8 @@ class XCoreTargetMachine : public LLVMTargetMachine {
XCoreFrameLowering FrameLowering;
XCoreTargetLowering TLInfo;
XCoreSelectionDAGInfo TSInfo;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
XCoreTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -53,6 +56,12 @@ public:
virtual const TargetRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo() const {
+ return &STTI;
+ }
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo() const {
+ return &VTTI;
+ }
virtual const DataLayout *getDataLayout() const { return &DL; }
// Pass Pipeline Configuration
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 6f6ff9ca2d5..8a0274b5ff7 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -518,8 +518,10 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
const AttrListPtr &PAL = F->getAttributes();
// Add any return attributes.
- if (Attributes attrs = PAL.getRetAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(0, attrs));
+ Attributes attrs = PAL.getRetAttributes();
+ if (attrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ attrs));
// First, determine the new argument list
unsigned ArgIndex = 1;
@@ -535,7 +537,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
} else if (!ArgsToPromote.count(I)) {
// Unchanged argument
Params.push_back(I->getType());
- if (Attributes attrs = PAL.getParamAttributes(ArgIndex))
+ Attributes attrs = PAL.getParamAttributes(ArgIndex);
+ if (attrs.hasAttributes())
AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs));
} else if (I->use_empty()) {
// Dead argument (which are always marked as promotable)
@@ -588,8 +591,10 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
}
// Add any function attributes.
- if (Attributes attrs = PAL.getFnAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
+ attrs = PAL.getFnAttributes();
+ if (attrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ attrs));
Type *RetTy = FTy->getReturnType();
@@ -634,8 +639,10 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
const AttrListPtr &CallPAL = CS.getAttributes();
// Add any return attributes.
- if (Attributes attrs = CallPAL.getRetAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(0, attrs));
+ Attributes attrs = CallPAL.getRetAttributes();
+ if (attrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ attrs));
// Loop over the operands, inserting GEP and loads in the caller as
// appropriate.
@@ -646,7 +653,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
Args.push_back(*AI); // Unmodified argument
- if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
+ Attributes Attrs = CallPAL.getParamAttributes(ArgIndex);
+ if (Attrs.hasAttributes())
AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
} else if (ByValArgsToTransform.count(I)) {
@@ -707,13 +715,16 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Push any varargs arguments on the list.
for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
Args.push_back(*AI);
- if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
+ Attributes Attrs = CallPAL.getParamAttributes(ArgIndex);
+ if (Attrs.hasAttributes())
AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
}
// Add any function attributes.
- if (Attributes attrs = CallPAL.getFnAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
+ attrs = CallPAL.getFnAttributes();
+ if (attrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ attrs));
Instruction *New;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
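The hunks above repeat one migration in several places: the magic indices 0 and ~0 become AttrListPtr::ReturnIndex and AttrListPtr::FunctionIndex, and the old use of Attributes as a boolean becomes an explicit hasAttributes() query. Condensed into a single hypothetical helper (CopyRetAttrs is an invented name; this only compiles against the in-tree LLVM API of this era):

    #include "llvm/Attributes.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    static void CopyRetAttrs(const AttrListPtr &PAL,
                             SmallVectorImpl<AttributeWithIndex> &Vec) {
      Attributes RA = PAL.getRetAttributes();   // no implicit bool conversion now
      if (RA.hasAttributes())                   // explicit emptiness check
        Vec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex, RA));
    }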
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index b107669b177..fc22548db70 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -276,8 +276,10 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
SmallVector<AttributeWithIndex, 8> AttributesVec;
for (unsigned i = 0; PAL.getSlot(i).Index <= NumArgs; ++i)
AttributesVec.push_back(PAL.getSlot(i));
- if (Attributes FnAttrs = PAL.getFnAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+ Attributes FnAttrs = PAL.getFnAttributes();
+ if (FnAttrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ FnAttrs));
PAL = AttrListPtr::get(AttributesVec);
}
@@ -762,13 +764,17 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// here. Currently, this should not be possible, but special handling might be
// required when new return value attributes are added.
if (NRetTy->isVoidTy())
- RAttrs &= ~Attributes::typeIncompatible(NRetTy);
+ RAttrs =
+ Attributes::get(NRetTy->getContext(), AttrBuilder(RAttrs).
+ removeAttributes(Attributes::typeIncompatible(NRetTy)));
else
- assert((RAttrs & Attributes::typeIncompatible(NRetTy)) == 0
- && "Return attributes no longer compatible?");
+ assert(!AttrBuilder(RAttrs).
+ hasAttributes(Attributes::typeIncompatible(NRetTy)) &&
+ "Return attributes no longer compatible?");
- if (RAttrs)
- AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs));
+ if (RAttrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ RAttrs));
// Remember which arguments are still alive.
SmallVector<bool, 10> ArgAlive(FTy->getNumParams(), false);
@@ -785,7 +791,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// Get the original parameter attributes (skipping the first one, that is
// for the return value).
- if (Attributes Attrs = PAL.getParamAttributes(i + 1))
+ Attributes Attrs = PAL.getParamAttributes(i + 1);
+ if (Attrs.hasAttributes())
AttributesVec.push_back(AttributeWithIndex::get(Params.size(), Attrs));
} else {
++NumArgumentsEliminated;
@@ -795,7 +802,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
}
if (FnAttrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ FnAttrs));
// Reconstruct the AttributesList based on the vector we constructed.
AttrListPtr NewPAL = AttrListPtr::get(AttributesVec);
@@ -831,9 +839,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
Attributes RAttrs = CallPAL.getRetAttributes();
Attributes FnAttrs = CallPAL.getFnAttributes();
// Adjust in case the function was changed to return void.
- RAttrs &= ~Attributes::typeIncompatible(NF->getReturnType());
- if (RAttrs)
- AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs));
+ RAttrs =
+ Attributes::get(NF->getContext(), AttrBuilder(RAttrs).
+ removeAttributes(Attributes::typeIncompatible(NF->getReturnType())));
+ if (RAttrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ RAttrs));
// Declare these outside of the loops, so we can reuse them for the second
// loop, which loops the varargs.
@@ -845,19 +856,22 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (ArgAlive[i]) {
Args.push_back(*I);
// Get original parameter attributes, but skip return attributes.
- if (Attributes Attrs = CallPAL.getParamAttributes(i + 1))
+ Attributes Attrs = CallPAL.getParamAttributes(i + 1);
+ if (Attrs.hasAttributes())
AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
}
// Push any varargs arguments on the list. Don't forget their attributes.
for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) {
Args.push_back(*I);
- if (Attributes Attrs = CallPAL.getParamAttributes(i + 1))
+ Attributes Attrs = CallPAL.getParamAttributes(i + 1);
+ if (Attrs.hasAttributes())
AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
}
if (FnAttrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ FnAttrs));
// Reconstruct the AttributesList based on the vector we constructed.
AttrListPtr NewCallPAL = AttrListPtr::get(AttributesVec);
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 43e12d44441..d8f374c330e 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -212,15 +212,17 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
MadeChange = true;
// Clear out any existing attributes.
- Attributes::Builder B;
+ AttrBuilder B;
B.addAttribute(Attributes::ReadOnly)
.addAttribute(Attributes::ReadNone);
- F->removeAttribute(~0, Attributes::get(B));
+ F->removeAttribute(AttrListPtr::FunctionIndex,
+ Attributes::get(F->getContext(), B));
// Add in the new attribute.
B.clear();
B.addAttribute(ReadsMemory ? Attributes::ReadOnly : Attributes::ReadNone);
- F->addAttribute(~0, Attributes::get(B));
+ F->addAttribute(AttrListPtr::FunctionIndex,
+ Attributes::get(F->getContext(), B));
if (ReadsMemory)
++NumReadOnly;
@@ -355,7 +357,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
ArgumentGraph AG;
- Attributes::Builder B;
+ AttrBuilder B;
B.addAttribute(Attributes::NoCapture);
// Check each function in turn, determining which pointer arguments are not
@@ -379,7 +381,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end();
A != E; ++A) {
if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
- A->addAttr(Attributes::get(B));
+ A->addAttr(Attributes::get(F->getContext(), B));
++NumNoCapture;
Changed = true;
}
@@ -394,7 +396,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
if (!Tracker.Captured) {
if (Tracker.Uses.empty()) {
// If it's trivially not captured, mark it nocapture now.
- A->addAttr(Attributes::get(B));
+ A->addAttr(Attributes::get(F->getContext(), B));
++NumNoCapture;
Changed = true;
} else {
@@ -427,7 +429,9 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
// eg. "void f(int* x) { if (...) f(x); }"
if (ArgumentSCC[0]->Uses.size() == 1 &&
ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
- ArgumentSCC[0]->Definition->addAttr(Attributes::get(B));
+ ArgumentSCC[0]->
+ Definition->
+ addAttr(Attributes::get(ArgumentSCC[0]->Definition->getContext(), B));
++NumNoCapture;
Changed = true;
}
@@ -469,7 +473,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
- A->addAttr(Attributes::get(B));
+ A->addAttr(Attributes::get(A->getContext(), B));
++NumNoCapture;
Changed = true;
}
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index a1b976577a7..678189b3d6c 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -2061,28 +2061,26 @@ static void ChangeCalleesToFastCall(Function *F) {
}
}
-static AttrListPtr StripNest(const AttrListPtr &Attrs) {
- Attributes::Builder B;
- B.addAttribute(Attributes::Nest);
-
+static AttrListPtr StripNest(LLVMContext &C, const AttrListPtr &Attrs) {
for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
if (!Attrs.getSlot(i).Attrs.hasAttribute(Attributes::Nest))
continue;
// There can be only one.
- return Attrs.removeAttr(Attrs.getSlot(i).Index, Attributes::get(B));
+ return Attrs.removeAttr(C, Attrs.getSlot(i).Index,
+ Attributes::get(C, Attributes::Nest));
}
return Attrs;
}
static void RemoveNestAttribute(Function *F) {
- F->setAttributes(StripNest(F->getAttributes()));
+ F->setAttributes(StripNest(F->getContext(), F->getAttributes()));
for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
if (isa<BlockAddress>(*UI))
continue;
CallSite User(cast<Instruction>(*UI));
- User.setAttributes(StripNest(User.getAttributes()));
+ User.setAttributes(StripNest(F->getContext(), User.getAttributes()));
}
}
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index 6233922db92..86c76f0c0a0 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -1,4 +1,4 @@
-//===-- Scalar.cpp --------------------------------------------------------===//
+//===-- IPO.cpp -----------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index 3e598abfcf6..fb4ecbfe7b0 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -137,7 +137,7 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
// If the SCC doesn't unwind or doesn't throw, note this fact.
if (!SCCMightUnwind || !SCCMightReturn)
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Attributes::Builder NewAttributes;
+ AttrBuilder NewAttributes;
if (!SCCMightUnwind)
NewAttributes.addAttribute(Attributes::NoUnwind);
@@ -146,7 +146,9 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
Function *F = (*I)->getFunction();
const AttrListPtr &PAL = F->getAttributes();
- const AttrListPtr &NPAL = PAL.addAttr(~0, Attributes::get(NewAttributes));
+ const AttrListPtr &NPAL = PAL.addAttr(F->getContext(), ~0,
+ Attributes::get(F->getContext(),
+ NewAttributes));
if (PAL != NPAL) {
MadeChange = true;
F->setAttributes(NPAL);
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 325bb20fbe8..41017c52879 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -18,6 +18,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/Support/TargetFolder.h"
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
namespace llvm {
class CallSite;
@@ -74,6 +75,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
DataLayout *TD;
TargetLibraryInfo *TLI;
bool MadeIRChange;
+ LibCallSimplifier *Simplifier;
public:
/// Worklist - All of the instructions that need to be simplified.
InstCombineWorklist Worklist;
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index f92c4baeba7..5ad6f9111c8 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -111,10 +111,13 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
// get the TBAA tag describing our copy.
if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
if (M->getNumOperands() == 3 &&
+ M->getOperand(0) &&
isa<ConstantInt>(M->getOperand(0)) &&
cast<ConstantInt>(M->getOperand(0))->isNullValue() &&
+ M->getOperand(1) &&
isa<ConstantInt>(M->getOperand(1)) &&
cast<ConstantInt>(M->getOperand(1))->getValue() == Size &&
+ M->getOperand(2) &&
isa<MDNode>(M->getOperand(2)))
CopyMD = cast<MDNode>(M->getOperand(2));
}
@@ -775,39 +778,6 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
return true;
}
-namespace {
-class InstCombineFortifiedLibCalls : public SimplifyFortifiedLibCalls {
- InstCombiner *IC;
-protected:
- void replaceCall(Value *With) {
- NewInstruction = IC->ReplaceInstUsesWith(*CI, With);
- }
- bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
- if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp))
- return true;
- if (ConstantInt *SizeCI =
- dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
- if (SizeCI->isAllOnesValue())
- return true;
- if (isString) {
- uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp));
- // If the length is 0 we don't know how long it is and so we can't
- // remove the check.
- if (Len == 0) return false;
- return SizeCI->getZExtValue() >= Len;
- }
- if (ConstantInt *Arg = dyn_cast<ConstantInt>(
- CI->getArgOperand(SizeArgOp)))
- return SizeCI->getZExtValue() >= Arg->getZExtValue();
- }
- return false;
- }
-public:
- InstCombineFortifiedLibCalls(InstCombiner *IC) : IC(IC), NewInstruction(0) { }
- Instruction *NewInstruction;
-};
-} // end anonymous namespace
-
// Try to fold some different type of calls here.
// Currently we're only working with the checking functions, memcpy_chk,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
@@ -815,9 +785,10 @@ public:
Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *TD) {
if (CI->getCalledFunction() == 0) return 0;
- InstCombineFortifiedLibCalls Simplifier(this);
- Simplifier.fold(CI, TD, TLI);
- return Simplifier.NewInstruction;
+ if (Value *With = Simplifier->optimizeCall(CI))
+ return ReplaceInstUsesWith(*CI, With);
+
+ return 0;
}
static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) {
@@ -1036,7 +1007,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
return false; // Cannot transform this return value.
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
- Attributes::Builder RAttrs = CallerPAL.getRetAttributes();
+ AttrBuilder RAttrs = CallerPAL.getRetAttributes();
if (RAttrs.hasAttributes(Attributes::typeIncompatible(NewRetTy)))
return false; // Attribute not compatible with transformed value.
}
@@ -1067,7 +1038,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
return false; // Cannot transform this parameter value.
Attributes Attrs = CallerPAL.getParamAttributes(i + 1);
- if (Attrs & Attributes::typeIncompatible(ParamTy))
+ if (AttrBuilder(Attrs).
+ hasAttributes(Attributes::typeIncompatible(ParamTy)))
return false; // Attribute not compatible with transformed value.
// If the parameter is passed as a byval argument, then we have to have a
@@ -1137,7 +1109,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
attrVec.reserve(NumCommonArgs);
// Get any return attributes.
- Attributes::Builder RAttrs = CallerPAL.getRetAttributes();
+ AttrBuilder RAttrs = CallerPAL.getRetAttributes();
// If the return value is not being used, the type may not be compatible
// with the existing attributes. Wipe out any problematic attributes.
@@ -1145,7 +1117,9 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// Add the new return attributes.
if (RAttrs.hasAttributes())
- attrVec.push_back(AttributeWithIndex::get(0, Attributes::get(RAttrs)));
+ attrVec.push_back(
+ AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ Attributes::get(FT->getContext(), RAttrs)));
AI = CS.arg_begin();
for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
@@ -1159,7 +1133,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
}
// Add any parameter attributes.
- if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
+ Attributes PAttrs = CallerPAL.getParamAttributes(i + 1);
+ if (PAttrs.hasAttributes())
attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
}
@@ -1187,14 +1162,17 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
}
// Add any parameter attributes.
- if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
+ Attributes PAttrs = CallerPAL.getParamAttributes(i + 1);
+ if (PAttrs.hasAttributes())
attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
}
}
}
- if (Attributes FnAttrs = CallerPAL.getFnAttributes())
- attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+ Attributes FnAttrs = CallerPAL.getFnAttributes();
+ if (FnAttrs.hasAttributes())
+ attrVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ FnAttrs));
if (NewRetTy->isVoidTy())
Caller->setName(""); // Void type should not have a name.
@@ -1302,8 +1280,10 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
// mean appending it. Likewise for attributes.
// Add any result attributes.
- if (Attributes Attr = Attrs.getRetAttributes())
- NewAttrs.push_back(AttributeWithIndex::get(0, Attr));
+ Attributes Attr = Attrs.getRetAttributes();
+ if (Attr.hasAttributes())
+ NewAttrs.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ Attr));
{
unsigned Idx = 1;
@@ -1323,7 +1303,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
// Add the original argument and attributes.
NewArgs.push_back(*I);
- if (Attributes Attr = Attrs.getParamAttributes(Idx))
+ Attr = Attrs.getParamAttributes(Idx);
+ if (Attr.hasAttributes())
NewAttrs.push_back
(AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));
@@ -1332,8 +1313,10 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
}
// Add any function attributes.
- if (Attributes Attr = Attrs.getFnAttributes())
- NewAttrs.push_back(AttributeWithIndex::get(~0, Attr));
+ Attr = Attrs.getFnAttributes();
+ if (Attr.hasAttributes())
+ NewAttrs.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ Attr));
// The trampoline may have been bitcast to a bogus type (FTy).
// Handle this by synthesizing a new function type, equal to FTy
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index b59210a9df1..f3f3f8f585d 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1293,15 +1293,16 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
// If the source integer type is not the intptr_t type for this target, do a
// trunc or zext to the intptr_t type, then inttoptr of it. This allows the
// cast to be exposed to other transforms.
+ unsigned AS = CI.getAddressSpace();
if (TD) {
if (CI.getOperand(0)->getType()->getScalarSizeInBits() >
- TD->getPointerSizeInBits()) {
+ TD->getPointerSizeInBits(AS)) {
Value *P = Builder->CreateTrunc(CI.getOperand(0),
TD->getIntPtrType(CI.getContext()));
return new IntToPtrInst(P, CI.getType());
}
if (CI.getOperand(0)->getType()->getScalarSizeInBits() <
- TD->getPointerSizeInBits()) {
+ TD->getPointerSizeInBits(AS)) {
Value *P = Builder->CreateZExt(CI.getOperand(0),
TD->getIntPtrType(CI.getContext()));
return new IntToPtrInst(P, CI.getType());
@@ -1368,13 +1369,14 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
// If the destination integer type is not the intptr_t type for this target,
// do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
// to be exposed to other transforms.
+ unsigned AS = CI.getPointerAddressSpace();
if (TD) {
- if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
+ if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits(AS)) {
Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
TD->getIntPtrType(CI.getContext()));
return new TruncInst(P, CI.getType());
}
- if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) {
+ if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits(AS)) {
Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
TD->getIntPtrType(CI.getContext()));
return new ZExtInst(P, CI.getType());
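These casts now ask for the pointer width of the specific address space involved, because DataLayout can report different widths per address space. The policy, modeled standalone with a hypothetical layout (illustrative, not LLVM code):

    #include <cassert>

    static unsigned pointerSizeInBits(unsigned AS) {
      // Hypothetical data layout: 64-bit pointers in AS 0, 32-bit in AS 1.
      return AS == 1 ? 32 : 64;
    }

    int main() {
      assert(pointerSizeInBits(0) == 64); // an AS-0 ptrtoint compares against 64
      assert(pointerSizeInBits(1) == 32); // an AS-1 ptrtoint compares against 32
      return 0;
    }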
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 4d5ffddc4c7..e3e5ddae80b 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -365,11 +365,12 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// order the state machines in complexity of the generated code.
Value *Idx = GEP->getOperand(2);
+ unsigned AS = GEP->getPointerAddressSpace();
// If the index is larger than the pointer size of the target, truncate the
// index down like the GEP would do implicitly. We don't have to do this for
// an inbounds GEP because the index can't be out of range.
if (!GEP->isInBounds() &&
- Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits())
+ Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits(AS))
Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext()));
// If the comparison is only true for one or two elements, emit direct
@@ -528,10 +529,11 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
}
}
+ unsigned AS = cast<GetElementPtrInst>(GEP)->getPointerAddressSpace();
// Okay, we know we have a single variable index, which must be a
// pointer/array/vector index. If there is no offset, life is simple, return
// the index.
- unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ unsigned IntPtrWidth = TD.getPointerSizeInBits(AS);
if (Offset == 0) {
// Cast to intptrty in case a truncation occurs. If an extension is needed,
// we don't need to bother extending: the extension won't affect where the
@@ -1552,7 +1554,8 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
// integer type is the same size as the pointer type.
if (TD && LHSCI->getOpcode() == Instruction::PtrToInt &&
- TD->getPointerSizeInBits() ==
+ TD->getPointerSizeInBits(
+ cast<PtrToIntInst>(LHSCI)->getPointerAddressSpace()) ==
cast<IntegerType>(DestTy)->getBitWidth()) {
Value *RHSOp = 0;
if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index edfc060888b..5356fdcba7c 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2130,6 +2130,9 @@ bool InstCombiner::runOnFunction(Function &F) {
InstCombineIRInserter(Worklist));
Builder = &TheBuilder;
+ LibCallSimplifier TheSimplifier(TD, TLI);
+ Simplifier = &TheSimplifier;
+
bool EverMadeChange = false;
// Lower dbg.declare intrinsics otherwise their value may be clobbered
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 10ab9cb6039..b566994edfc 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -148,38 +148,29 @@ static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug max inst"),
cl::Hidden, cl::init(-1));
namespace {
-
-/// An object of this type is created while instrumenting every function.
-struct AsanFunctionContext {
- AsanFunctionContext(Function &Function) : F(Function) { }
-
- Function &F;
-};
-
/// AddressSanitizer: instrument the code in module to find memory bugs.
-struct AddressSanitizer : public ModulePass {
+struct AddressSanitizer : public FunctionPass {
AddressSanitizer();
virtual const char *getPassName() const;
- void instrumentMop(AsanFunctionContext &AFC, Instruction *I);
- void instrumentAddress(AsanFunctionContext &AFC,
- Instruction *OrigIns, IRBuilder<> &IRB,
+ void instrumentMop(Instruction *I);
+ void instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB,
Value *Addr, uint32_t TypeSize, bool IsWrite);
Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
Value *ShadowValue, uint32_t TypeSize);
Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr,
bool IsWrite, size_t AccessSizeIndex);
- bool instrumentMemIntrinsic(AsanFunctionContext &AFC, MemIntrinsic *MI);
- void instrumentMemIntrinsicParam(AsanFunctionContext &AFC,
- Instruction *OrigIns, Value *Addr,
+ bool instrumentMemIntrinsic(MemIntrinsic *MI);
+ void instrumentMemIntrinsicParam(Instruction *OrigIns, Value *Addr,
Value *Size,
Instruction *InsertBefore, bool IsWrite);
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
- bool handleFunction(Module &M, Function &F);
+ bool runOnFunction(Function &F);
void createInitializerPoisonCalls(Module &M,
Value *FirstAddr, Value *LastAddr);
bool maybeInsertAsanInitAtFunctionEntry(Function &F);
- bool poisonStackInFunction(Module &M, Function &F);
- virtual bool runOnModule(Module &M);
+ bool poisonStackInFunction(Function &F);
+ virtual bool doInitialization(Module &M);
+ virtual bool doFinalization(Module &M);
bool insertGlobalRedzones(Module &M);
static char ID; // Pass identification, replacement for typeid
@@ -216,6 +207,8 @@ struct AddressSanitizer : public ModulePass {
Type *IntptrPtrTy;
Function *AsanCtorFunction;
Function *AsanInitFunction;
+ Function *AsanStackMallocFunc, *AsanStackFreeFunc;
+ Function *AsanHandleNoReturnFunc;
Instruction *CtorInsertBefore;
OwningPtr<BlackList> BL;
// This array is indexed by AccessIsWrite and log2(AccessSize).
@@ -230,8 +223,8 @@ char AddressSanitizer::ID = 0;
INITIALIZE_PASS(AddressSanitizer, "asan",
"AddressSanitizer: detects use-after-free and out-of-bounds bugs.",
false, false)
-AddressSanitizer::AddressSanitizer() : ModulePass(ID) { }
-ModulePass *llvm::createAddressSanitizerPass() {
+AddressSanitizer::AddressSanitizer() : FunctionPass(ID) { }
+FunctionPass *llvm::createAddressSanitizerPass() {
return new AddressSanitizer();
}
@@ -295,12 +288,12 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
}
void AddressSanitizer::instrumentMemIntrinsicParam(
- AsanFunctionContext &AFC, Instruction *OrigIns,
+ Instruction *OrigIns,
Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite) {
// Check the first byte.
{
IRBuilder<> IRB(InsertBefore);
- instrumentAddress(AFC, OrigIns, IRB, Addr, 8, IsWrite);
+ instrumentAddress(OrigIns, IRB, Addr, 8, IsWrite);
}
// Check the last byte.
{
@@ -310,13 +303,12 @@ void AddressSanitizer::instrumentMemIntrinsicParam(
SizeMinusOne = IRB.CreateIntCast(SizeMinusOne, IntptrTy, false);
Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
Value *AddrPlusSizeMinisOne = IRB.CreateAdd(AddrLong, SizeMinusOne);
- instrumentAddress(AFC, OrigIns, IRB, AddrPlusSizeMinisOne, 8, IsWrite);
+ instrumentAddress(OrigIns, IRB, AddrPlusSizeMinisOne, 8, IsWrite);
}
}
// Instrument memset/memmove/memcpy
-bool AddressSanitizer::instrumentMemIntrinsic(AsanFunctionContext &AFC,
- MemIntrinsic *MI) {
+bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
Value *Dst = MI->getDest();
MemTransferInst *MemTran = dyn_cast<MemTransferInst>(MI);
Value *Src = MemTran ? MemTran->getSource() : 0;
@@ -335,9 +327,9 @@ bool AddressSanitizer::instrumentMemIntrinsic(AsanFunctionContext &AFC,
InsertBefore = splitBlockAndInsertIfThen(Cmp, false);
}
- instrumentMemIntrinsicParam(AFC, MI, Dst, Length, InsertBefore, true);
+ instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true);
if (Src)
- instrumentMemIntrinsicParam(AFC, MI, Src, Length, InsertBefore, false);
+ instrumentMemIntrinsicParam(MI, Src, Length, InsertBefore, false);
return true;
}
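The helper above deliberately checks only the two end bytes of the accessed
range; a standalone sketch of that arithmetic in plain C++ (names are
hypothetical, checkByte stands in for instrumentAddress):

    #include <cstdint>
    #include <cstdio>

    // Stand-in for instrumentAddress: report the byte being shadow-checked.
    static void checkByte(uintptr_t Addr) {
      std::printf("check %#llx\n", (unsigned long long)Addr);
    }

    // A memory intrinsic touching [Addr, Addr+Size) is validated by checking
    // only its first and last byte, as instrumentMemIntrinsicParam does.
    static void checkRange(uintptr_t Addr, uintptr_t Size) {
      if (Size == 0) return;
      checkByte(Addr);            // first byte
      checkByte(Addr + Size - 1); // last byte: Addr + (Size - 1)
    }

    int main() { checkRange(0x1000, 32); } // checks 0x1000 and 0x101f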
@@ -391,7 +383,7 @@ bool AddressSanitizer::HasDynamicInitializer(GlobalVariable *G) {
return DynamicallyInitializedGlobals.count(G);
}
-void AddressSanitizer::instrumentMop(AsanFunctionContext &AFC, Instruction *I) {
+void AddressSanitizer::instrumentMop(Instruction *I) {
bool IsWrite = false;
Value *Addr = isInterestingMemoryAccess(I, &IsWrite);
assert(Addr);
@@ -424,7 +416,7 @@ void AddressSanitizer::instrumentMop(AsanFunctionContext &AFC, Instruction *I) {
}
IRBuilder<> IRB(I);
- instrumentAddress(AFC, I, IRB, Addr, TypeSize, IsWrite);
+ instrumentAddress(I, IRB, Addr, TypeSize, IsWrite);
}
// Validate the result of Module::getOrInsertFunction called for an interface
@@ -469,8 +461,7 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
}
-void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC,
- Instruction *OrigIns,
+void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
IRBuilder<> &IRB, Value *Addr,
uint32_t TypeSize, bool IsWrite) {
Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
@@ -494,7 +485,8 @@ void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC,
BasicBlock *NextBB = CheckTerm->getSuccessor(0);
IRB.SetInsertPoint(CheckTerm);
Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize);
- BasicBlock *CrashBlock = BasicBlock::Create(*C, "", &AFC.F, NextBB);
+ BasicBlock *CrashBlock =
+ BasicBlock::Create(*C, "", NextBB->getParent(), NextBB);
CrashTerm = new UnreachableInst(*C, CrashBlock);
BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2);
ReplaceInstWithInst(CheckTerm, NewTerm);
@@ -734,15 +726,16 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
}
// virtual
-bool AddressSanitizer::runOnModule(Module &M) {
+bool AddressSanitizer::doInitialization(Module &M) {
// Initialize the private fields. No one has accessed them before.
TD = getAnalysisIfAvailable<DataLayout>();
+
if (!TD)
return false;
BL.reset(new BlackList(ClBlackListFile));
C = &(M.getContext());
- LongSize = TD->getPointerSizeInBits();
+ LongSize = TD->getPointerSizeInBits(0);
IntptrTy = Type::getIntNTy(*C, LongSize);
IntptrPtrTy = PointerType::get(IntptrTy, 0);
@@ -771,6 +764,15 @@ bool AddressSanitizer::runOnModule(Module &M) {
M.getOrInsertFunction(FunctionName, IRB.getVoidTy(), IntptrTy, NULL));
}
}
+
+ AsanStackMallocFunc = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanStackMallocName, IntptrTy, IntptrTy, IntptrTy, NULL));
+ AsanStackFreeFunc = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanStackFreeName, IRB.getVoidTy(),
+ IntptrTy, IntptrTy, IntptrTy, NULL));
+ AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanHandleNoReturnName, IRB.getVoidTy(), NULL));
+
// We insert an empty inline asm after __asan_report* to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
StringRef(""), StringRef(""),
@@ -797,10 +799,6 @@ bool AddressSanitizer::runOnModule(Module &M) {
// For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
RedzoneSize = std::max(32, (int)(1 << MappingScale));
- bool Res = false;
-
- if (ClGlobals)
- Res |= insertGlobalRedzones(M);
if (ClMappingOffsetLog >= 0) {
// Tell the run-time the current values of mapping offset and scale.
@@ -820,17 +818,20 @@ bool AddressSanitizer::runOnModule(Module &M) {
IRB.CreateLoad(asan_mapping_scale, true);
}
-
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- Res |= handleFunction(M, *F);
- }
-
appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority);
- return Res;
+ return true;
+}
+
+bool AddressSanitizer::doFinalization(Module &M) {
+ // We transform the globals at the very end so that the optimization analysis
+ // works on the original globals.
+ if (ClGlobals)
+ return insertGlobalRedzones(M);
+ return false;
}
+
bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
// For each NSObject descendant having a +load method, this method is invoked
// by the ObjC runtime before any of the static constructors is called.
@@ -847,7 +848,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
return false;
}
-bool AddressSanitizer::handleFunction(Module &M, Function &F) {
+bool AddressSanitizer::runOnFunction(Function &F) {
if (BL->isIn(F)) return false;
if (&F == AsanCtorFunction) return false;
@@ -899,8 +900,6 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) {
}
}
- AsanFunctionContext AFC(F);
-
// Instrument.
int NumInstrumented = 0;
for (size_t i = 0, n = ToInstrument.size(); i != n; i++) {
@@ -908,24 +907,23 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) {
if (ClDebugMin < 0 || ClDebugMax < 0 ||
(NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
if (isInterestingMemoryAccess(Inst, &IsWrite))
- instrumentMop(AFC, Inst);
+ instrumentMop(Inst);
else
- instrumentMemIntrinsic(AFC, cast<MemIntrinsic>(Inst));
+ instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
}
NumInstrumented++;
}
DEBUG(dbgs() << F);
- bool ChangedStack = poisonStackInFunction(M, F);
+ bool ChangedStack = poisonStackInFunction(F);
// We must unpoison the stack before every NoReturn call (throw, _exit, etc.).
// See e.g. http://code.google.com/p/address-sanitizer/issues/detail?id=37
for (size_t i = 0, n = NoReturnCalls.size(); i != n; i++) {
Instruction *CI = NoReturnCalls[i];
IRBuilder<> IRB(CI);
- IRB.CreateCall(M.getOrInsertFunction(kAsanHandleNoReturnName,
- IRB.getVoidTy(), NULL));
+ IRB.CreateCall(AsanHandleNoReturnFunc);
}
return NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty();
@@ -1039,7 +1037,7 @@ bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) {
// compiler hoists the load of the shadow value somewhere too high.
// This causes asan to report a non-existing bug on 453.povray.
// It sounds like an LLVM bug.
-bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) {
+bool AddressSanitizer::poisonStackInFunction(Function &F) {
if (!ClStack) return false;
SmallVector<AllocaInst*, 16> AllocaVec;
SmallVector<Instruction*, 8> RetVec;
@@ -1089,8 +1087,6 @@ bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) {
Value *LocalStackBase = OrigStackBase;
if (DoStackMalloc) {
- Value *AsanStackMallocFunc = M.getOrInsertFunction(
- kAsanStackMallocName, IntptrTy, IntptrTy, IntptrTy, NULL);
LocalStackBase = IRB.CreateCall2(AsanStackMallocFunc,
ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase);
}
@@ -1126,7 +1122,7 @@ bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) {
ConstantInt::get(IntptrTy, LongSize/8));
BasePlus1 = IRB.CreateIntToPtr(BasePlus1, IntptrPtrTy);
Value *Description = IRB.CreatePointerCast(
- createPrivateGlobalForString(M, StackDescription.str()),
+ createPrivateGlobalForString(*F.getParent(), StackDescription.str()),
IntptrTy);
IRB.CreateStore(Description, BasePlus1);
@@ -1134,13 +1130,6 @@ bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) {
Value *ShadowBase = memToShadow(LocalStackBase, IRB);
PoisonStack(ArrayRef<AllocaInst*>(AllocaVec), IRB, ShadowBase, true);
- Value *AsanStackFreeFunc = NULL;
- if (DoStackMalloc) {
- AsanStackFreeFunc = M.getOrInsertFunction(
- kAsanStackFreeName, IRB.getVoidTy(),
- IntptrTy, IntptrTy, IntptrTy, NULL);
- }
-
// Unpoison the stack before all ret instructions.
for (size_t i = 0, n = RetVec.size(); i < n; i++) {
Instruction *Ret = RetVec[i];
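Taken together, these hunks move AddressSanitizer from ModulePass to
FunctionPass. A minimal sketch of the resulting shape, assuming the pass API
of this era (not the real pass):

    #include "llvm/Pass.h"
    #include "llvm/Module.h"
    #include "llvm/Function.h"
    using namespace llvm;

    namespace {
    // Sketch only: module-wide setup moves to doInitialization, per-function
    // instrumentation to runOnFunction, and whole-module rewrites that must
    // see the optimized module (the global redzones) to doFinalization.
    struct SketchPass : public FunctionPass {
      static char ID;
      SketchPass() : FunctionPass(ID) {}
      virtual bool doInitialization(Module &M) { return true; }
      virtual bool runOnFunction(Function &F) { return false; }
      virtual bool doFinalization(Module &M) { return false; }
    };
    }
    char SketchPass::ID = 0;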
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 4d31444b764..4d8176bc6c7 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -774,8 +774,10 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
// Conservatively require the attributes of the call to match those of the
// return. Ignore noalias because it doesn't affect the call sequence.
Attributes CalleeRetAttr = CS.getAttributes().getRetAttributes();
- if (Attributes::Builder(CalleeRetAttr ^ CallerRetAttr)
- .removeAttribute(Attributes::NoAlias).hasAttributes())
+ if (AttrBuilder(CalleeRetAttr).
+ removeAttribute(Attributes::NoAlias) !=
+ AttrBuilder(CallerRetAttr).
+ removeAttribute(Attributes::NoAlias))
continue;
// Make sure the call instruction is followed by an unconditional branch to
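The two formulations are equivalent for bitmask-style attributes: XOR-ing the
masks and clearing NoAlias from the difference matches clearing NoAlias on
each side and comparing. A self-contained check with std::bitset (the bit
positions are made up):

    #include <bitset>
    #include <cassert>
    #include <string>

    int main() {
      std::bitset<8> Callee(std::string("00101100"));
      std::bitset<8> Caller(std::string("00101000"));
      const size_t NoAlias = 2; // hypothetical bit index

      // Old formulation: XOR, clear NoAlias, test for any remaining bit.
      std::bitset<8> Diff = Callee ^ Caller;
      Diff.reset(NoAlias);
      bool MismatchOld = Diff.any();

      // New formulation: clear NoAlias on each side, compare directly.
      std::bitset<8> A = Callee, B = Caller;
      A.reset(NoAlias);
      B.reset(NoAlias);
      bool MismatchNew = (A != B);

      assert(MismatchOld == MismatchNew);
      return 0;
    }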
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 602e5a4785c..736cc05e043 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -701,6 +701,22 @@ bool DSE::HandleFree(CallInst *F) {
return MadeChange;
}
+namespace {
+ struct CouldRef {
+ typedef Value *argument_type;
+ const CallSite CS;
+ AliasAnalysis *AA;
+
+ bool operator()(Value *I) {
+ // See if the call site touches the value.
+ AliasAnalysis::ModRefResult A =
+ AA->getModRefInfo(CS, I, getPointerSize(I, *AA));
+
+ return A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref;
+ }
+ };
+}
+
/// handleEndBlock - Remove dead stores to stack-allocated locations in the
/// function end block. Ex:
/// %A = alloca i32
@@ -802,26 +818,14 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// If the call might load from any of our allocas, then any store above
// the call is live.
- SmallVector<Value*, 8> LiveAllocas;
- for (SmallSetVector<Value*, 16>::iterator I = DeadStackObjects.begin(),
- E = DeadStackObjects.end(); I != E; ++I) {
- // See if the call site touches it.
- AliasAnalysis::ModRefResult A =
- AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA));
-
- if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
- LiveAllocas.push_back(*I);
- }
+ CouldRef Pred = { CS, AA };
+ DeadStackObjects.remove_if(Pred);
// If all of the allocas were clobbered by the call then we're not going
// to find anything else to process.
- if (DeadStackObjects.size() == LiveAllocas.size())
+ if (DeadStackObjects.empty())
break;
- for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
- E = LiveAllocas.end(); I != E; ++I)
- DeadStackObjects.remove(*I);
-
continue;
}
@@ -858,6 +862,20 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
return MadeChange;
}
+namespace {
+ struct CouldAlias {
+ typedef Value *argument_type;
+ const AliasAnalysis::Location &LoadedLoc;
+ AliasAnalysis *AA;
+
+ bool operator()(Value *I) {
+ // See if the loaded location could alias the stack location.
+ AliasAnalysis::Location StackLoc(I, getPointerSize(I, *AA));
+ return !AA->isNoAlias(StackLoc, LoadedLoc);
+ }
+ };
+}
+
/// RemoveAccessedObjects - Check to see if the specified location may alias any
/// of the stack objects in the DeadStackObjects set. If so, they become live
/// because the location is being loaded.
@@ -876,16 +894,7 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
return;
}
- SmallVector<Value*, 16> NowLive;
- for (SmallSetVector<Value*, 16>::iterator I = DeadStackObjects.begin(),
- E = DeadStackObjects.end(); I != E; ++I) {
- // See if the loaded location could alias the stack location.
- AliasAnalysis::Location StackLoc(*I, getPointerSize(*I, *AA));
- if (!AA->isNoAlias(StackLoc, LoadedLoc))
- NowLive.push_back(*I);
- }
-
- for (SmallVector<Value*, 16>::iterator I = NowLive.begin(), E = NowLive.end();
- I != E; ++I)
- DeadStackObjects.remove(*I);
+ // Remove objects that could alias LoadedLoc.
+ CouldAlias Pred = { LoadedLoc, AA };
+ DeadStackObjects.remove_if(Pred);
}
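Both hunks replace a collect-then-erase loop with a predicate object handed
to remove_if, dropping the temporary SmallVector and doing a single pass.
The same pattern over a standard container (SetVector::remove_if behaves
analogously, keeping survivors in order):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    // Stateful predicate in the style of CouldRef/CouldAlias above: the
    // query context lives in fields, operator() answers per element.
    struct IsMultipleOf {
      int Divisor;
      bool operator()(int V) const { return V % Divisor == 0; }
    };

    int main() {
      std::vector<int> Objects;
      for (int i = 1; i <= 10; ++i) Objects.push_back(i);
      IsMultipleOf Pred = { 3 };
      Objects.erase(std::remove_if(Objects.begin(), Objects.end(), Pred),
                    Objects.end());
      assert(Objects.size() == 7); // 3, 6 and 9 removed in a single pass
      return 0;
    }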
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 958348d9faa..99a62dbe62f 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -37,7 +37,7 @@
//
// TODO: Handle multiple loops at a time.
//
-// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
+// TODO: Should AddrMode::BaseGV be changed to a ConstantExpr
// instead of a GlobalValue?
//
// TODO: When truncation is free, truncate ICmp users' operands to make it a
@@ -67,6 +67,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/TargetTransformInfo.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/DenseSet.h"
@@ -74,7 +75,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
#include <algorithm>
using namespace llvm;
@@ -1118,7 +1118,7 @@ public:
enum KindType {
Basic, ///< A normal use, with no folding.
Special, ///< A special case of basic, allowing -1 scales.
- Address, ///< An address use; folding according to TargetLowering
+ Address, ///< An address use; folding according to ScalarTargetTransformInfo.
ICmpZero ///< An equality icmp with both operands folded into one.
// TODO: Add a generic icmp too?
};
@@ -1272,12 +1272,12 @@ void LSRUse::dump() const {
/// address-mode folding and special icmp tricks.
static bool isLegalUse(const AddrMode &AM,
LSRUse::KindType Kind, Type *AccessTy,
- const TargetLowering *TLI) {
+ const ScalarTargetTransformInfo *STTI) {
switch (Kind) {
case LSRUse::Address:
// If we have low-level target information, ask the target if it can
// completely fold this address.
- if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy);
+ if (STTI) return STTI->isLegalAddressingMode(AM, AccessTy);
// Otherwise, just guess that reg+reg addressing is legal.
return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;
@@ -1300,7 +1300,7 @@ static bool isLegalUse(const AddrMode &AM,
// If we have low-level target information, ask the target if it can fold an
// integer immediate on an icmp.
if (AM.BaseOffs != 0) {
- if (!TLI)
+ if (!STTI)
return false;
// We have one of:
// ICmpZero BaseReg + Offset => ICmp BaseReg, -Offset
@@ -1309,7 +1309,7 @@ static bool isLegalUse(const AddrMode &AM,
int64_t Offs = AM.BaseOffs;
if (AM.Scale == 0)
Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN.
- return TLI->isLegalICmpImmediate(Offs);
+ return STTI->isLegalICmpImmediate(Offs);
}
// ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
@@ -1330,20 +1330,20 @@ static bool isLegalUse(const AddrMode &AM,
static bool isLegalUse(AddrMode AM,
int64_t MinOffset, int64_t MaxOffset,
LSRUse::KindType Kind, Type *AccessTy,
- const TargetLowering *TLI) {
+ const ScalarTargetTransformInfo *STTI) {
// Check for overflow.
if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) !=
(MinOffset > 0))
return false;
AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset;
- if (isLegalUse(AM, Kind, AccessTy, TLI)) {
+ if (isLegalUse(AM, Kind, AccessTy, STTI)) {
AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset;
// Check for overflow.
if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) !=
(MaxOffset > 0))
return false;
AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset;
- return isLegalUse(AM, Kind, AccessTy, TLI);
+ return isLegalUse(AM, Kind, AccessTy, STTI);
}
return false;
}
@@ -1352,7 +1352,7 @@ static bool isAlwaysFoldable(int64_t BaseOffs,
GlobalValue *BaseGV,
bool HasBaseReg,
LSRUse::KindType Kind, Type *AccessTy,
- const TargetLowering *TLI) {
+ const ScalarTargetTransformInfo *STTI) {
// Fast-path: zero is always foldable.
if (BaseOffs == 0 && !BaseGV) return true;
@@ -1371,14 +1371,14 @@ static bool isAlwaysFoldable(int64_t BaseOffs,
AM.HasBaseReg = true;
}
- return isLegalUse(AM, Kind, AccessTy, TLI);
+ return isLegalUse(AM, Kind, AccessTy, STTI);
}
static bool isAlwaysFoldable(const SCEV *S,
int64_t MinOffset, int64_t MaxOffset,
bool HasBaseReg,
LSRUse::KindType Kind, Type *AccessTy,
- const TargetLowering *TLI,
+ const ScalarTargetTransformInfo *STTI,
ScalarEvolution &SE) {
// Fast-path: zero is always foldable.
if (S->isZero()) return true;
@@ -1402,7 +1402,7 @@ static bool isAlwaysFoldable(const SCEV *S,
AM.HasBaseReg = HasBaseReg;
AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
- return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
+ return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, STTI);
}
namespace {
@@ -1502,7 +1502,7 @@ class LSRInstance {
ScalarEvolution &SE;
DominatorTree &DT;
LoopInfo &LI;
- const TargetLowering *const TLI;
+ const ScalarTargetTransformInfo *const STTI;
Loop *const L;
bool Changed;
@@ -1638,7 +1638,7 @@ class LSRInstance {
Pass *P);
public:
- LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
+ LSRInstance(const ScalarTargetTransformInfo *stti, Loop *l, Pass *P);
bool getChanged() const { return Changed; }
@@ -1688,11 +1688,10 @@ void LSRInstance::OptimizeShadowIV() {
}
if (!DestTy) continue;
- if (TLI) {
+ if (STTI) {
// If the target does not support DestTy natively, do not apply
// this transformation.
- EVT DVT = TLI->getValueType(DestTy);
- if (!TLI->isTypeLegal(DVT)) continue;
+ if (!STTI->isTypeLegal(DestTy)) continue;
}
PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
@@ -2015,18 +2014,18 @@ LSRInstance::OptimizeLoopTermCond() {
if (C->getValue().getMinSignedBits() >= 64 ||
C->getValue().isMinSignedValue())
goto decline_post_inc;
- // Without TLI, assume that any stride might be valid, and so any
+ // Without STTI, assume that any stride might be valid, and so any
// use might be shared.
- if (!TLI)
+ if (!STTI)
goto decline_post_inc;
// Check for possible scaled-address reuse.
Type *AccessTy = getAccessType(UI->getUser());
AddrMode AM;
AM.Scale = C->getSExtValue();
- if (TLI->isLegalAddressingMode(AM, AccessTy))
+ if (STTI->isLegalAddressingMode(AM, AccessTy))
goto decline_post_inc;
AM.Scale = -AM.Scale;
- if (TLI->isLegalAddressingMode(AM, AccessTy))
+ if (STTI->isLegalAddressingMode(AM, AccessTy))
goto decline_post_inc;
}
}
@@ -2097,12 +2096,12 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
// Conservatively assume HasBaseReg is true for now.
if (NewOffset < LU.MinOffset) {
if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg,
- Kind, AccessTy, TLI))
+ Kind, AccessTy, STTI))
return false;
NewMinOffset = NewOffset;
} else if (NewOffset > LU.MaxOffset) {
if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg,
- Kind, AccessTy, TLI))
+ Kind, AccessTy, STTI))
return false;
NewMaxOffset = NewOffset;
}
@@ -2131,7 +2130,7 @@ LSRInstance::getUse(const SCEV *&Expr,
int64_t Offset = ExtractImmediate(Expr, SE);
// Basic uses can't accept any offset, for example.
- if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) {
+ if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, STTI)) {
Expr = Copy;
Offset = 0;
}
@@ -2396,7 +2395,7 @@ bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
/// TODO: Consider IVInc free if it's already used in another chains.
static bool
isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
- ScalarEvolution &SE, const TargetLowering *TLI) {
+ ScalarEvolution &SE, const ScalarTargetTransformInfo *STTI) {
if (StressIVChain)
return true;
@@ -2654,7 +2653,7 @@ void LSRInstance::CollectChains() {
for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
UsersIdx < NChains; ++UsersIdx) {
if (!isProfitableChain(IVChainVec[UsersIdx],
- ChainUsersVec[UsersIdx].FarUsers, SE, TLI))
+ ChainUsersVec[UsersIdx].FarUsers, SE, STTI))
continue;
// Preserve the chain at UsersIdx.
if (ChainIdx != UsersIdx)
@@ -2681,7 +2680,8 @@ void LSRInstance::FinalizeChain(IVChain &Chain) {
/// Return true if the IVInc can be folded into an addressing mode.
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
- Value *Operand, const TargetLowering *TLI) {
+ Value *Operand,
+ const ScalarTargetTransformInfo *STTI) {
const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
if (!IncConst || !isAddressUse(UserInst, Operand))
return false;
@@ -2691,7 +2691,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
int64_t IncOffset = IncConst->getValue()->getSExtValue();
if (!isAlwaysFoldable(IncOffset, /*BaseGV=*/0, /*HasBaseReg=*/false,
- LSRUse::Address, getAccessType(UserInst), TLI))
+ LSRUse::Address, getAccessType(UserInst), STTI))
return false;
return true;
@@ -2762,7 +2762,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
// If an IV increment can't be folded, use it as the next IV value.
if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand,
- TLI)) {
+ STTI)) {
assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
IVSrc = IVOper;
LeftOverExpr = 0;
@@ -3108,7 +3108,7 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// into an immediate field.
if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
Base.getNumRegs() > 1,
- LU.Kind, LU.AccessTy, TLI, SE))
+ LU.Kind, LU.AccessTy, STTI, SE))
continue;
// Collect all operands except *J.
@@ -3122,7 +3122,7 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
if (InnerAddOps.size() == 1 &&
isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset,
Base.getNumRegs() > 1,
- LU.Kind, LU.AccessTy, TLI, SE))
+ LU.Kind, LU.AccessTy, STTI, SE))
continue;
const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
@@ -3132,9 +3132,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Add the remaining pieces of the add back into the new formula.
const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
- if (TLI && InnerSumSC &&
+ if (STTI && InnerSumSC &&
SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
- TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ STTI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
InnerSumSC->getValue()->getZExtValue())) {
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
InnerSumSC->getValue()->getZExtValue();
@@ -3144,8 +3144,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Add J as its own register, or an unfolded immediate.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
- if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
- TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ if (STTI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
+ STTI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
SC->getValue()->getZExtValue()))
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
SC->getValue()->getZExtValue();
@@ -3205,7 +3205,7 @@ void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
Formula F = Base;
F.AM.BaseGV = GV;
if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI))
+ LU.Kind, LU.AccessTy, STTI))
continue;
F.BaseRegs[i] = G;
(void)InsertFormula(LU, LUIdx, F);
@@ -3230,7 +3230,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
Formula F = Base;
F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I;
if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
- LU.Kind, LU.AccessTy, TLI)) {
+ LU.Kind, LU.AccessTy, STTI)) {
// Add the offset to the base register.
const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
// If it cancelled out, drop the base register, otherwise update it.
@@ -3250,7 +3250,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
Formula F = Base;
F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm;
if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI))
+ LU.Kind, LU.AccessTy, STTI))
continue;
F.BaseRegs[i] = G;
(void)InsertFormula(LU, LUIdx, F);
@@ -3297,7 +3297,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
F.AM.BaseOffs = NewBaseOffs;
// Check that this scale is legal.
- if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI))
+ if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, STTI))
continue;
// Compensate for the use having MinOffset built into it.
@@ -3353,12 +3353,12 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
Base.AM.HasBaseReg = Base.BaseRegs.size() > 1;
// Check whether this scale is going to be legal.
if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI)) {
+ LU.Kind, LU.AccessTy, STTI)) {
// As a special case, handle out-of-loop Basic users.
// TODO: Reconsider this special case.
if (LU.Kind == LSRUse::Basic &&
isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
- LSRUse::Special, LU.AccessTy, TLI) &&
+ LSRUse::Special, LU.AccessTy, STTI) &&
LU.AllFixupsOutsideLoop)
LU.Kind = LSRUse::Special;
else
@@ -3391,8 +3391,8 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
/// GenerateTruncates - Generate reuse formulae from different IV types.
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
- // This requires TargetLowering to tell us which truncates are free.
- if (!TLI) return;
+ // This requires ScalarTargetTransformInfo to tell us which truncates are free.
+ if (!STTI) return;
// Don't bother truncating symbolic values.
if (Base.AM.BaseGV) return;
@@ -3405,7 +3405,7 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
for (SmallSetVector<Type *, 4>::const_iterator
I = Types.begin(), E = Types.end(); I != E; ++I) {
Type *SrcTy = *I;
- if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
+ if (SrcTy != DstTy && STTI->isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
@@ -3561,7 +3561,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
Formula NewF = F;
NewF.AM.BaseOffs = Offs;
if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI))
+ LU.Kind, LU.AccessTy, STTI))
continue;
NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
@@ -3586,9 +3586,9 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
Formula NewF = F;
NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI)) {
- if (!TLI ||
- !TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
+ LU.Kind, LU.AccessTy, STTI)) {
+ if (!STTI ||
+ !STTI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
continue;
NewF = F;
NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
@@ -3900,7 +3900,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
Formula &F = LUThatHas->Formulae[i];
if (!isLegalUse(F.AM,
LUThatHas->MinOffset, LUThatHas->MaxOffset,
- LUThatHas->Kind, LUThatHas->AccessTy, TLI)) {
+ LUThatHas->Kind, LUThatHas->AccessTy, STTI)) {
DEBUG(dbgs() << " Deleting "; F.print(dbgs());
dbgs() << '\n');
LUThatHas->DeleteFormula(F);
@@ -4589,12 +4589,12 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
}
-LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
+LSRInstance::LSRInstance(const ScalarTargetTransformInfo *stti, Loop *l, Pass *P)
: IU(P->getAnalysis<IVUsers>()),
SE(P->getAnalysis<ScalarEvolution>()),
DT(P->getAnalysis<DominatorTree>()),
LI(P->getAnalysis<LoopInfo>()),
- TLI(tli), L(l), Changed(false), IVIncInsertPos(0) {
+ STTI(stti), L(l), Changed(false), IVIncInsertPos(0) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
@@ -4684,7 +4684,7 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
JE = LU.Formulae.end(); J != JE; ++J)
assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI) &&
+ LU.Kind, LU.AccessTy, STTI) &&
"Illegal formula generated!");
};
#endif
@@ -4757,13 +4757,13 @@ void LSRInstance::dump() const {
namespace {
class LoopStrengthReduce : public LoopPass {
- /// TLI - Keep a pointer of a TargetLowering to consult for determining
- /// transformation profitability.
- const TargetLowering *const TLI;
+ /// ScalarTargetTransformInfo provides target information that is needed
+ /// for strength reducing loops.
+ const ScalarTargetTransformInfo *STTI;
public:
static char ID; // Pass ID, replacement for typeid
- explicit LoopStrengthReduce(const TargetLowering *tli = 0);
+ LoopStrengthReduce();
private:
bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -4783,13 +4783,12 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
-
-Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
- return new LoopStrengthReduce(TLI);
+Pass *llvm::createLoopStrengthReducePass() {
+ return new LoopStrengthReduce();
}
-LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
- : LoopPass(ID), TLI(tli) {
+LoopStrengthReduce::LoopStrengthReduce()
+ : LoopPass(ID), STTI(0) {
initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
}
@@ -4815,8 +4814,13 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
bool Changed = false;
+ TargetTransformInfo *TTI = getAnalysisIfAvailable<TargetTransformInfo>();
+
+ if (TTI)
+ STTI = TTI->getScalarTargetTransformInfo();
+
// Run the main LSR transformation.
- Changed |= LSRInstance(TLI, L, this).getChanged();
+ Changed |= LSRInstance(STTI, L, this).getChanged();
// Remove any extra phis created by processing inner loops.
Changed |= DeleteDeadPHIs(L->getHeader());
@@ -4827,7 +4831,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
Rewriter.setDebugType(DEBUG_TYPE);
#endif
unsigned numFolded = Rewriter.
- replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, TLI);
+ replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, STTI);
if (numFolded) {
Changed = true;
DeleteTriviallyDeadInstructions(DeadInsts);
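Throughout the LSR changes the target hook stays optional: when no
TargetTransformInfo is registered, STTI is null and every query falls back to
a conservative guess. A stand-alone sketch of that pattern (the types here
are stand-ins, not the real ScalarTargetTransformInfo):

    #include <cstdint>

    struct AddrModeSketch { bool HasBaseGV; int64_t BaseOffs; int64_t Scale; };

    // Stand-in for ScalarTargetTransformInfo: only the hook consulted here.
    struct TargetInfoSketch {
      virtual ~TargetInfoSketch() {}
      virtual bool isLegalAddressingMode(const AddrModeSketch &AM) const = 0;
    };

    // Mirrors isLegalUse above: ask the target when one is available,
    // otherwise guess that only simple reg+reg addressing is legal.
    bool isLegalAddress(const AddrModeSketch &AM, const TargetInfoSketch *STTI) {
      if (STTI)
        return STTI->isLegalAddressingMode(AM);
      return !AM.HasBaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;
    }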
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 517657cf526..97fff9edd68 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -174,10 +174,11 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const {
// this width can be stored. If so, check to see whether we will end up
// actually reducing the number of stores used.
unsigned Bytes = unsigned(End-Start);
- unsigned NumPointerStores = Bytes/TD.getPointerSize();
+ unsigned AS = cast<StoreInst>(TheStores[0])->getPointerAddressSpace();
+ unsigned NumPointerStores = Bytes/TD.getPointerSize(AS);
// Assume the remaining bytes, if any, are stored one byte at a time.
- unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize();
+ unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize(AS);
// If we will reduce the # stores (according to this heuristic), do the
// transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
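A worked instance of the heuristic with an 8-byte pointer (the concrete
sizes are illustrative):

    #include <cassert>

    int main() {
      unsigned Bytes = 22;    // extent of the merged store range
      unsigned PtrSize = 8;   // TD.getPointerSize(AS) for the store's space
      unsigned NumPointerStores = Bytes / PtrSize;                 // 2
      unsigned NumByteStores = Bytes - NumPointerStores * PtrSize; // 6
      // Memset is profitable if the original stores outnumber the 8 machine
      // stores this range would otherwise need.
      assert(NumPointerStores + NumByteStores == 8);
      return 0;
    }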
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index 629f9d2ff57..dfdf50549da 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -1788,9 +1788,9 @@ Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
Type *Params[] = { I8X };
FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- Attributes::Builder B;
- B.addAttribute(Attributes::NoUnwind);
- AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B));
+ AttrListPtr Attributes =
+ AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(C, Attributes::NoUnwind));
RetainRVCallee =
M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
Attributes);
@@ -1804,9 +1804,9 @@ Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
Type *Params[] = { I8X };
FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- Attributes::Builder B;
- B.addAttribute(Attributes::NoUnwind);
- AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B));
+ AttrListPtr Attributes =
+ AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(C, Attributes::NoUnwind));
AutoreleaseRVCallee =
M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy,
Attributes);
@@ -1818,9 +1818,9 @@ Constant *ObjCARCOpt::getReleaseCallee(Module *M) {
if (!ReleaseCallee) {
LLVMContext &C = M->getContext();
Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
- Attributes::Builder B;
- B.addAttribute(Attributes::NoUnwind);
- AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B));
+ AttrListPtr Attributes =
+ AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(C, Attributes::NoUnwind));
ReleaseCallee =
M->getOrInsertFunction(
"objc_release",
@@ -1834,9 +1834,9 @@ Constant *ObjCARCOpt::getRetainCallee(Module *M) {
if (!RetainCallee) {
LLVMContext &C = M->getContext();
Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
- Attributes::Builder B;
- B.addAttribute(Attributes::NoUnwind);
- AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B));
+ AttrListPtr Attributes =
+ AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(C, Attributes::NoUnwind));
RetainCallee =
M->getOrInsertFunction(
"objc_retain",
@@ -1865,9 +1865,9 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
if (!AutoreleaseCallee) {
LLVMContext &C = M->getContext();
Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
- Attributes::Builder B;
- B.addAttribute(Attributes::NoUnwind);
- AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B));
+ AttrListPtr Attributes =
+ AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(C, Attributes::NoUnwind));
AutoreleaseCallee =
M->getOrInsertFunction(
"objc_autorelease",
@@ -3840,13 +3840,10 @@ Constant *ObjCARCContract::getStoreStrongCallee(Module *M) {
Type *I8XX = PointerType::getUnqual(I8X);
Type *Params[] = { I8XX, I8X };
- Attributes::Builder BNoUnwind;
- BNoUnwind.addAttribute(Attributes::NoUnwind);
- Attributes::Builder BNoCapture;
- BNoCapture.addAttribute(Attributes::NoCapture);
AttrListPtr Attributes = AttrListPtr()
- .addAttr(~0u, Attributes::get(BNoUnwind))
- .addAttr(1, Attributes::get(BNoCapture));
+ .addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(C, Attributes::NoUnwind))
+ .addAttr(M->getContext(), 1, Attributes::get(C, Attributes::NoCapture));
StoreStrongCallee =
M->getOrInsertFunction(
@@ -3863,9 +3860,9 @@ Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) {
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
Type *Params[] = { I8X };
FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- Attributes::Builder B;
- B.addAttribute(Attributes::NoUnwind);
- AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B));
+ AttrListPtr Attributes =
+ AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(C, Attributes::NoUnwind));
RetainAutoreleaseCallee =
M->getOrInsertFunction("objc_retainAutorelease", FTy, Attributes);
}
@@ -3878,9 +3875,9 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
Type *Params[] = { I8X };
FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- Attributes::Builder B;
- B.addAttribute(Attributes::NoUnwind);
- AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B));
+ AttrListPtr Attributes =
+ AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(C, Attributes::NoUnwind));
RetainAutoreleaseRVCallee =
M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy,
Attributes);
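Every getter in this file now repeats the same attribute construction; a
hypothetical helper, built only from calls already visible in this patch,
would consolidate it:

    // Hypothetical consolidation, not part of the patch; it relies solely
    // on the 2012-era API used above.
    static Constant *getNoUnwindCallee(Module *M, StringRef Name,
                                       FunctionType *FTy) {
      LLVMContext &C = M->getContext();
      AttrListPtr Attrs =
        AttrListPtr().addAttr(C, AttrListPtr::FunctionIndex,
                              Attributes::get(C, Attributes::NoUnwind));
      return M->getOrInsertFunction(Name, FTy, Attrs);
    }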
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index ca762514929..3e84a91c1db 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -447,6 +447,7 @@ protected:
bool computeConstantGEPOffset(GetElementPtrInst &GEPI, int64_t &GEPOffset) {
GEPOffset = Offset;
+ unsigned int AS = GEPI.getPointerAddressSpace();
for (gep_type_iterator GTI = gep_type_begin(GEPI), GTE = gep_type_end(GEPI);
GTI != GTE; ++GTI) {
ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
@@ -476,7 +477,7 @@ protected:
continue;
}
- APInt Index = OpC->getValue().sextOrTrunc(TD.getPointerSizeInBits());
+ APInt Index = OpC->getValue().sextOrTrunc(TD.getPointerSizeInBits(AS));
Index *= APInt(Index.getBitWidth(),
TD.getTypeAllocSize(GTI.getIndexedType()));
Index += APInt(Index.getBitWidth(), (uint64_t)GEPOffset,
@@ -1784,7 +1785,9 @@ static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD,
break;
if (SequentialType *SeqTy = dyn_cast<SequentialType>(ElementTy)) {
ElementTy = SeqTy->getElementType();
- Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(), 0)));
+ Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(
+ ElementTy->isPointerTy() ?
+ cast<PointerType>(ElementTy)->getAddressSpace(): 0), 0)));
} else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
if (STy->element_begin() == STy->element_end())
break; // Nothing left to descend into.
@@ -2004,6 +2007,51 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
return Ptr;
}
+/// \brief Test whether we can convert a value from the old to the new type.
+///
+/// This predicate should be used to guard calls to convertValue in order to
+/// ensure that we only try to convert viable values. The strategy is that we
+/// will peel off single element struct and array wrappings to get to an
+/// underlying value, and convert that value.
+static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
+ if (OldTy == NewTy)
+ return true;
+ if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
+ return false;
+ if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
+ return false;
+
+ if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
+ if (NewTy->isPointerTy() && OldTy->isPointerTy())
+ return true;
+ if (NewTy->isIntegerTy() || OldTy->isIntegerTy())
+ return true;
+ return false;
+ }
+
+ return true;
+}
+
+/// \brief Generic routine to convert an SSA value to a value of a different
+/// type.
+///
+/// This will try various different casting techniques, such as bitcasts,
+/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
+/// two types for viability with this routine.
+static Value *convertValue(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
+ Type *Ty) {
+ assert(canConvertValue(DL, V->getType(), Ty) &&
+ "Value not convertable to type");
+ if (V->getType() == Ty)
+ return V;
+ if (V->getType()->isIntegerTy() && Ty->isPointerTy())
+ return IRB.CreateIntToPtr(V, Ty);
+ if (V->getType()->isPointerTy() && Ty->isIntegerTy())
+ return IRB.CreatePtrToInt(V, Ty);
+
+ return IRB.CreateBitCast(V, Ty);
+}
+
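The pair is meant to be used guard-then-convert; a usage sketch in the
context of this file (assumes the surrounding SROA types are in scope and is
not part of the patch):

    // Sketch only: guard every conversion with the predicate so the
    // assertion in convertValue can never fire.
    static Value *tryConvert(const DataLayout &TD, IRBuilder<> &IRB,
                             Value *V, Type *NewTy) {
      if (!canConvertValue(TD, V->getType(), NewTy))
        return 0; // caller handles the unconvertible case
      return convertValue(TD, IRB, V, NewTy); // bitcast/inttoptr/ptrtoint
    }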
/// \brief Test whether the given alloca partition can be promoted to a vector.
///
/// This is a quick test to check whether we can rewrite a particular alloca
@@ -2075,47 +2123,74 @@ static bool isVectorPromotionViable(const DataLayout &TD,
return true;
}
-/// \brief Test whether the given alloca partition can be promoted to an int.
+/// \brief Test whether the given alloca partition's integer operations can be
+/// widened to promotable ones.
///
-/// This is a quick test to check whether we can rewrite a particular alloca
-/// partition (and its newly formed alloca) into an integer alloca suitable for
-/// promotion to an SSA value. We only can ensure this for a limited set of
-/// operations, and we don't want to do the rewrites unless we are confident
-/// that the result will be promotable, so we have an early test here.
-static bool isIntegerPromotionViable(const DataLayout &TD,
- Type *AllocaTy,
- uint64_t AllocBeginOffset,
- AllocaPartitioning &P,
- AllocaPartitioning::const_use_iterator I,
- AllocaPartitioning::const_use_iterator E) {
- IntegerType *Ty = dyn_cast<IntegerType>(AllocaTy);
- if (!Ty || 8*TD.getTypeStoreSize(Ty) != Ty->getBitWidth())
+/// This is a quick test to check whether we can rewrite the integer loads and
+/// stores to a particular alloca into wider loads and stores and be able to
+/// promote the resulting alloca.
+static bool isIntegerWideningViable(const DataLayout &TD,
+ Type *AllocaTy,
+ uint64_t AllocBeginOffset,
+ AllocaPartitioning &P,
+ AllocaPartitioning::const_use_iterator I,
+ AllocaPartitioning::const_use_iterator E) {
+ uint64_t SizeInBits = TD.getTypeSizeInBits(AllocaTy);
+
+ // Don't try to handle allocas with bit-padding.
+ if (SizeInBits != TD.getTypeStoreSizeInBits(AllocaTy))
return false;
+ uint64_t Size = TD.getTypeStoreSize(AllocaTy);
+
// Check the uses to ensure they are (likely) promotable integer uses.
// Also ensure that the alloca has a covering load or store. We don't want
- // promote because of some other unsplittable entry (which we may make
- // splittable later) and lose the ability to promote each element access.
+ // to widen the integer operations only to fail to promote due to some other
+ // unsplittable entry (which we may make splittable later).
bool WholeAllocaOp = false;
for (; I != E; ++I) {
if (!I->U)
continue; // Skip dead use.
+ uint64_t RelBegin = I->BeginOffset - AllocBeginOffset;
+ uint64_t RelEnd = I->EndOffset - AllocBeginOffset;
+
// We can't reasonably handle cases where the load or store extends past
// the end of the alloca's type and into its padding.
- if ((I->EndOffset - AllocBeginOffset) > TD.getTypeStoreSize(Ty))
+ if (RelEnd > Size)
return false;
if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
- if (LI->isVolatile() || !LI->getType()->isIntegerTy())
+ if (LI->isVolatile())
return false;
- if (LI->getType() == Ty)
+ if (RelBegin == 0 && RelEnd == Size)
WholeAllocaOp = true;
+ if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
+ if (ITy->getBitWidth() < TD.getTypeStoreSize(ITy))
+ return false;
+ continue;
+ }
+ // Non-integer loads need to be convertible from the alloca type so that
+ // they are promotable.
+ if (RelBegin != 0 || RelEnd != Size ||
+ !canConvertValue(TD, AllocaTy, LI->getType()))
+ return false;
} else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
- if (SI->isVolatile() || !SI->getValueOperand()->getType()->isIntegerTy())
+ Type *ValueTy = SI->getValueOperand()->getType();
+ if (SI->isVolatile())
return false;
- if (SI->getValueOperand()->getType() == Ty)
+ if (RelBegin == 0 && RelEnd == Size)
WholeAllocaOp = true;
+ if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
+ if (ITy->getBitWidth() < TD.getTypeStoreSize(ITy))
+ return false;
+ continue;
+ }
+ // Non-integer stores need to be convertible to the alloca type so that
+ // they are promotable.
+ if (RelBegin != 0 || RelEnd != Size ||
+ !canConvertValue(TD, ValueTy, AllocaTy))
+ return false;
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
if (MI->isVolatile())
return false;
@@ -2125,6 +2200,10 @@ static bool isIntegerPromotionViable(const DataLayout &TD,
if (!MTO.IsSplittable)
return false;
}
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->U->getUser())) {
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ return false;
} else {
return false;
}
@@ -2149,6 +2228,7 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
SROA &Pass;
AllocaInst &OldAI, &NewAI;
const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
+ Type *NewAllocaTy;
// If we are rewriting an alloca partition which can be written as pure
// vector operations, we stash extra information here. When VecTy is
@@ -2164,10 +2244,10 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
uint64_t ElementSize;
// This is a convenience and flag variable that will be null unless the new
- // alloca has a promotion-targeted integer type due to passing
- // isIntegerPromotionViable above. If it is non-null does, the desired
+ // alloca's integer operations should be widened to this integer type due to
+ // passing isIntegerWideningViable above. If it is non-null, the desired
// integer type will be stored here for easy access during rewriting.
- IntegerType *IntPromotionTy;
+ IntegerType *IntTy;
// The offset of the partition user currently being rewritten.
uint64_t BeginOffset, EndOffset;
@@ -2186,7 +2266,8 @@ public:
OldAI(OldAI), NewAI(NewAI),
NewAllocaBeginOffset(NewBeginOffset),
NewAllocaEndOffset(NewEndOffset),
- VecTy(), ElementTy(), ElementSize(), IntPromotionTy(),
+ NewAllocaTy(NewAI.getAllocatedType()),
+ VecTy(), ElementTy(), ElementSize(), IntTy(),
BeginOffset(), EndOffset() {
}
@@ -2202,9 +2283,10 @@ public:
assert((VecTy->getScalarSizeInBits() % 8) == 0 &&
"Only multiple-of-8 sized vector elements are viable");
ElementSize = VecTy->getScalarSizeInBits() / 8;
- } else if (isIntegerPromotionViable(TD, NewAI.getAllocatedType(),
- NewAllocaBeginOffset, P, I, E)) {
- IntPromotionTy = cast<IntegerType>(NewAI.getAllocatedType());
+ } else if (isIntegerWideningViable(TD, NewAI.getAllocatedType(),
+ NewAllocaBeginOffset, P, I, E)) {
+ IntTy = Type::getIntNTy(NewAI.getContext(),
+ TD.getTypeSizeInBits(NewAI.getAllocatedType()));
}
bool CanSROA = true;
for (; I != E; ++I) {
@@ -2223,6 +2305,10 @@ public:
ElementTy = 0;
ElementSize = 0;
}
+ if (IntTy) {
+ assert(CanSROA);
+ IntTy = 0;
+ }
return CanSROA;
}
@@ -2239,7 +2325,8 @@ private:
Value *getAdjustedAllocaPtr(IRBuilder<> &IRB, Type *PointerTy) {
assert(BeginOffset >= NewAllocaBeginOffset);
- APInt Offset(TD.getPointerSizeInBits(), BeginOffset - NewAllocaBeginOffset);
+ unsigned AS = cast<PointerType>(PointerTy)->getAddressSpace();
+ APInt Offset(TD.getPointerSizeInBits(AS), BeginOffset - NewAllocaBeginOffset);
return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName(""));
}
@@ -2286,55 +2373,56 @@ private:
Value *extractInteger(IRBuilder<> &IRB, IntegerType *TargetTy,
uint64_t Offset) {
- assert(IntPromotionTy && "Alloca is not an integer we can extract from");
+ assert(IntTy && "We cannot extract an integer from the alloca");
Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
getName(".load"));
+ V = convertValue(TD, IRB, V, IntTy);
assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t RelOffset = Offset - NewAllocaBeginOffset;
assert(TD.getTypeStoreSize(TargetTy) + RelOffset <=
- TD.getTypeStoreSize(IntPromotionTy) &&
+ TD.getTypeStoreSize(IntTy) &&
"Element load outside of alloca store");
uint64_t ShAmt = 8*RelOffset;
if (TD.isBigEndian())
- ShAmt = 8*(TD.getTypeStoreSize(IntPromotionTy) -
+ ShAmt = 8*(TD.getTypeStoreSize(IntTy) -
TD.getTypeStoreSize(TargetTy) - RelOffset);
if (ShAmt)
V = IRB.CreateLShr(V, ShAmt, getName(".shift"));
- if (TargetTy != IntPromotionTy) {
- assert(TargetTy->getBitWidth() < IntPromotionTy->getBitWidth() &&
- "Cannot extract to a larger integer!");
+ assert(TargetTy->getBitWidth() <= IntTy->getBitWidth() &&
+ "Cannot extract to a larger integer!");
+ if (TargetTy != IntTy)
V = IRB.CreateTrunc(V, TargetTy, getName(".trunc"));
- }
return V;
}
StoreInst *insertInteger(IRBuilder<> &IRB, Value *V, uint64_t Offset) {
IntegerType *Ty = cast<IntegerType>(V->getType());
- if (Ty == IntPromotionTy)
- return IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
-
- assert(Ty->getBitWidth() < IntPromotionTy->getBitWidth() &&
+ assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
"Cannot insert a larger integer!");
- V = IRB.CreateZExt(V, IntPromotionTy, getName(".ext"));
+ if (Ty != IntTy)
+ V = IRB.CreateZExt(V, IntTy, getName(".ext"));
assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t RelOffset = Offset - NewAllocaBeginOffset;
assert(TD.getTypeStoreSize(Ty) + RelOffset <=
- TD.getTypeStoreSize(IntPromotionTy) &&
+ TD.getTypeStoreSize(IntTy) &&
"Element store outside of alloca store");
uint64_t ShAmt = 8*RelOffset;
if (TD.isBigEndian())
- ShAmt = 8*(TD.getTypeStoreSize(IntPromotionTy) - TD.getTypeStoreSize(Ty)
+ ShAmt = 8*(TD.getTypeStoreSize(IntTy) - TD.getTypeStoreSize(Ty)
- RelOffset);
if (ShAmt)
V = IRB.CreateShl(V, ShAmt, getName(".shift"));
- APInt Mask = ~Ty->getMask().zext(IntPromotionTy->getBitWidth()).shl(ShAmt);
- Value *Old = IRB.CreateAnd(IRB.CreateAlignedLoad(&NewAI,
- NewAI.getAlignment(),
- getName(".oldload")),
- Mask, getName(".mask"));
- return IRB.CreateAlignedStore(IRB.CreateOr(Old, V, getName(".insert")),
- &NewAI, NewAI.getAlignment());
+ if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
+ APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".oldload"));
+ Old = convertValue(TD, IRB, Old, IntTy);
+ Old = IRB.CreateAnd(Old, Mask, getName(".mask"));
+ V = IRB.CreateOr(Old, V, getName(".insert"));
+ }
+ V = convertValue(TD, IRB, V, NewAllocaTy);
+ return IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
}
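insertInteger is a read-modify-write over the widened alloca; its
little-endian shift-and-mask arithmetic, reduced to plain integers (the
values are illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Old = 0x1111111111111111ULL; // current widened contents
      uint16_t V = 0xABCD;                  // narrow value being stored
      unsigned RelOffset = 2;               // byte offset within the alloca
      unsigned ShAmt = 8 * RelOffset;       // little-endian shift amount
      uint64_t Mask = ~(uint64_t(0xFFFF) << ShAmt);
      uint64_t New = (Old & Mask) | (uint64_t(V) << ShAmt);
      assert(New == 0x11111111ABCD1111ULL); // bytes 2-3 replaced, rest kept
      return 0;
    }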
void deleteIfTriviallyDead(Value *V) {
@@ -2343,15 +2431,6 @@ private:
Pass.DeadInsts.push_back(I);
}
- Value *getValueCast(IRBuilder<> &IRB, Value *V, Type *Ty) {
- if (V->getType()->isIntegerTy() && Ty->isPointerTy())
- return IRB.CreateIntToPtr(V, Ty);
- if (V->getType()->isPointerTy() && Ty->isIntegerTy())
- return IRB.CreatePtrToInt(V, Ty);
-
- return IRB.CreateBitCast(V, Ty);
- }
-
bool rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) {
Value *Result;
if (LI.getType() == VecTy->getElementType() ||
@@ -2364,7 +2443,7 @@ private:
getName(".load"));
}
if (Result->getType() != LI.getType())
- Result = getValueCast(IRB, Result, LI.getType());
+ Result = convertValue(TD, IRB, Result, LI.getType());
LI.replaceAllUsesWith(Result);
Pass.DeadInsts.push_back(&LI);
@@ -2390,9 +2469,23 @@ private:
if (VecTy)
return rewriteVectorizedLoadInst(IRB, LI, OldOp);
- if (IntPromotionTy)
+ if (IntTy && LI.getType()->isIntegerTy())
return rewriteIntegerLoad(IRB, LI);
+ if (BeginOffset == NewAllocaBeginOffset &&
+ canConvertValue(TD, NewAllocaTy, LI.getType())) {
+ Value *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ LI.isVolatile(), getName(".load"));
+ Value *NewV = convertValue(TD, IRB, NewLI, LI.getType());
+ LI.replaceAllUsesWith(NewV);
+ Pass.DeadInsts.push_back(&LI);
+
+ DEBUG(dbgs() << " to: " << *NewLI << "\n");
+ return !LI.isVolatile();
+ }
+
+ assert(!IntTy && "Invalid load found with int-op widening enabled");
+
Value *NewPtr = getAdjustedAllocaPtr(IRB,
LI.getPointerOperand()->getType());
LI.setOperand(0, NewPtr);
@@ -2409,13 +2502,13 @@ private:
if (V->getType() == ElementTy ||
BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
if (V->getType() != ElementTy)
- V = getValueCast(IRB, V, ElementTy);
+ V = convertValue(TD, IRB, V, ElementTy);
LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
getName(".load"));
V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset),
getName(".insert"));
} else if (V->getType() != VecTy) {
- V = getValueCast(IRB, V, VecTy);
+ V = convertValue(TD, IRB, V, VecTy);
}
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
Pass.DeadInsts.push_back(&SI);
@@ -2442,16 +2535,31 @@ private:
if (VecTy)
return rewriteVectorizedStoreInst(IRB, SI, OldOp);
- if (IntPromotionTy)
+ Type *ValueTy = SI.getValueOperand()->getType();
+ if (IntTy && ValueTy->isIntegerTy())
return rewriteIntegerStore(IRB, SI);
// Strip all inbounds GEPs and pointer casts to try to dig out any root
// alloca that should be re-examined after promoting this alloca.
- if (SI.getValueOperand()->getType()->isPointerTy())
+ if (ValueTy->isPointerTy())
if (AllocaInst *AI = dyn_cast<AllocaInst>(SI.getValueOperand()
->stripInBoundsOffsets()))
Pass.PostPromotionWorklist.insert(AI);
+ if (BeginOffset == NewAllocaBeginOffset &&
+ canConvertValue(TD, ValueTy, NewAllocaTy)) {
+ Value *NewV = convertValue(TD, IRB, SI.getValueOperand(), NewAllocaTy);
+ StoreInst *NewSI = IRB.CreateAlignedStore(NewV, &NewAI, NewAI.getAlignment(),
+ SI.isVolatile());
+ (void)NewSI;
+ Pass.DeadInsts.push_back(&SI);
+
+ DEBUG(dbgs() << " to: " << *NewSI << "\n");
+ return !SI.isVolatile();
+ }
+
+ assert(!IntTy && "Invalid store found with int-op widening enabled");
+
Value *NewPtr = getAdjustedAllocaPtr(IRB,
SI.getPointerOperand()->getType());
SI.setOperand(1, NewPtr);
@@ -2487,10 +2595,11 @@ private:
// If this doesn't map cleanly onto the alloca type, and that type isn't
// a single value type, just emit a memset.
- if (!VecTy && (BeginOffset != NewAllocaBeginOffset ||
- EndOffset != NewAllocaEndOffset ||
- !AllocaTy->isSingleValueType() ||
- !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) {
+ if (!VecTy && !IntTy &&
+ (BeginOffset != NewAllocaBeginOffset ||
+ EndOffset != NewAllocaEndOffset ||
+ !AllocaTy->isSingleValueType() ||
+ !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) {
Type *SizeTy = II.getLength()->getType();
Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset);
CallInst *New
@@ -2508,32 +2617,24 @@ private:
// a sensible representation for the alloca type. This is essentially
// splatting the byte to a sufficiently wide integer, bitcasting to the
// desired scalar type, and splatting it across any desired vector type.
+ uint64_t Size = EndOffset - BeginOffset;
Value *V = II.getValue();
IntegerType *VTy = cast<IntegerType>(V->getType());
- Type *IntTy = Type::getIntNTy(VTy->getContext(),
- TD.getTypeSizeInBits(ScalarTy));
- if (TD.getTypeSizeInBits(ScalarTy) > VTy->getBitWidth())
- V = IRB.CreateMul(IRB.CreateZExt(V, IntTy, getName(".zext")),
+ Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8);
+ if (Size*8 > VTy->getBitWidth())
+ V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")),
ConstantExpr::getUDiv(
- Constant::getAllOnesValue(IntTy),
+ Constant::getAllOnesValue(SplatIntTy),
ConstantExpr::getZExt(
Constant::getAllOnesValue(V->getType()),
- IntTy)),
+ SplatIntTy)),
getName(".isplat"));
- if (V->getType() != ScalarTy) {
- if (ScalarTy->isPointerTy())
- V = IRB.CreateIntToPtr(V, ScalarTy);
- else if (ScalarTy->isPrimitiveType() || ScalarTy->isVectorTy())
- V = IRB.CreateBitCast(V, ScalarTy);
- else if (ScalarTy->isIntegerTy())
- llvm_unreachable("Computed different integer types with equal widths");
- else
- llvm_unreachable("Invalid scalar type");
- }
// If this is an element-wide memset of a vectorizable alloca, insert it.
if (VecTy && (BeginOffset > NewAllocaBeginOffset ||
EndOffset < NewAllocaEndOffset)) {
+ if (V->getType() != ScalarTy)
+ V = convertValue(TD, IRB, V, ScalarTy);
StoreInst *Store = IRB.CreateAlignedStore(
IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI,
NewAI.getAlignment(),
@@ -2546,18 +2647,20 @@ private:
return true;
}
- // Splat to a vector if needed.
- if (VectorType *VecTy = dyn_cast<VectorType>(AllocaTy)) {
- VectorType *SplatSourceTy = VectorType::get(V->getType(), 1);
- V = IRB.CreateShuffleVector(
- IRB.CreateInsertElement(UndefValue::get(SplatSourceTy), V,
- IRB.getInt32(0), getName(".vsplat.insert")),
- UndefValue::get(SplatSourceTy),
- ConstantVector::getSplat(VecTy->getNumElements(), IRB.getInt32(0)),
- getName(".vsplat.shuffle"));
- assert(V->getType() == VecTy);
+ // If this is a memset on an alloca where we can widen stores, insert the
+ // set integer.
+ if (IntTy && (BeginOffset > NewAllocaBeginOffset ||
+ EndOffset < NewAllocaEndOffset)) {
+ assert(!II.isVolatile());
+ StoreInst *Store = insertInteger(IRB, V, BeginOffset);
+ (void)Store;
+ DEBUG(dbgs() << " to: " << *Store << "\n");
+ return true;
}
+ if (V->getType() != AllocaTy)
+ V = convertValue(TD, IRB, V, AllocaTy);
+
Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
II.isVolatile());
(void)New;
@@ -2578,8 +2681,10 @@ private:
const AllocaPartitioning::MemTransferOffsets &MTO
= P.getMemTransferOffsets(II);
+ assert(OldPtr->getType()->isPointerTy() && "Must be a pointer type!");
+ unsigned AS = cast<PointerType>(OldPtr->getType())->getAddressSpace();
// Compute the relative offset within the transfer.
- unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ unsigned IntPtrWidth = TD.getPointerSizeInBits(AS);
APInt RelOffset(IntPtrWidth, BeginOffset - (IsDest ? MTO.DestBegin
: MTO.SourceBegin));
@@ -2618,9 +2723,9 @@ private:
// If this doesn't map cleanly onto the alloca type, and that type isn't
// a single value type, just emit a memcpy.
bool EmitMemCpy
- = !VecTy && (BeginOffset != NewAllocaBeginOffset ||
- EndOffset != NewAllocaEndOffset ||
- !NewAI.getAllocatedType()->isSingleValueType());
+ = !VecTy && !IntTy && (BeginOffset != NewAllocaBeginOffset ||
+ EndOffset != NewAllocaEndOffset ||
+ !NewAI.getAllocatedType()->isSingleValueType());
// If we're just going to emit a memcpy, the alloca hasn't changed, and the
// size hasn't been shrunk based on analysis of the viable range, this is
@@ -2642,14 +2747,23 @@ private:
if (Pass.DeadSplitInsts.insert(&II))
Pass.DeadInsts.push_back(&II);
- bool IsVectorElement = VecTy && (BeginOffset > NewAllocaBeginOffset ||
- EndOffset < NewAllocaEndOffset);
+ bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
+ EndOffset == NewAllocaEndOffset;
+ bool IsVectorElement = VecTy && !IsWholeAlloca;
+ uint64_t Size = EndOffset - BeginOffset;
+ IntegerType *SubIntTy
+ = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
: II.getRawDest()->getType();
- if (!EmitMemCpy)
- OtherPtrTy = IsVectorElement ? VecTy->getElementType()->getPointerTo()
- : NewAI.getType();
+ if (!EmitMemCpy) {
+ if (IsVectorElement)
+ OtherPtrTy = VecTy->getElementType()->getPointerTo();
+ else if (IntTy && !IsWholeAlloca)
+ OtherPtrTy = SubIntTy->getPointerTo();
+ else
+ OtherPtrTy = NewAI.getType();
+ }
// Compute the other pointer, folding as much as possible to produce
// a single, simple GEP in most cases.
@@ -2696,11 +2810,20 @@ private:
IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")),
getIndex(IRB, BeginOffset),
getName(".copyextract"));
+ } else if (IntTy && !IsWholeAlloca && !IsDest) {
+ Src = extractInteger(IRB, SubIntTy, BeginOffset);
} else {
Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
getName(".copyload"));
}
+ if (IntTy && !IsWholeAlloca && IsDest) {
+ StoreInst *Store = insertInteger(IRB, Src, BeginOffset);
+ (void)Store;
+ DEBUG(dbgs() << " to: " << *Store << "\n");
+ return true;
+ }
+
if (IsVectorElement && IsDest) {
// We have to insert into a loaded copy before storing.
Src = IRB.CreateInsertElement(
@@ -2993,6 +3116,36 @@ private:
};
}
+/// \brief Strip aggregate type wrapping.
+///
+/// This removes no-op aggregate types wrapping an underlying type. It will
+/// strip as many layers of types as it can without changing either the type
+/// size or the allocated size.
+static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
+ if (Ty->isSingleValueType())
+ return Ty;
+
+ uint64_t AllocSize = DL.getTypeAllocSize(Ty);
+ uint64_t TypeSize = DL.getTypeSizeInBits(Ty);
+
+ Type *InnerTy;
+ if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
+ InnerTy = ArrTy->getElementType();
+ } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = DL.getStructLayout(STy);
+ unsigned Index = SL->getElementContainingOffset(0);
+ InnerTy = STy->getElementType(Index);
+ } else {
+ return Ty;
+ }
+
+ if (AllocSize > DL.getTypeAllocSize(InnerTy) ||
+ TypeSize > DL.getTypeSizeInBits(InnerTy))
+ return Ty;
+
+ return stripAggregateTypeWrapping(DL, InnerTy);
+}
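For intuition, a type like { [1 x i32] } wraps a single i32 without changing either the size in bits or the allocated size, so both layers peel away. A hedged usage sketch (the helper is file-static in SROA.cpp, so this only illustrates its contract; the layout string and surrounding LLVM headers are assumptions):

LLVMContext Ctx;
DataLayout DL("e"); // little-endian default layout, assumed here
Type *I32 = Type::getInt32Ty(Ctx);
SmallVector<Type *, 1> Elts;
Elts.push_back(ArrayType::get(I32, 1));     // [1 x i32]
Type *Wrapped = StructType::get(Ctx, Elts); // { [1 x i32] }
// Each layer has the same size and alloc size as its sole element, so
// the recursion strips { [1 x i32] } -> [1 x i32] -> i32.
assert(stripAggregateTypeWrapping(DL, Wrapped) == I32);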
+
/// \brief Try to find a partition of the aggregate type passed in for a given
/// offset and size.
///
@@ -3009,7 +3162,7 @@ private:
static Type *getTypePartition(const DataLayout &TD, Type *Ty,
uint64_t Offset, uint64_t Size) {
if (Offset == 0 && TD.getTypeAllocSize(Ty) == Size)
- return Ty;
+ return stripAggregateTypeWrapping(TD, Ty);
if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {
// We can't partition pointers...
@@ -3038,7 +3191,7 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
assert(Offset == 0);
if (Size == ElementSize)
- return ElementTy;
+ return stripAggregateTypeWrapping(TD, ElementTy);
assert(Size > ElementSize);
uint64_t NumElements = Size / ElementSize;
if (NumElements * ElementSize != Size)
@@ -3074,7 +3227,7 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
assert(Offset == 0);
if (Size == ElementSize)
- return ElementTy;
+ return stripAggregateTypeWrapping(TD, ElementTy);
StructType::element_iterator EI = STy->element_begin() + Index,
EE = STy->element_end();
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 73f53b7cecc..d86c4cbc9f6 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -133,295 +133,7 @@ static bool IsOnlyUsedInEqualityComparison(Value *V, Value *With) {
// String and Memory LibCall Optimizations
//===----------------------------------------------------------------------===//
-//===---------------------------------------===//
-// 'strcat' Optimizations
namespace {
-struct StrCatOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Verify the "strcat" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- FT->getReturnType() != B.getInt8PtrTy() ||
- FT->getParamType(0) != FT->getReturnType() ||
- FT->getParamType(1) != FT->getReturnType())
- return 0;
-
- // Extract some information from the instruction
- Value *Dst = CI->getArgOperand(0);
- Value *Src = CI->getArgOperand(1);
-
- // See if we can get the length of the input string.
- uint64_t Len = GetStringLength(Src);
- if (Len == 0) return 0;
- --Len; // Unbias length.
-
- // Handle the simple, do-nothing case: strcat(x, "") -> x
- if (Len == 0)
- return Dst;
-
- // These optimizations require DataLayout.
- if (!TD) return 0;
-
- return EmitStrLenMemCpy(Src, Dst, Len, B);
- }
-
- Value *EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) {
- // We need to find the end of the destination string. That's where the
- // memory is to be moved to. We just generate a call to strlen.
- Value *DstLen = EmitStrLen(Dst, B, TD, TLI);
- if (!DstLen)
- return 0;
-
- // Now that we have the destination's length, we must index into the
- // destination's pointer to get the actual memcpy destination (end of
- // the string .. we're concatenating).
- Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
-
- // We have enough information to now generate the memcpy call to do the
- // concatenation for us. Make a memcpy to copy the nul byte with align = 1.
- B.CreateMemCpy(CpyDst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1);
- return Dst;
- }
-};
-
-//===---------------------------------------===//
-// 'strncat' Optimizations
-
-struct StrNCatOpt : public StrCatOpt {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Verify the "strncat" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 ||
- FT->getReturnType() != B.getInt8PtrTy() ||
- FT->getParamType(0) != FT->getReturnType() ||
- FT->getParamType(1) != FT->getReturnType() ||
- !FT->getParamType(2)->isIntegerTy())
- return 0;
-
- // Extract some information from the instruction
- Value *Dst = CI->getArgOperand(0);
- Value *Src = CI->getArgOperand(1);
- uint64_t Len;
-
- // We don't do anything if length is not constant
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
- Len = LengthArg->getZExtValue();
- else
- return 0;
-
- // See if we can get the length of the input string.
- uint64_t SrcLen = GetStringLength(Src);
- if (SrcLen == 0) return 0;
- --SrcLen; // Unbias length.
-
- // Handle the simple, do-nothing cases:
- // strncat(x, "", c) -> x
- // strncat(x, c, 0) -> x
- if (SrcLen == 0 || Len == 0) return Dst;
-
- // These optimizations require DataLayout.
- if (!TD) return 0;
-
- // We don't optimize this case
- if (Len < SrcLen) return 0;
-
- // strncat(x, s, c) -> strcat(x, s)
- // s is constant so the strcat can be optimized further
- return EmitStrLenMemCpy(Src, Dst, SrcLen, B);
- }
-};
-
-//===---------------------------------------===//
-// 'strchr' Optimizations
-
-struct StrChrOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Verify the "strchr" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- FT->getReturnType() != B.getInt8PtrTy() ||
- FT->getParamType(0) != FT->getReturnType() ||
- !FT->getParamType(1)->isIntegerTy(32))
- return 0;
-
- Value *SrcStr = CI->getArgOperand(0);
-
- // If the second operand is non-constant, see if we can compute the length
- // of the input string and turn this into memchr.
- ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
- if (CharC == 0) {
- // These optimizations require DataLayout.
- if (!TD) return 0;
-
- uint64_t Len = GetStringLength(SrcStr);
- if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32.
- return 0;
-
- return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
- ConstantInt::get(TD->getIntPtrType(*Context), Len),
- B, TD, TLI);
- }
-
- // Otherwise, the character is a constant, see if the first argument is
- // a string literal. If so, we can constant fold.
- StringRef Str;
- if (!getConstantStringInfo(SrcStr, Str))
- return 0;
-
- // Compute the offset, make sure to handle the case when we're searching for
- // zero (a weird way to spell strlen).
- size_t I = CharC->getSExtValue() == 0 ?
- Str.size() : Str.find(CharC->getSExtValue());
- if (I == StringRef::npos) // Didn't find the char. strchr returns null.
- return Constant::getNullValue(CI->getType());
-
- // strchr(s+n,c) -> gep(s+n+i,c)
- return B.CreateGEP(SrcStr, B.getInt64(I), "strchr");
- }
-};
-
-//===---------------------------------------===//
-// 'strrchr' Optimizations
-
-struct StrRChrOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Verify the "strrchr" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- FT->getReturnType() != B.getInt8PtrTy() ||
- FT->getParamType(0) != FT->getReturnType() ||
- !FT->getParamType(1)->isIntegerTy(32))
- return 0;
-
- Value *SrcStr = CI->getArgOperand(0);
- ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
-
- // Cannot fold anything if we're not looking for a constant.
- if (!CharC)
- return 0;
-
- StringRef Str;
- if (!getConstantStringInfo(SrcStr, Str)) {
- // strrchr(s, 0) -> strchr(s, 0)
- if (TD && CharC->isZero())
- return EmitStrChr(SrcStr, '\0', B, TD, TLI);
- return 0;
- }
-
- // Compute the offset.
- size_t I = CharC->getSExtValue() == 0 ?
- Str.size() : Str.rfind(CharC->getSExtValue());
- if (I == StringRef::npos) // Didn't find the char. Return null.
- return Constant::getNullValue(CI->getType());
-
- // strrchr(s+n,c) -> gep(s+n+i,c)
- return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr");
- }
-};
-
-//===---------------------------------------===//
-// 'strcmp' Optimizations
-
-struct StrCmpOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Verify the "strcmp" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- !FT->getReturnType()->isIntegerTy(32) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != B.getInt8PtrTy())
- return 0;
-
- Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
- if (Str1P == Str2P) // strcmp(x,x) -> 0
- return ConstantInt::get(CI->getType(), 0);
-
- StringRef Str1, Str2;
- bool HasStr1 = getConstantStringInfo(Str1P, Str1);
- bool HasStr2 = getConstantStringInfo(Str2P, Str2);
-
- // strcmp(x, y) -> cnst (if both x and y are constant strings)
- if (HasStr1 && HasStr2)
- return ConstantInt::get(CI->getType(), Str1.compare(Str2));
-
- if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
- return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
- CI->getType()));
-
- if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
- return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
-
- // strcmp(P, "x") -> memcmp(P, "x", 2)
- uint64_t Len1 = GetStringLength(Str1P);
- uint64_t Len2 = GetStringLength(Str2P);
- if (Len1 && Len2) {
- // These optimizations require DataLayout.
- if (!TD) return 0;
-
- return EmitMemCmp(Str1P, Str2P,
- ConstantInt::get(TD->getIntPtrType(*Context),
- std::min(Len1, Len2)), B, TD, TLI);
- }
-
- return 0;
- }
-};
-
-//===---------------------------------------===//
-// 'strncmp' Optimizations
-
-struct StrNCmpOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Verify the "strncmp" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 ||
- !FT->getReturnType()->isIntegerTy(32) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != B.getInt8PtrTy() ||
- !FT->getParamType(2)->isIntegerTy())
- return 0;
-
- Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
- if (Str1P == Str2P) // strncmp(x,x,n) -> 0
- return ConstantInt::get(CI->getType(), 0);
-
- // Get the length argument if it is constant.
- uint64_t Length;
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
- Length = LengthArg->getZExtValue();
- else
- return 0;
-
- if (Length == 0) // strncmp(x,y,0) -> 0
- return ConstantInt::get(CI->getType(), 0);
-
- if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
- return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI);
-
- StringRef Str1, Str2;
- bool HasStr1 = getConstantStringInfo(Str1P, Str1);
- bool HasStr2 = getConstantStringInfo(Str2P, Str2);
-
- // strncmp(x, y) -> cnst (if both x and y are constant strings)
- if (HasStr1 && HasStr2) {
- StringRef SubStr1 = Str1.substr(0, Length);
- StringRef SubStr2 = Str2.substr(0, Length);
- return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2));
- }
-
- if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x
- return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
- CI->getType()));
-
- if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
- return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
-
- return 0;
- }
-};
-
-
//===---------------------------------------===//
// 'strcpy' Optimizations
@@ -636,9 +348,8 @@ struct StrToOpt : public LibCallOptimization {
if (isa<ConstantPointerNull>(EndPtr)) {
// With a null EndPtr, this function won't capture the main argument.
// It would be readonly too, except that it still may write to errno.
- Attributes::Builder B;
- B.addAttribute(Attributes::NoCapture);
- CI->addAttribute(1, Attributes::get(B));
+ CI->addAttribute(1, Attributes::get(Callee->getContext(),
+ Attributes::NoCapture));
}
return 0;
@@ -1564,8 +1275,6 @@ namespace {
StringMap<LibCallOptimization*> Optimizations;
// String and Memory LibCall Optimizations
- StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrRChrOpt StrRChr;
- StrCmpOpt StrCmp; StrNCmpOpt StrNCmp;
StrCpyOpt StrCpy; StrCpyOpt StrCpyChk;
StpCpyOpt StpCpy; StpCpyOpt StpCpyChk;
StrNCpyOpt StrNCpy;
@@ -1639,12 +1348,6 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2,
/// we know.
void SimplifyLibCalls::InitOptimizations() {
// String and Memory LibCall Optimizations
- Optimizations["strcat"] = &StrCat;
- Optimizations["strncat"] = &StrNCat;
- Optimizations["strchr"] = &StrChr;
- Optimizations["strrchr"] = &StrRChr;
- Optimizations["strcmp"] = &StrCmp;
- Optimizations["strncmp"] = &StrNCmp;
Optimizations["strcpy"] = &StrCpy;
Optimizations["strncpy"] = &StrNCpy;
Optimizations["stpcpy"] = &StpCpy;
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 26240d4dfe4..fa2faa2dad8 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -41,9 +41,10 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
- AWI[1] = AttributeWithIndex::get(~0u, ArrayRef<Attributes::AttrVal>(AVs, 2));
+ AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ ArrayRef<Attributes::AttrVal>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI),
@@ -67,9 +68,10 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
- AWI[1] = AttributeWithIndex::get(~0u, ArrayRef<Attributes::AttrVal>(AVs, 2));
+ AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ ArrayRef<Attributes::AttrVal>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
Constant *StrNLen = M->getOrInsertFunction("strnlen", AttrListPtr::get(AWI),
@@ -95,7 +97,8 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getParent()->getParent();
Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
AttributeWithIndex AWI =
- AttributeWithIndex::get(~0u, ArrayRef<Attributes::AttrVal>(AVs, 2));
+ AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ ArrayRef<Attributes::AttrVal>(AVs, 2));
Type *I8Ptr = B.getInt8PtrTy();
Type *I32Ty = B.getInt32Ty();
@@ -117,10 +120,11 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
- AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture);
- AWI[1] = AttributeWithIndex::get(2, Attributes::NoCapture);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
+ AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
- AWI[2] = AttributeWithIndex::get(~0u, ArrayRef<Attributes::AttrVal>(AVs, 2));
+ AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ ArrayRef<Attributes::AttrVal>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI),
@@ -147,8 +151,9 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(2, Attributes::NoCapture);
- AWI[1] = AttributeWithIndex::get(~0u, Attributes::NoUnwind);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
+ AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::NoUnwind);
Type *I8Ptr = B.getInt8PtrTy();
Value *StrCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI),
I8Ptr, I8Ptr, I8Ptr, NULL);
@@ -169,8 +174,9 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(2, Attributes::NoCapture);
- AWI[1] = AttributeWithIndex::get(~0u, Attributes::NoUnwind);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
+ AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::NoUnwind);
Type *I8Ptr = B.getInt8PtrTy();
Value *StrNCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI),
I8Ptr, I8Ptr, I8Ptr,
@@ -193,7 +199,8 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI;
- AWI = AttributeWithIndex::get(~0u, Attributes::NoUnwind);
+ AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::NoUnwind);
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemCpy = M->getOrInsertFunction("__memcpy_chk",
AttrListPtr::get(AWI),
@@ -221,7 +228,8 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI;
Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
- AWI = AttributeWithIndex::get(~0u, ArrayRef<Attributes::AttrVal>(AVs, 2));
+ AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ ArrayRef<Attributes::AttrVal>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(AWI),
B.getInt8PtrTy(),
@@ -246,10 +254,11 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
- AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture);
- AWI[1] = AttributeWithIndex::get(2, Attributes::NoCapture);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
+ AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
- AWI[2] = AttributeWithIndex::get(~0u, ArrayRef<Attributes::AttrVal>(AVs, 2));
+ AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ ArrayRef<Attributes::AttrVal>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI),
@@ -325,8 +334,9 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture);
- AWI[1] = AttributeWithIndex::get(~0u, Attributes::NoUnwind);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
+ AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::NoUnwind);
Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI),
B.getInt32Ty(),
@@ -347,8 +357,9 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(2, Attributes::NoCapture);
- AWI[1] = AttributeWithIndex::get(~0u, Attributes::NoUnwind);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
+ AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::NoUnwind);
Constant *F;
if (File->getType()->isPointerTy())
F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI),
@@ -378,9 +389,10 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
- AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture);
- AWI[1] = AttributeWithIndex::get(2, Attributes::NoCapture);
- AWI[2] = AttributeWithIndex::get(~0u, Attributes::NoUnwind);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
+ AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
+ AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::NoUnwind);
StringRef FPutsName = TLI->getName(LibFunc::fputs);
Constant *F;
if (File->getType()->isPointerTy())
@@ -409,9 +421,10 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
- AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture);
- AWI[1] = AttributeWithIndex::get(4, Attributes::NoCapture);
- AWI[2] = AttributeWithIndex::get(~0u, Attributes::NoUnwind);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
+ AWI[1] = AttributeWithIndex::get(M->getContext(), 4, Attributes::NoCapture);
+ AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::NoUnwind);
LLVMContext &Context = B.GetInsertBlock()->getContext();
StringRef FWriteName = TLI->getName(LibFunc::fwrite);
Constant *F;
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index c3f72b13afc..620209bccbc 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -28,6 +28,7 @@ add_llvm_library(LLVMTransformUtils
SimplifyCFG.cpp
SimplifyIndVar.cpp
SimplifyInstructions.cpp
+ SimplifyLibCalls.cpp
UnifyFunctionExitNodes.cpp
Utils.cpp
ValueMapper.cpp
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index e2932501f31..7ba9f6d9d25 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -98,10 +98,14 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
Anew->addAttr( OldFunc->getAttributes()
.getParamAttributes(I->getArgNo() + 1));
NewFunc->setAttributes(NewFunc->getAttributes()
- .addAttr(0, OldFunc->getAttributes()
+ .addAttr(NewFunc->getContext(),
+ AttrListPtr::ReturnIndex,
+ OldFunc->getAttributes()
.getRetAttributes()));
NewFunc->setAttributes(NewFunc->getAttributes()
- .addAttr(~0, OldFunc->getAttributes()
+ .addAttr(NewFunc->getContext(),
+ AttrListPtr::FunctionIndex,
+ OldFunc->getAttributes()
.getFnAttributes()));
}
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index a954d82c05b..9729687a83e 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -806,7 +806,8 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
const DataLayout *TD) {
assert(V->getType()->isPointerTy() &&
"getOrEnforceKnownAlignment expects a pointer!");
- unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
+ unsigned AS = cast<PointerType>(V->getType())->getAddressSpace();
+ unsigned BitWidth = TD ? TD->getPointerSizeInBits(AS) : 64;
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
ComputeMaskedBits(V, KnownZero, KnownOne, TD);
unsigned TrailZ = KnownZero.countTrailingOnes();
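The address-space parameter matters because a datalayout may declare different pointer widths per address space, and the APInt bit width used for known-bits tracking has to match the pointer actually being analyzed. A small sketch, assuming the per-address-space p1: layout syntax introduced alongside this change:

// Address space 0 pointers are 64-bit, address space 1 pointers 32-bit.
DataLayout TD("e-p:64:64-p1:32:32");
assert(TD.getPointerSizeInBits(0) == 64);
assert(TD.getPointerSizeInBits(1) == 32);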
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 930555424de..f35cbbdde5e 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -45,10 +45,10 @@
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/TargetTransformInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetLowering.h"
#include <csetjmp>
#include <set>
using namespace llvm;
@@ -70,15 +70,14 @@ namespace {
Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn;
bool useExpensiveEHSupport;
- // We peek in TLI to grab the target's jmp_buf size and alignment
- const TargetLowering *TLI;
+ // We peek in STTI to grab the target's jmp_buf size and alignment
+ const ScalarTargetTransformInfo *STTI;
public:
static char ID; // Pass identification, replacement for typeid
- explicit LowerInvoke(const TargetLowering *tli = NULL,
- bool useExpensiveEHSupport = ExpensiveEHSupport)
+ explicit LowerInvoke(bool useExpensiveEHSupport = ExpensiveEHSupport)
: FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport),
- TLI(tli) {
+ STTI(0) {
initializeLowerInvokePass(*PassRegistry::getPassRegistry());
}
bool doInitialization(Module &M);
@@ -108,21 +107,24 @@ INITIALIZE_PASS(LowerInvoke, "lowerinvoke",
char &llvm::LowerInvokePassID = LowerInvoke::ID;
// Public Interface To the LowerInvoke pass.
-FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) {
- return new LowerInvoke(TLI, ExpensiveEHSupport);
+FunctionPass *llvm::createLowerInvokePass() {
+ return new LowerInvoke(ExpensiveEHSupport);
}
-FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI,
- bool useExpensiveEHSupport) {
- return new LowerInvoke(TLI, useExpensiveEHSupport);
+FunctionPass *llvm::createLowerInvokePass(bool useExpensiveEHSupport) {
+ return new LowerInvoke(useExpensiveEHSupport);
}
// doInitialization - Make sure that there is a prototype for abort in the
// current module.
bool LowerInvoke::doInitialization(Module &M) {
+ TargetTransformInfo *TTI = getAnalysisIfAvailable<TargetTransformInfo>();
+ if (TTI)
+ STTI = TTI->getScalarTargetTransformInfo();
+
Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
if (useExpensiveEHSupport) {
// Insert a type for the linked list of jump buffers.
- unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
+ unsigned JBSize = STTI ? STTI->getJumpBufSize() : 0;
JBSize = JBSize ? JBSize : 200;
Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
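When no ScalarTargetTransformInfo is registered, the pass falls back to a 200-element array of i8*, deliberately far larger than any real jmp_buf. A quick host-side check of the size that default must cover (illustrative only):

#include <csetjmp>
#include <cstdio>

int main() {
  // The fallback [200 x i8*] is 1600 bytes on a 64-bit host; the real
  // setjmp buffer is much smaller on every common ABI.
  std::printf("sizeof(jmp_buf) = %zu bytes\n", sizeof(std::jmp_buf));
  return 0;
}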
@@ -430,7 +432,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
// Create an alloca for the incoming jump buffer ptr and the new jump buffer
// that needs to be restored on all exits from the function. This is an
// alloca because the value needs to be live across invokes.
- unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0;
+ unsigned Align = STTI ? STTI->getJumpBufAlignment() : 0;
AllocaInst *JmpBuf =
new AllocaInst(JBLinkTy, 0, Align,
"jblink", F.begin()->begin());
@@ -575,6 +577,10 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
}
bool LowerInvoke::runOnFunction(Function &F) {
+ TargetTransformInfo *TTI = getAnalysisIfAvailable<TargetTransformInfo>();
+ if (TTI)
+ STTI = TTI->getScalarTargetTransformInfo();
+
if (useExpensiveEHSupport)
return insertExpensiveEHSupport(F);
else
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index af8d1128523..a008da67e92 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -76,6 +76,8 @@ namespace {
// Comparing pointers is ok as we only rely on the order for uniquing.
return Value < RHS.Value;
}
+
+ bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
};
class SimplifyCFGOpt {
@@ -564,11 +566,7 @@ GetValueEqualityComparisonCases(TerminatorInst *TI,
/// in the list that match the specified block.
static void EliminateBlockCases(BasicBlock *BB,
std::vector<ValueEqualityComparisonCase> &Cases) {
- for (unsigned i = 0, e = Cases.size(); i != e; ++i)
- if (Cases[i].Dest == BB) {
- Cases.erase(Cases.begin()+i);
- --i; --e;
- }
+ Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end());
}
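The replacement is the standard erase-remove idiom; the operator== added above is what lets std::remove compare a ValueEqualityComparisonCase directly against a BasicBlock*. A generic sketch of the pattern:

#include <algorithm>
#include <vector>

// Remove every element comparing equal to K in one linear pass:
// std::remove compacts the kept elements and returns the new logical
// end; erase then drops the tail. Requires Case == Key to compile.
template <typename Case, typename Key>
void eraseMatching(std::vector<Case> &Cases, const Key &K) {
  Cases.erase(std::remove(Cases.begin(), Cases.end(), K), Cases.end());
}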
/// ValuesOverlap - Return true if there are any keys in C1 that exist in C2 as
@@ -695,7 +693,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
SI->removeCase(i);
}
}
- if (HasWeight)
+ if (HasWeight && Weights.size() >= 2)
SI->setMetadata(LLVMContext::MD_prof,
MDBuilder(SI->getParent()->getContext()).
createBranchWeights(Weights));
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
new file mode 100644
index 00000000000..bd28ec35273
--- /dev/null
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -0,0 +1,579 @@
+//===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the library call simplifier. It is a utility class
+// that holds a collection of per-call optimizations and rewrites calls to
+// well-known library functions (string, memory, and fortified variants)
+// into simpler, equivalent IR when the prototype and arguments allow it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/DataLayout.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+
+using namespace llvm;
+
+/// This class is the abstract base class for the set of optimizations that
+/// corresponds to one library call.
+namespace {
+class LibCallOptimization {
+protected:
+ Function *Caller;
+ const DataLayout *TD;
+ const TargetLibraryInfo *TLI;
+ LLVMContext *Context;
+public:
+ LibCallOptimization() { }
+ virtual ~LibCallOptimization() {}
+
+ /// callOptimizer - This pure virtual method is implemented by subclasses to
+ /// do various optimizations. If this returns null then no transformation was
+ /// performed. If it returns CI, then it transformed the call and CI is to be
+ /// deleted. If it returns something else, replace CI with the new value and
+ /// delete CI.
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
+ =0;
+
+ Value *optimizeCall(CallInst *CI, const DataLayout *TD,
+ const TargetLibraryInfo *TLI, IRBuilder<> &B) {
+ Caller = CI->getParent()->getParent();
+ this->TD = TD;
+ this->TLI = TLI;
+ if (CI->getCalledFunction())
+ Context = &CI->getCalledFunction()->getContext();
+
+ // We never change the calling convention.
+ if (CI->getCallingConv() != llvm::CallingConv::C)
+ return NULL;
+
+ return callOptimizer(CI->getCalledFunction(), CI, B);
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Fortified Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+struct FortifiedLibCallOptimization : public LibCallOptimization {
+protected:
+ virtual bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp,
+ bool isString) const = 0;
+};
+
+struct InstFortifiedLibCallOptimization : public FortifiedLibCallOptimization {
+ CallInst *CI;
+
+ bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
+ if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp))
+ return true;
+ if (ConstantInt *SizeCI =
+ dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
+ if (SizeCI->isAllOnesValue())
+ return true;
+ if (isString) {
+ uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp));
+ // If the length is 0 we don't know how long it is and so we can't
+ // remove the check.
+ if (Len == 0) return false;
+ return SizeCI->getZExtValue() >= Len;
+ }
+ if (ConstantInt *Arg = dyn_cast<ConstantInt>(
+ CI->getArgOperand(SizeArgOp)))
+ return SizeCI->getZExtValue() >= Arg->getZExtValue();
+ }
+ return false;
+ }
+};
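isFoldable compares the object-size operand of a __*_chk call against what is actually copied: the check folds away when the two operands are the same value, when the size is -1 (the object size is unknown, so the check can never fire), or when a known constant or string length provably fits. A mirror of the constant case on plain integers (the string case additionally consults GetStringLength):

#include <cstdint>

// Sketch of the constant-operand folding test used above.
bool isFoldableConst(uint64_t ObjSize, uint64_t CopyLen) {
  if (ObjSize == ~0ULL)
    return true;             // size -1: unknown object, check is a no-op
  return ObjSize >= CopyLen; // destination provably large enough
}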
+
+struct MemCpyChkOpt : public InstFortifiedLibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ this->CI = CI;
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return 0;
+
+ if (isFoldable(3, 2, false)) {
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+ return 0;
+ }
+};
+
+struct MemMoveChkOpt : public InstFortifiedLibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ this->CI = CI;
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return 0;
+
+ if (isFoldable(3, 2, false)) {
+ B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+ return 0;
+ }
+};
+
+struct MemSetChkOpt : public InstFortifiedLibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ this->CI = CI;
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return 0;
+
+ if (isFoldable(3, 2, false)) {
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(),
+ false);
+ B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+ return 0;
+ }
+};
+
+struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ this->CI = CI;
+ StringRef Name = Callee->getName();
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 3 ||
+ FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+ FT->getParamType(2) != TD->getIntPtrType(Context))
+ return 0;
+
+ // If a) we don't have any length information, or b) we know this will
+ // fit, then just lower to a plain st[rp]cpy. Otherwise we'll keep our
+ // st[rp]cpy_chk call, which may fail at runtime if the size is too long.
+ // TODO: It might be nice to get a maximum length out of the possible
+ // string lengths of varying source strings.
+ if (isFoldable(2, 1, true)) {
+ Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD,
+ TLI, Name.substr(2, 6));
+ return Ret;
+ }
+ return 0;
+ }
+};
+
+struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ this->CI = CI;
+ StringRef Name = Callee->getName();
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+ !FT->getParamType(2)->isIntegerTy() ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return 0;
+
+ if (isFoldable(3, 2, false)) {
+ Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TD, TLI,
+ Name.substr(2, 7));
+ return Ret;
+ }
+ return 0;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// String and Memory Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+struct StrCatOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strcat" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ FT->getParamType(1) != FT->getReturnType())
+ return 0;
+
+ // Extract some information from the instruction
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+ --Len; // Unbias length.
+
+ // Handle the simple, do-nothing case: strcat(x, "") -> x
+ if (Len == 0)
+ return Dst;
+
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ return emitStrLenMemCpy(Src, Dst, Len, B);
+ }
+
+ Value *emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
+ IRBuilder<> &B) {
+ // We need to find the end of the destination string. That's where the
+ // memory is to be moved to. We just generate a call to strlen.
+ Value *DstLen = EmitStrLen(Dst, B, TD, TLI);
+ if (!DstLen)
+ return 0;
+
+ // Now that we have the destination's length, we must index into the
+ // destination's pointer to get the actual memcpy destination (end of
+ // the string we're concatenating to).
+ Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
+
+ // We have enough information to now generate the memcpy call to do the
+ // concatenation for us. Make a memcpy to copy the nul byte with align = 1.
+ B.CreateMemCpy(CpyDst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1);
+ return Dst;
+ }
+};
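At the source level the rewrite replaces strcat's implicit scan-and-copy with one strlen of the destination plus one fixed-size memcpy that includes the nul terminator. A C-level equivalent, assuming Len is the known constant length of Src:

#include <cstring>

// Source-level picture of the strcat lowering above.
char *strcatLowered(char *Dst, const char *Src, size_t Len) {
  char *End = Dst + strlen(Dst); // EmitStrLen + the "endptr" GEP
  memcpy(End, Src, Len + 1);     // copy Len bytes plus the nul
  return Dst;
}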
+
+struct StrNCatOpt : public StrCatOpt {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strncat" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ FT->getParamType(1) != FT->getReturnType() ||
+ !FT->getParamType(2)->isIntegerTy())
+ return 0;
+
+ // Extract some information from the instruction
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ uint64_t Len;
+
+ // We don't do anything if the length is not constant.
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+ Len = LengthArg->getZExtValue();
+ else
+ return 0;
+
+ // See if we can get the length of the input string.
+ uint64_t SrcLen = GetStringLength(Src);
+ if (SrcLen == 0) return 0;
+ --SrcLen; // Unbias length.
+
+ // Handle the simple, do-nothing cases:
+ // strncat(x, "", c) -> x
+ // strncat(x, c, 0) -> x
+ if (SrcLen == 0 || Len == 0) return Dst;
+
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ // We don't optimize this case
+ if (Len < SrcLen) return 0;
+
+ // strncat(x, s, c) -> strcat(x, s)
+ // s is constant so the strcat can be optimized further
+ return emitStrLenMemCpy(Src, Dst, SrcLen, B);
+ }
+};
+
+struct StrChrOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strchr" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ !FT->getParamType(1)->isIntegerTy(32))
+ return 0;
+
+ Value *SrcStr = CI->getArgOperand(0);
+
+ // If the second operand is non-constant, see if we can compute the length
+ // of the input string and turn this into memchr.
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ if (CharC == 0) {
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ uint64_t Len = GetStringLength(SrcStr);
+ if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
+ return 0;
+
+ return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
+ ConstantInt::get(TD->getIntPtrType(*Context), Len),
+ B, TD, TLI);
+ }
+
+ // Otherwise, the character is a constant, see if the first argument is
+ // a string literal. If so, we can constant fold.
+ StringRef Str;
+ if (!getConstantStringInfo(SrcStr, Str))
+ return 0;
+
+ // Compute the offset; make sure to handle the case where we're searching
+ // for zero (a weird way to spell strlen).
+ size_t I = CharC->getSExtValue() == 0 ?
+ Str.size() : Str.find(CharC->getSExtValue());
+ if (I == StringRef::npos) // Didn't find the char. strchr returns null.
+ return Constant::getNullValue(CI->getType());
+
+ // strchr(s+n,c) -> gep(s+n+i,c)
+ return B.CreateGEP(SrcStr, B.getInt64(I), "strchr");
+ }
+};
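The two strchr rewrites, sketched at the C level: with a non-constant character but a string of known length, the call becomes memchr over one extra byte so the terminator is still findable; with a constant character and a constant string, the result folds to a pointer offset (or null if the character is absent). Illustrative only:

#include <cstring>

// strchr(S, C) with known string length Len (excluding the nul):
const char *strchrAsMemchr(const char *S, int C, size_t Len) {
  // +1 so searching for '\0' still finds the terminator.
  return static_cast<const char *>(memchr(S, C, Len + 1));
}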
+
+struct StrRChrOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strrchr" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ !FT->getParamType(1)->isIntegerTy(32))
+ return 0;
+
+ Value *SrcStr = CI->getArgOperand(0);
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+
+ // Cannot fold anything if we're not looking for a constant.
+ if (!CharC)
+ return 0;
+
+ StringRef Str;
+ if (!getConstantStringInfo(SrcStr, Str)) {
+ // strrchr(s, 0) -> strchr(s, 0)
+ if (TD && CharC->isZero())
+ return EmitStrChr(SrcStr, '\0', B, TD, TLI);
+ return 0;
+ }
+
+ // Compute the offset.
+ size_t I = CharC->getSExtValue() == 0 ?
+ Str.size() : Str.rfind(CharC->getSExtValue());
+ if (I == StringRef::npos) // Didn't find the char. Return null.
+ return Constant::getNullValue(CI->getType());
+
+ // strrchr(s+n,c) -> gep(s+n+i,c)
+ return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr");
+ }
+};
+
+struct StrCmpOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strcmp" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ !FT->getReturnType()->isIntegerTy(32) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy())
+ return 0;
+
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+ if (Str1P == Str2P) // strcmp(x,x) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ StringRef Str1, Str2;
+ bool HasStr1 = getConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = getConstantStringInfo(Str2P, Str2);
+
+ // strcmp(x, y) -> cnst (if both x and y are constant strings)
+ if (HasStr1 && HasStr2)
+ return ConstantInt::get(CI->getType(), Str1.compare(Str2));
+
+ if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
+ return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
+ CI->getType()));
+
+ if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+
+ // strcmp(P, "x") -> memcmp(P, "x", 2)
+ uint64_t Len1 = GetStringLength(Str1P);
+ uint64_t Len2 = GetStringLength(Str2P);
+ if (Len1 && Len2) {
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ return EmitMemCmp(Str1P, Str2P,
+ ConstantInt::get(TD->getIntPtrType(*Context),
+ std::min(Len1, Len2)), B, TD, TLI);
+ }
+
+ return 0;
+ }
+};
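The strcmp folds, in order: identical pointers yield 0, two constant strings fold to a constant, an empty constant on either side reduces to a single load of the other string's first byte (negated when the empty string is on the left), and two known lengths allow a bounded memcmp. The single-load cases at the C level (sketch):

// strcmp(x, "") -> *x: one zero-extended byte load replaces the call.
int strcmpEmptyRHS(const char *X) {
  return (unsigned char)*X;
}

// strcmp("", x) -> -*x: same load, negated.
int strcmpEmptyLHS(const char *X) {
  return -(int)(unsigned char)*X;
}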
+
+struct StrNCmpOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strncmp" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 ||
+ !FT->getReturnType()->isIntegerTy(32) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ !FT->getParamType(2)->isIntegerTy())
+ return 0;
+
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+ if (Str1P == Str2P) // strncmp(x,x,n) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ // Get the length argument if it is constant.
+ uint64_t Length;
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+ Length = LengthArg->getZExtValue();
+ else
+ return 0;
+
+ if (Length == 0) // strncmp(x,y,0) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
+ return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI);
+
+ StringRef Str1, Str2;
+ bool HasStr1 = getConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = getConstantStringInfo(Str2P, Str2);
+
+ // strncmp(x, y, n) -> cnst (if both x and y are constant strings)
+ if (HasStr1 && HasStr2) {
+ StringRef SubStr1 = Str1.substr(0, Length);
+ StringRef SubStr2 = Str2.substr(0, Length);
+ return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2));
+ }
+
+ if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x
+ return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
+ CI->getType()));
+
+ if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+
+ return 0;
+ }
+};
+
+} // End anonymous namespace.
+
+namespace llvm {
+
+class LibCallSimplifierImpl {
+ const DataLayout *TD;
+ const TargetLibraryInfo *TLI;
+ StringMap<LibCallOptimization*> Optimizations;
+
+ // Fortified library call optimizations.
+ MemCpyChkOpt MemCpyChk;
+ MemMoveChkOpt MemMoveChk;
+ MemSetChkOpt MemSetChk;
+ StrCpyChkOpt StrCpyChk;
+ StrNCpyChkOpt StrNCpyChk;
+
+ // String and memory library call optimizations.
+ StrCatOpt StrCat;
+ StrNCatOpt StrNCat;
+ StrChrOpt StrChr;
+ StrRChrOpt StrRChr;
+ StrCmpOpt StrCmp;
+ StrNCmpOpt StrNCmp;
+
+ void initOptimizations();
+public:
+ LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI) {
+ this->TD = TD;
+ this->TLI = TLI;
+ }
+
+ Value *optimizeCall(CallInst *CI);
+};
+
+void LibCallSimplifierImpl::initOptimizations() {
+ // Fortified library call optimizations.
+ Optimizations["__memcpy_chk"] = &MemCpyChk;
+ Optimizations["__memmove_chk"] = &MemMoveChk;
+ Optimizations["__memset_chk"] = &MemSetChk;
+ Optimizations["__strcpy_chk"] = &StrCpyChk;
+ Optimizations["__stpcpy_chk"] = &StrCpyChk;
+ Optimizations["__strncpy_chk"] = &StrNCpyChk;
+ Optimizations["__stpncpy_chk"] = &StrNCpyChk;
+ Optimizations["strcmp"] = &StrCmp;
+ Optimizations["strncmp"] = &StrNCmp;
+
+ // String and memory library call optimizations.
+ Optimizations["strcat"] = &StrCat;
+ Optimizations["strncat"] = &StrNCat;
+ Optimizations["strchr"] = &StrChr;
+ Optimizations["strrchr"] = &StrRChr;
+}
+
+Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
+ if (Optimizations.empty())
+ initOptimizations();
+
+ Function *Callee = CI->getCalledFunction();
+ LibCallOptimization *LCO = Optimizations.lookup(Callee->getName());
+ if (LCO) {
+ IRBuilder<> Builder(CI);
+ return LCO->optimizeCall(CI, TD, TLI, Builder);
+ }
+ return 0;
+}
+
+LibCallSimplifier::LibCallSimplifier(const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ Impl = new LibCallSimplifierImpl(TD, TLI);
+}
+
+LibCallSimplifier::~LibCallSimplifier() {
+ delete Impl;
+}
+
+Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
+ return Impl->optimizeCall(CI);
+}
+
+}
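A sketch of how a caller might drive the new interface, following the callOptimizer contract documented above (null means no change; a returned value, including CI itself, means CI is to be deleted). The surrounding setup of TD, TLI, and CI is assumed:

LibCallSimplifier Simplifier(TD, TLI);
if (Value *V = Simplifier.optimizeCall(CI)) {
  if (V != CI)
    CI->replaceAllUsesWith(V); // call rewritten to a new value
  CI->eraseFromParent();       // either way the original call dies
}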
diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp
index 0625ef3f09c..4f55fb203bd 100644
--- a/lib/VMCore/Attributes.cpp
+++ b/lib/VMCore/Attributes.cpp
@@ -7,11 +7,13 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the AttributesList class and Attribute utilities.
+// This file implements the Attributes, AttributesImpl, AttrBuilder,
+// AttributeListImpl, and AttrListPtr classes.
//
//===----------------------------------------------------------------------===//
#include "llvm/Attributes.h"
+#include "AttributesImpl.h"
#include "LLVMContextImpl.h"
#include "llvm/Type.h"
#include "llvm/ADT/StringExtras.h"
@@ -27,27 +29,23 @@ using namespace llvm;
// Attributes Implementation
//===----------------------------------------------------------------------===//
-Attributes::Attributes(uint64_t Val) : Attrs(Val) {}
-
-Attributes::Attributes(AttributesImpl *A) : Attrs(A->Bits) {}
-
-Attributes::Attributes(const Attributes &A) : Attrs(A.Attrs) {}
-
-// FIXME: This is temporary until we have implemented the uniquified version of
-// AttributesImpl.
-Attributes Attributes::get(Attributes::Builder &B) {
- return Attributes(B.Bits);
+Attributes Attributes::get(LLVMContext &Context, ArrayRef<AttrVal> Vals) {
+ AttrBuilder B;
+ for (ArrayRef<AttrVal>::iterator I = Vals.begin(), E = Vals.end();
+ I != E; ++I)
+ B.addAttribute(*I);
+ return Attributes::get(Context, B);
}
-Attributes Attributes::get(LLVMContext &Context, Attributes::Builder &B) {
+Attributes Attributes::get(LLVMContext &Context, AttrBuilder &B) {
// If there are no attributes, return an empty Attributes class.
- if (B.Bits == 0)
+ if (!B.hasAttributes())
return Attributes();
// Otherwise, build a key to look up the existing attributes.
LLVMContextImpl *pImpl = Context.pImpl;
FoldingSetNodeID ID;
- ID.AddInteger(B.Bits);
+ ID.AddInteger(B.Raw());
void *InsertPoint;
AttributesImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint);
@@ -55,7 +53,7 @@ Attributes Attributes::get(LLVMContext &Context, Attributes::Builder &B) {
if (!PA) {
// If we didn't find any existing attributes of the same shape then create a
// new one and insert it.
- PA = new AttributesImpl(B.Bits);
+ PA = new AttributesImpl(B.Raw());
pImpl->AttrsSet.InsertNode(PA, InsertPoint);
}
@@ -64,18 +62,22 @@ Attributes Attributes::get(LLVMContext &Context, Attributes::Builder &B) {
}
bool Attributes::hasAttribute(AttrVal Val) const {
- return Attrs.hasAttribute(Val);
+ return Attrs && Attrs->hasAttribute(Val);
+}
+
+bool Attributes::hasAttributes() const {
+ return Attrs && Attrs->hasAttributes();
}
bool Attributes::hasAttributes(const Attributes &A) const {
- return Attrs.hasAttributes(A);
+ return Attrs && Attrs->hasAttributes(A);
}
/// This returns the alignment field of an attribute as a byte alignment value.
unsigned Attributes::getAlignment() const {
if (!hasAttribute(Attributes::Alignment))
return 0;
- return 1U << ((Attrs.getAlignment() >> 16) - 1);
+ return 1U << ((Attrs->getAlignment() >> 16) - 1);
}
/// This returns the stack alignment field of an attribute as a byte alignment
@@ -83,46 +85,21 @@ unsigned Attributes::getAlignment() const {
unsigned Attributes::getStackAlignment() const {
if (!hasAttribute(Attributes::StackAlignment))
return 0;
- return 1U << ((Attrs.getStackAlignment() >> 26) - 1);
-}
-
-bool Attributes::isEmptyOrSingleton() const {
- return Attrs.isEmptyOrSingleton();
-}
-
-Attributes Attributes::operator | (const Attributes &A) const {
- return Attributes(Raw() | A.Raw());
-}
-Attributes Attributes::operator & (const Attributes &A) const {
- return Attributes(Raw() & A.Raw());
-}
-Attributes Attributes::operator ^ (const Attributes &A) const {
- return Attributes(Raw() ^ A.Raw());
-}
-Attributes &Attributes::operator |= (const Attributes &A) {
- Attrs.Bits |= A.Raw();
- return *this;
-}
-Attributes &Attributes::operator &= (const Attributes &A) {
- Attrs.Bits &= A.Raw();
- return *this;
-}
-Attributes Attributes::operator ~ () const {
- return Attributes(~Raw());
+ return 1U << ((Attrs->getStackAlignment() >> 26) - 1);
}
uint64_t Attributes::Raw() const {
- return Attrs.Bits;
+ return Attrs ? Attrs->Raw() : 0;
}
Attributes Attributes::typeIncompatible(Type *Ty) {
- Attributes::Builder Incompatible;
-
+ AttrBuilder Incompatible;
+
if (!Ty->isIntegerTy())
// Attributes that only apply to integers.
Incompatible.addAttribute(Attributes::SExt)
.addAttribute(Attributes::ZExt);
-
+
if (!Ty->isPointerTy())
// Attributes that only apply to pointers.
Incompatible.addAttribute(Attributes::ByVal)
@@ -130,8 +107,46 @@ Attributes Attributes::typeIncompatible(Type *Ty) {
.addAttribute(Attributes::NoAlias)
.addAttribute(Attributes::NoCapture)
.addAttribute(Attributes::StructRet);
-
- return Attributes(Incompatible.Bits); // FIXME: Use Attributes::get().
+
+ return Attributes::get(Ty->getContext(), Incompatible);
+}
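With every mutator now returning *this, attribute sets are built fluently and handed to Attributes::get in one expression. A minimal sketch (Ctx is an assumed LLVMContext):

AttrBuilder B;
B.addAttribute(Attributes::NoUnwind)
 .addAttribute(Attributes::ReadOnly)
 .addAlignmentAttr(16); // byte alignment, stored log2+1 encoded
Attributes Attrs = Attributes::get(Ctx, B);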
+
+/// encodeLLVMAttributesForBitcode - This returns an integer containing an
+/// encoding of all the LLVM attributes found in the given attribute bitset.
+/// Any change to this encoding is a breaking change to bitcode compatibility.
+uint64_t Attributes::encodeLLVMAttributesForBitcode(Attributes Attrs) {
+ // FIXME: It doesn't make sense to store the alignment information as an
+ // expanded out value, we should store it as a log2 value. However, we can't
+ // just change that here without breaking bitcode compatibility. If this ever
+ // becomes a problem in practice, we should introduce new tag numbers in the
+ // bitcode file and have those tags use a more efficiently encoded alignment
+ // field.
+
+ // Store the alignment in the bitcode as a 16-bit raw value instead of a 5-bit
+ // log2 encoded value. Shift the bits above the alignment up by 11 bits.
+ uint64_t EncodedAttrs = Attrs.Raw() & 0xffff;
+ if (Attrs.hasAttribute(Attributes::Alignment))
+ EncodedAttrs |= Attrs.getAlignment() << 16;
+ EncodedAttrs |= (Attrs.Raw() & (0xfffULL << 21)) << 11;
+ return EncodedAttrs;
+}
+
+/// decodeLLVMAttributesForBitcode - This returns an attribute bitset containing
+/// the LLVM attributes that have been decoded from the given integer. This
+/// function must stay in sync with 'encodeLLVMAttributesForBitcode'.
+Attributes Attributes::decodeLLVMAttributesForBitcode(LLVMContext &C,
+ uint64_t EncodedAttrs) {
+ // The alignment is stored as a 16-bit raw value in bits 16-31. We shift
+ // the bits above bit 31 down by 11 bits.
+ unsigned Alignment = (EncodedAttrs & (0xffffULL << 16)) >> 16;
+ assert((!Alignment || isPowerOf2_32(Alignment)) &&
+ "Alignment must be a power of two.");
+
+ AttrBuilder B(EncodedAttrs & 0xffff);
+ if (Alignment)
+ B.addAlignmentAttr(Alignment);
+ B.addRawValue((EncodedAttrs & (0xfffULL << 32)) >> 11);
+ return Attributes::get(C, B);
}
std::string Attributes::getAsString() const {
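
For a concrete picture of the layout produced above: attribute bits 0..15 pass
through unchanged, the alignment is widened from its 5-bit log2 slot into a
16-bit raw field at bits 16..31, and the attribute bits at 21..32 are shifted
up by 11 so they land at bits 32..43, clear of the widened field. A standalone
round-trip sketch (the masks mirror encodeLLVMAttributesForBitcode, but this is
illustrative code, not the LLVM implementation):

    #include <cassert>
    #include <cstdint>

    // Encode the way encodeLLVMAttributesForBitcode does, with the byte
    // alignment passed separately (standing in for Attrs.getAlignment()).
    static uint64_t encode(uint64_t Raw, uint64_t ByteAlign) {
      uint64_t E = Raw & 0xffff;            // bits 0..15 pass through
      E |= ByteAlign << 16;                 // alignment as a 16-bit raw value
      E |= (Raw & (0xfffULL << 21)) << 11;  // bits 21..32 move up to 32..43
      return E;
    }

    int main() {
      uint64_t Raw = 0x5 | (0x3ULL << 21);  // bits 0,2 plus bits 21,22 set
      uint64_t E = encode(Raw, 8);          // 8-byte alignment
      assert((E & 0xffff) == 0x5);          // low attributes survive
      assert(((E >> 16) & 0xffff) == 8);    // raw alignment field
      assert(((E >> 32) & 0xfff) == 0x3);   // high attributes, shifted
      return 0;
    }

Decoding reverses each step, which is why decodeLLVMAttributesForBitcode
re-encodes the alignment through addAlignmentAttr instead of copying the raw
field bits back.
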
@@ -203,61 +218,72 @@ std::string Attributes::getAsString() const {
}
//===----------------------------------------------------------------------===//
-// Attributes::Builder Implementation
+// AttrBuilder Implementation
//===----------------------------------------------------------------------===//
-Attributes::Builder &Attributes::Builder::
-addAttribute(Attributes::AttrVal Val) {
+AttrBuilder &AttrBuilder::addAttribute(Attributes::AttrVal Val){
Bits |= AttributesImpl::getAttrMask(Val);
return *this;
}
-void Attributes::Builder::addAlignmentAttr(unsigned Align) {
- if (Align == 0) return;
+AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) {
+ Bits |= Val;
+ return *this;
+}
+
+AttrBuilder &AttrBuilder::addAlignmentAttr(unsigned Align) {
+ if (Align == 0) return *this;
assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
assert(Align <= 0x40000000 && "Alignment too large.");
Bits |= (Log2_32(Align) + 1) << 16;
+ return *this;
}
-void Attributes::Builder::addStackAlignmentAttr(unsigned Align) {
+AttrBuilder &AttrBuilder::addStackAlignmentAttr(unsigned Align){
// Default alignment, allow the target to define how to align it.
- if (Align == 0) return;
+ if (Align == 0) return *this;
assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
assert(Align <= 0x100 && "Alignment too large.");
Bits |= (Log2_32(Align) + 1) << 26;
+ return *this;
}
-Attributes::Builder &Attributes::Builder::
-removeAttribute(Attributes::AttrVal Val) {
+AttrBuilder &AttrBuilder::removeAttribute(Attributes::AttrVal Val) {
Bits &= ~AttributesImpl::getAttrMask(Val);
return *this;
}
-void Attributes::Builder::removeAttributes(const Attributes &A) {
+AttrBuilder &AttrBuilder::addAttributes(const Attributes &A) {
+ Bits |= A.Raw();
+ return *this;
+}
+
+AttrBuilder &AttrBuilder::removeAttributes(const Attributes &A){
Bits &= ~A.Raw();
+ return *this;
}
-bool Attributes::Builder::hasAttribute(Attributes::AttrVal A) const {
+bool AttrBuilder::hasAttribute(Attributes::AttrVal A) const {
return Bits & AttributesImpl::getAttrMask(A);
}
-bool Attributes::Builder::hasAttributes() const {
+bool AttrBuilder::hasAttributes() const {
return Bits != 0;
}
-bool Attributes::Builder::hasAttributes(const Attributes &A) const {
+bool AttrBuilder::hasAttributes(const Attributes &A) const {
return Bits & A.Raw();
}
-bool Attributes::Builder::hasAlignmentAttr() const {
+bool AttrBuilder::hasAlignmentAttr() const {
return Bits & AttributesImpl::getAttrMask(Attributes::Alignment);
}
-uint64_t Attributes::Builder::getAlignment() const {
+uint64_t AttrBuilder::getAlignment() const {
if (!hasAlignmentAttr())
return 0;
return 1U <<
(((Bits & AttributesImpl::getAttrMask(Attributes::Alignment)) >> 16) - 1);
}
-uint64_t Attributes::Builder::getStackAlignment() const {
+uint64_t AttrBuilder::getStackAlignment() const {
if (!hasAlignmentAttr())
return 0;
return 1U <<
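
The alignment helpers above all share one convention: a power-of-two byte
alignment is stored as Log2(Align) + 1 in a five-bit field (at bit 16 for
parameter alignment, bit 26 for stack alignment), so that a zero field means
"no alignment set". A minimal round trip of that convention (illustrative
sketch only):

    #include <cassert>
    #include <cstdint>

    static unsigned log2u(unsigned V) {       // V must be a power of two
      unsigned L = 0;
      while (V >>= 1) ++L;
      return L;
    }

    int main() {
      uint64_t Bits = 0;
      unsigned Align = 16;                              // requested bytes
      Bits |= uint64_t(log2u(Align) + 1) << 16;         // encode: log2 + 1

      unsigned Field = (Bits >> 16) & 0x1f;             // five-bit field
      unsigned Decoded = Field ? 1U << (Field - 1) : 0; // decode: 1 << (f - 1)
      assert(Decoded == Align);
      return 0;
    }
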
@@ -322,10 +348,6 @@ uint64_t AttributesImpl::getStackAlignment() const {
return Bits & getAttrMask(Attributes::StackAlignment);
}
-bool AttributesImpl::isEmptyOrSingleton() const {
- return (Bits & (Bits - 1)) == 0;
-}
-
//===----------------------------------------------------------------------===//
// AttributeListImpl Definition
//===----------------------------------------------------------------------===//
@@ -341,19 +363,19 @@ static ManagedStatic<sys::SmartMutex<true> > ALMutex;
class AttributeListImpl : public FoldingSetNode {
sys::cas_flag RefCount;
-
+
// AttributesList is uniqued, these should not be publicly available.
void operator=(const AttributeListImpl &) LLVM_DELETED_FUNCTION;
AttributeListImpl(const AttributeListImpl &) LLVM_DELETED_FUNCTION;
~AttributeListImpl(); // Private implementation
public:
SmallVector<AttributeWithIndex, 4> Attrs;
-
+
AttributeListImpl(ArrayRef<AttributeWithIndex> attrs)
: Attrs(attrs.begin(), attrs.end()) {
RefCount = 0;
}
-
+
void AddRef() {
sys::SmartScopedLock<true> Lock(*ALMutex);
++RefCount;
@@ -366,7 +388,7 @@ public:
if (new_val == 0)
delete this;
}
-
+
void Profile(FoldingSetNodeID &ID) const {
Profile(ID, Attrs);
}
@@ -377,50 +399,49 @@ public:
}
}
};
-}
+
+} // end llvm namespace
AttributeListImpl::~AttributeListImpl() {
// NOTE: Lock must be acquired by caller.
AttributesLists->RemoveNode(this);
}
-
AttrListPtr AttrListPtr::get(ArrayRef<AttributeWithIndex> Attrs) {
// If there are no attributes then return a null AttributesList pointer.
if (Attrs.empty())
return AttrListPtr();
-
+
#ifndef NDEBUG
for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
- assert(Attrs[i].Attrs.hasAttributes() &&
+ assert(Attrs[i].Attrs.hasAttributes() &&
"Pointless attribute!");
assert((!i || Attrs[i-1].Index < Attrs[i].Index) &&
"Misordered AttributesList!");
}
#endif
-
+
// Otherwise, build a key to look up the existing attributes.
FoldingSetNodeID ID;
AttributeListImpl::Profile(ID, Attrs);
void *InsertPos;
-
+
sys::SmartScopedLock<true> Lock(*ALMutex);
-
+
AttributeListImpl *PAL =
AttributesLists->FindNodeOrInsertPos(ID, InsertPos);
-
+
// If we didn't find any existing attributes of the same shape then
// create a new one and insert it.
if (!PAL) {
PAL = new AttributeListImpl(Attrs);
AttributesLists->InsertNode(PAL, InsertPos);
}
-
+
// Return the AttributesList that we found or created.
return AttrListPtr(PAL);
}
-
//===----------------------------------------------------------------------===//
// AttrListPtr Method Implementations
//===----------------------------------------------------------------------===//
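
AttrListPtr::get above is the standard FoldingSet uniquing idiom, guarded by a
mutex because the table is shared: profile a key, probe the set, and allocate
only on a miss. The bare idiom, minus the locking and reference counting (a
sketch with a made-up Key node, not the real AttributeListImpl):

    #include "llvm/ADT/FoldingSet.h"
    using namespace llvm;

    struct Key : FoldingSetNode {
      unsigned Value;
      explicit Key(unsigned V) : Value(V) {}
      void Profile(FoldingSetNodeID &ID) const { ID.AddInteger(Value); }
    };

    static Key *getOrCreate(FoldingSet<Key> &Set, unsigned V) {
      FoldingSetNodeID ID;
      ID.AddInteger(V);        // must mirror Key::Profile exactly
      void *InsertPos;
      if (Key *K = Set.FindNodeOrInsertPos(ID, InsertPos))
        return K;              // hit: reuse the uniqued node
      Key *K = new Key(V);
      Set.InsertNode(K, InsertPos);
      return K;                // miss: insert and hand back the new node
    }
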
@@ -430,7 +451,7 @@ AttrListPtr::AttrListPtr(AttributeListImpl *LI) : AttrList(LI) {
}
AttrListPtr::AttrListPtr(const AttrListPtr &P) : AttrList(P.AttrList) {
- if (AttrList) AttrList->AddRef();
+ if (AttrList) AttrList->AddRef();
}
const AttrListPtr &AttrListPtr::operator=(const AttrListPtr &RHS) {
@@ -446,7 +467,7 @@ AttrListPtr::~AttrListPtr() {
if (AttrList) AttrList->DropRef();
}
-/// getNumSlots - Return the number of slots used in this attribute list.
+/// getNumSlots - Return the number of slots used in this attribute list.
/// This is the number of arguments that have an attribute set on them
/// (including the function itself).
unsigned AttrListPtr::getNumSlots() const {
@@ -460,13 +481,12 @@ const AttributeWithIndex &AttrListPtr::getSlot(unsigned Slot) const {
return AttrList->Attrs[Slot];
}
-
-/// getAttributes - The attributes for the specified index are
-/// returned. Attributes for the result are denoted with Idx = 0.
-/// Function notes are denoted with idx = ~0.
+/// getAttributes - The attributes for the specified index are returned.
+/// Attributes for the result are denoted with Idx = 0. Function notes are
+/// denoted with idx = ~0.
Attributes AttrListPtr::getAttributes(unsigned Idx) const {
if (AttrList == 0) return Attributes();
-
+
const SmallVector<AttributeWithIndex, 4> &Attrs = AttrList->Attrs;
for (unsigned i = 0, e = Attrs.size(); i != e && Attrs[i].Index <= Idx; ++i)
if (Attrs[i].Index == Idx)
@@ -497,7 +517,8 @@ Attributes &AttrListPtr::getAttributesAtIndex(unsigned i) const {
return AttrList->Attrs[i].Attrs;
}
-AttrListPtr AttrListPtr::addAttr(unsigned Idx, Attributes Attrs) const {
+AttrListPtr AttrListPtr::addAttr(LLVMContext &C, unsigned Idx,
+ Attributes Attrs) const {
Attributes OldAttrs = getAttributes(Idx);
#ifndef NDEBUG
// FIXME it is not obvious how this should work for alignment.
@@ -507,11 +528,12 @@ AttrListPtr AttrListPtr::addAttr(unsigned Idx, Attributes Attrs) const {
assert((!OldAlign || !NewAlign || OldAlign == NewAlign) &&
"Attempt to change alignment!");
#endif
-
- Attributes NewAttrs = OldAttrs | Attrs;
- if (NewAttrs == OldAttrs)
+
+ AttrBuilder NewAttrs =
+ AttrBuilder(OldAttrs).addAttributes(Attrs);
+ if (NewAttrs == AttrBuilder(OldAttrs))
return *this;
-
+
SmallVector<AttributeWithIndex, 8> NewAttrList;
if (AttrList == 0)
NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs));
@@ -524,21 +546,24 @@ AttrListPtr AttrListPtr::addAttr(unsigned Idx, Attributes Attrs) const {
// If there are attributes already at this index, merge them in.
if (i != e && OldAttrList[i].Index == Idx) {
- Attrs |= OldAttrList[i].Attrs;
+ Attrs =
+ Attributes::get(C, AttrBuilder(Attrs).
+ addAttributes(OldAttrList[i].Attrs));
++i;
}
-
+
NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs));
-
+
// Copy attributes for arguments after this one.
- NewAttrList.insert(NewAttrList.end(),
+ NewAttrList.insert(NewAttrList.end(),
OldAttrList.begin()+i, OldAttrList.end());
}
-
+
return get(NewAttrList);
}
-AttrListPtr AttrListPtr::removeAttr(unsigned Idx, Attributes Attrs) const {
+AttrListPtr AttrListPtr::removeAttr(LLVMContext &C, unsigned Idx,
+ Attributes Attrs) const {
#ifndef NDEBUG
// FIXME it is not obvious how this should work for alignment.
// For now, say we can't pass in alignment, which no current use does.
@@ -546,31 +571,33 @@ AttrListPtr AttrListPtr::removeAttr(unsigned Idx, Attributes Attrs) const {
"Attempt to exclude alignment!");
#endif
if (AttrList == 0) return AttrListPtr();
-
+
Attributes OldAttrs = getAttributes(Idx);
- Attributes NewAttrs = OldAttrs & ~Attrs;
- if (NewAttrs == OldAttrs)
+ AttrBuilder NewAttrs =
+ AttrBuilder(OldAttrs).removeAttributes(Attrs);
+ if (NewAttrs == AttrBuilder(OldAttrs))
return *this;
SmallVector<AttributeWithIndex, 8> NewAttrList;
const SmallVector<AttributeWithIndex, 4> &OldAttrList = AttrList->Attrs;
unsigned i = 0, e = OldAttrList.size();
-
+
// Copy attributes for arguments before this one.
for (; i != e && OldAttrList[i].Index < Idx; ++i)
NewAttrList.push_back(OldAttrList[i]);
-
+
// If there are attributes already at this index, merge them in.
assert(OldAttrList[i].Index == Idx && "Attribute isn't set?");
- Attrs = OldAttrList[i].Attrs & ~Attrs;
+ Attrs = Attributes::get(C, AttrBuilder(OldAttrList[i].Attrs).
+ removeAttributes(Attrs));
++i;
- if (Attrs) // If any attributes left for this parameter, add them.
+ if (Attrs.hasAttributes()) // If any attributes left for this param, add them.
NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs));
-
+
// Copy attributes for arguments after this one.
- NewAttrList.insert(NewAttrList.end(),
+ NewAttrList.insert(NewAttrList.end(),
OldAttrList.begin()+i, OldAttrList.end());
-
+
return get(NewAttrList);
}
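
Call sites now have to thread an LLVMContext through addAttr/removeAttr. A
hypothetical helper showing the shape of the new API (the type and member
names match this patch; the helper itself is invented for illustration):

    #include "llvm/Attributes.h"
    #include "llvm/Function.h"
    using namespace llvm;

    // Mark a function nounwind using the context-threaded entry points.
    static void markNoUnwind(Function &F) {
      LLVMContext &Ctx = F.getContext();
      AttrBuilder B;
      B.addAttribute(Attributes::NoUnwind);
      Attributes A = Attributes::get(Ctx, B);
      F.setAttributes(
          F.getAttributes().addAttr(Ctx, AttrListPtr::FunctionIndex, A));
    }
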
@@ -578,8 +605,8 @@ void AttrListPtr::dump() const {
dbgs() << "PAL[ ";
for (unsigned i = 0; i < getNumSlots(); ++i) {
const AttributeWithIndex &PAWI = getSlot(i);
- dbgs() << "{" << PAWI.Index << "," << PAWI.Attrs << "} ";
+ dbgs() << "{" << PAWI.Index << "," << PAWI.Attrs.getAsString() << "} ";
}
-
+
dbgs() << "]\n";
}
diff --git a/include/llvm/AttributesImpl.h b/lib/VMCore/AttributesImpl.h
index eea11a7011a..b4a0f615f36 100644
--- a/include/llvm/AttributesImpl.h
+++ b/lib/VMCore/AttributesImpl.h
@@ -22,9 +22,7 @@ namespace llvm {
class Attributes;
class AttributesImpl : public FoldingSetNode {
- friend class Attributes;
uint64_t Bits; // FIXME: We will be expanding this.
-
public:
AttributesImpl(uint64_t bits) : Bits(bits) {}
@@ -36,7 +34,7 @@ public:
uint64_t getAlignment() const;
uint64_t getStackAlignment() const;
- bool isEmptyOrSingleton() const;
+ uint64_t Raw() const { return Bits; } // FIXME: Remove.
static uint64_t getAttrMask(uint64_t Val);
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index 094ca755132..5fff460e8bc 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -148,7 +148,8 @@ bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
if (NewFn)
F = NewFn;
if (unsigned id = F->getIntrinsicID())
- F->setAttributes(Intrinsic::getAttributes((Intrinsic::ID)id));
+ F->setAttributes(Intrinsic::getAttributes(F->getContext(),
+ (Intrinsic::ID)id));
return Upgraded;
}
diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt
index 6c309679740..ba807fcacca 100644
--- a/lib/VMCore/CMakeLists.txt
+++ b/lib/VMCore/CMakeLists.txt
@@ -33,6 +33,7 @@ add_llvm_library(LLVMCore
PrintModulePass.cpp
Type.cpp
TypeFinder.cpp
+ TargetTransformInfo.cpp
Use.cpp
User.cpp
Value.cpp
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index 90ecdaecf41..847bc134ddb 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -1381,14 +1381,20 @@ void LLVMSetGC(LLVMValueRef Fn, const char *GC) {
void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
Function *Func = unwrap<Function>(Fn);
const AttrListPtr PAL = Func->getAttributes();
- const AttrListPtr PALnew = PAL.addAttr(~0U, Attributes(PA));
+ AttrBuilder B(PA);
+ const AttrListPtr PALnew =
+ PAL.addAttr(Func->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(Func->getContext(), B));
Func->setAttributes(PALnew);
}
void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
Function *Func = unwrap<Function>(Fn);
const AttrListPtr PAL = Func->getAttributes();
- const AttrListPtr PALnew = PAL.removeAttr(~0U, Attributes(PA));
+ AttrBuilder B(PA);
+ const AttrListPtr PALnew =
+ PAL.removeAttr(Func->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(Func->getContext(), B));
Func->setAttributes(PALnew);
}
@@ -1458,11 +1464,15 @@ LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) {
}
void LLVMAddAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
- unwrap<Argument>(Arg)->addAttr(Attributes(PA));
+ Argument *A = unwrap<Argument>(Arg);
+ AttrBuilder B(PA);
+ A->addAttr(Attributes::get(A->getContext(), B));
}
void LLVMRemoveAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
- unwrap<Argument>(Arg)->removeAttr(Attributes(PA));
+ Argument *A = unwrap<Argument>(Arg);
+ AttrBuilder B(PA);
+ A->removeAttr(Attributes::get(A->getContext(), B));
}
LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg) {
@@ -1474,8 +1484,10 @@ LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg) {
void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) {
- unwrap<Argument>(Arg)->addAttr(
- Attributes::constructAlignmentFromInt(align));
+ AttrBuilder B;
+ B.addAlignmentAttr(align);
+ unwrap<Argument>(Arg)->addAttr(Attributes::
+ get(unwrap<Argument>(Arg)->getContext(), B));
}
/*--.. Operations on basic blocks ..........................................--*/
@@ -1664,23 +1676,28 @@ void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) {
void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index,
LLVMAttribute PA) {
CallSite Call = CallSite(unwrap<Instruction>(Instr));
+ AttrBuilder B(PA);
Call.setAttributes(
- Call.getAttributes().addAttr(index, Attributes(PA)));
+ Call.getAttributes().addAttr(Call->getContext(), index,
+ Attributes::get(Call->getContext(), B)));
}
void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index,
LLVMAttribute PA) {
CallSite Call = CallSite(unwrap<Instruction>(Instr));
+ AttrBuilder B(PA);
Call.setAttributes(
- Call.getAttributes().removeAttr(index, Attributes(PA)));
+ Call.getAttributes().removeAttr(Call->getContext(), index,
+ Attributes::get(Call->getContext(), B)));
}
void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
unsigned align) {
CallSite Call = CallSite(unwrap<Instruction>(Instr));
- Call.setAttributes(
- Call.getAttributes().addAttr(index,
- Attributes::constructAlignmentFromInt(align)));
+ AttrBuilder B;
+ B.addAlignmentAttr(align);
+ Call.setAttributes(Call.getAttributes().addAttr(Call->getContext(), index,
+ Attributes::get(Call->getContext(), B)));
}
/*--.. Operations on call instructions (only) ..............................--*/
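
From the C API's point of view nothing changes; the wrappers above just
rebuild each LLVMAttribute through AttrBuilder internally. A caller still
writes, for example (hypothetical Fn and Arg values):

    #include "llvm-c/Core.h"

    static void annotate(LLVMValueRef Fn, LLVMValueRef Arg) {
      LLVMAddFunctionAttr(Fn, LLVMNoUnwindAttribute); // function attribute
      LLVMAddAttribute(Arg, LLVMNoAliasAttribute);    // parameter attribute
      LLVMSetParamAlignment(Arg, 16);                 // routed via AttrBuilder
    }
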
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
index 5c2a03ce091..9c4f2d93995 100644
--- a/lib/VMCore/Function.cpp
+++ b/lib/VMCore/Function.cpp
@@ -185,7 +185,7 @@ Function::Function(FunctionType *Ty, LinkageTypes Linkage,
// Ensure intrinsics have the right parameter attributes.
if (unsigned IID = getIntrinsicID())
- setAttributes(Intrinsic::getAttributes(Intrinsic::ID(IID)));
+ setAttributes(Intrinsic::getAttributes(getContext(), Intrinsic::ID(IID)));
}
@@ -249,13 +249,13 @@ void Function::dropAllReferences() {
void Function::addAttribute(unsigned i, Attributes attr) {
AttrListPtr PAL = getAttributes();
- PAL = PAL.addAttr(i, attr);
+ PAL = PAL.addAttr(getContext(), i, attr);
setAttributes(PAL);
}
void Function::removeAttribute(unsigned i, Attributes attr) {
AttrListPtr PAL = getAttributes();
- PAL = PAL.removeAttr(i, attr);
+ PAL = PAL.removeAttr(getContext(), i, attr);
setAttributes(PAL);
}
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index 74c0c6e1d97..13c4a5d2574 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -332,21 +332,22 @@ CallInst::CallInst(const CallInst &CI)
void CallInst::addAttribute(unsigned i, Attributes attr) {
AttrListPtr PAL = getAttributes();
- PAL = PAL.addAttr(i, attr);
+ PAL = PAL.addAttr(getContext(), i, attr);
setAttributes(PAL);
}
void CallInst::removeAttribute(unsigned i, Attributes attr) {
AttrListPtr PAL = getAttributes();
- PAL = PAL.removeAttr(i, attr);
+ PAL = PAL.removeAttr(getContext(), i, attr);
setAttributes(PAL);
}
bool CallInst::hasFnAttr(Attributes::AttrVal A) const {
- if (AttributeList.getParamAttributes(~0U).hasAttribute(A))
+ if (AttributeList.getParamAttributes(AttrListPtr::FunctionIndex)
+ .hasAttribute(A))
return true;
if (const Function *F = getCalledFunction())
- return F->getParamAttributes(~0U).hasAttribute(A);
+ return F->getParamAttributes(AttrListPtr::FunctionIndex).hasAttribute(A);
return false;
}
@@ -571,10 +572,11 @@ void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) {
}
bool InvokeInst::hasFnAttr(Attributes::AttrVal A) const {
- if (AttributeList.getParamAttributes(~0U).hasAttribute(A))
+ if (AttributeList.getParamAttributes(AttrListPtr::FunctionIndex).
+ hasAttribute(A))
return true;
if (const Function *F = getCalledFunction())
- return F->getParamAttributes(~0U).hasAttribute(A);
+ return F->getParamAttributes(AttrListPtr::FunctionIndex).hasAttribute(A);
return false;
}
@@ -588,13 +590,13 @@ bool InvokeInst::paramHasAttr(unsigned i, Attributes::AttrVal A) const {
void InvokeInst::addAttribute(unsigned i, Attributes attr) {
AttrListPtr PAL = getAttributes();
- PAL = PAL.addAttr(i, attr);
+ PAL = PAL.addAttr(getContext(), i, attr);
setAttributes(PAL);
}
void InvokeInst::removeAttribute(unsigned i, Attributes attr) {
AttrListPtr PAL = getAttributes();
- PAL = PAL.removeAttr(i, attr);
+ PAL = PAL.removeAttr(getContext(), i, attr);
setAttributes(PAL);
}
diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp
index a86363b632a..74247bdde13 100644
--- a/lib/VMCore/LLVMContextImpl.cpp
+++ b/lib/VMCore/LLVMContextImpl.cpp
@@ -97,9 +97,11 @@ LLVMContextImpl::~LLVMContextImpl() {
// Destroy attributes.
for (FoldingSetIterator<AttributesImpl> I = AttrsSet.begin(),
- E = AttrsSet.end(); I != E; ++I)
- delete &*I;
-
+ E = AttrsSet.end(); I != E;) {
+ FoldingSetIterator<AttributesImpl> Elem = I++;
+ delete &*Elem;
+ }
+
// Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet
// and the NonUniquedMDNodes sets, so copy the values out first.
SmallVector<MDNode*, 8> MDNodes;
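
The destructor fix above is the classic advance-before-erase idiom: deleting
the current element may invalidate any iterator referring to it, so the loop
snapshots the position and increments first. The same shape in plain C++
(illustrative):

    #include <list>

    static void destroyAll(std::list<int *> &Nodes) {
      for (auto I = Nodes.begin(), E = Nodes.end(); I != E;) {
        auto Elem = I++;     // advance before touching *Elem
        delete *Elem;        // now safe even if this mutates bookkeeping
        Nodes.erase(Elem);
      }
    }
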
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index 524f7e54bb4..ee31814c055 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -16,9 +16,9 @@
#define LLVM_LLVMCONTEXT_IMPL_H
#include "llvm/LLVMContext.h"
+#include "AttributesImpl.h"
#include "ConstantsContext.h"
#include "LeaksContext.h"
-#include "llvm/AttributesImpl.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Metadata.h"
diff --git a/lib/VMCore/TargetTransformInfo.cpp b/lib/VMCore/TargetTransformInfo.cpp
new file mode 100644
index 00000000000..3af0222a211
--- /dev/null
+++ b/lib/VMCore/TargetTransformInfo.cpp
@@ -0,0 +1,27 @@
+//===- llvm/VMCore/TargetTransformInfo.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TargetTransformInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+/// Default ctor.
+///
+/// @note This has to exist, because this is a pass, but it should never be
+/// used.
+TargetTransformInfo::TargetTransformInfo() : ImmutablePass(ID) {
+ report_fatal_error("Bad TargetTransformInfo ctor used. "
+ "Tool did not specify a TargetTransformInfo to use?");
+}
+
+INITIALIZE_PASS(TargetTransformInfo, "TargetTransformInfo",
+ "Target Transform Info", false, true)
+char TargetTransformInfo::ID = 0;
+
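
The fatal-error constructor pattern above is worth naming: an ImmutablePass
needs a default constructor for the registration machinery, but a bare
TargetTransformInfo carries no target data, so constructing one is made a hard
error. A hypothetical pass using the same guard (names invented; not the real
TTI hierarchy):

    #include "llvm/Pass.h"
    #include "llvm/Support/ErrorHandling.h"
    using namespace llvm;

    namespace {
    struct DemoInfo : ImmutablePass {
      static char ID;
      unsigned Payload;
      DemoInfo() : ImmutablePass(ID), Payload(0) {
        report_fatal_error("DemoInfo must be supplied by the tool, "
                           "never default-constructed");
      }
      explicit DemoInfo(unsigned P)   // the real entry point
          : ImmutablePass(ID), Payload(P) {}
    };
    }
    char DemoInfo::ID = 0;
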
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 53744b48691..fd629b485aa 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -567,9 +567,10 @@ void Verifier::VerifyParameterAttrs(Attributes Attrs, Type *Ty,
Attrs.hasAttribute(Attributes::AlwaysInline)), "Attributes "
"'noinline and alwaysinline' are incompatible!", V);
- Attributes TypeI = Attrs & Attributes::typeIncompatible(Ty);
- Assert1(!TypeI, "Wrong type for attribute " +
- TypeI.getAsString(), V);
+ Assert1(!AttrBuilder(Attrs).
+ hasAttributes(Attributes::typeIncompatible(Ty)),
+ "Wrong types for attribute: " +
+ Attributes::typeIncompatible(Ty).getAsString(), V);
if (PointerType *PTy = dyn_cast<PointerType>(Ty))
Assert1(!Attrs.hasAttribute(Attributes::ByVal) ||
@@ -614,10 +615,10 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT,
}
Attributes FAttrs = Attrs.getFnAttributes();
- Attributes::Builder NotFn(FAttrs);
+ AttrBuilder NotFn(FAttrs);
NotFn.removeFunctionOnlyAttrs();
Assert1(!NotFn.hasAttributes(), "Attributes '" +
- Attributes::get(NotFn).getAsString() +
+ Attributes::get(V->getContext(), NotFn).getAsString() +
"' do not apply to the function!", V);
// Check for mutually incompatible attributes.
diff --git a/test/Analysis/DependenceAnalysis/Banerjee.ll b/test/Analysis/DependenceAnalysis/Banerjee.ll
new file mode 100644
index 00000000000..8865ee94016
--- /dev/null
+++ b/test/Analysis/DependenceAnalysis/Banerjee.ll
@@ -0,0 +1,595 @@
+; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
+
+; ModuleID = 'Banerjee.bc'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+
+;; for (long int i = 1; i <= 10; i++)
+;; for (long int j = 1; j <= 10; j++) {
+;; A[10*i + j] = ...
+;; ... = A[10*i + j - 1];
+
+define void @banerjee0(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc7
+ %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
+ %i.03 = phi i64 [ 1, %entry ], [ %inc8, %for.inc7 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.cond1.preheader, %for.body3
+ %j.02 = phi i64 [ 1, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i64* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %mul = mul nsw i64 %i.03, 10
+ %add = add nsw i64 %mul, %j.02
+ %arrayidx = getelementptr inbounds i64* %A, i64 %add
+ store i64 0, i64* %arrayidx, align 8
+ %mul4 = mul nsw i64 %i.03, 10
+ %add5 = add nsw i64 %mul4, %j.02
+ %sub = add nsw i64 %add5, -1
+ %arrayidx6 = getelementptr inbounds i64* %A, i64 %sub
+ %0 = load i64* %arrayidx6, align 8
+; CHECK: da analyze - flow [<= <>]!
+ %incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
+ store i64 %0, i64* %B.addr.11, align 8
+ %inc = add nsw i64 %j.02, 1
+ %exitcond = icmp ne i64 %inc, 11
+ br i1 %exitcond, label %for.body3, label %for.inc7
+
+for.inc7: ; preds = %for.body3
+ %scevgep = getelementptr i64* %B.addr.04, i64 10
+ %inc8 = add nsw i64 %i.03, 1
+ %exitcond5 = icmp ne i64 %inc8, 11
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end9
+
+for.end9: ; preds = %for.inc7
+ ret void
+}
+
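The direction vector in banerjee0 can be checked by hand: the store writes
A[10*i + j] and the load reads A[10*i' + j' - 1], so a flow dependence needs

    10*(i' - i) + (j' - j) = 1,   with 1 <= j, j' <= 10.

Since |j' - j| <= 9, the only solutions are (i'-i, j'-j) = (0, 1) and (1, -9),
i.e. directions [= <] and [< >]; their union is the reported [<= <>].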
+
+;; for (long int i = 1; i <= n; i++)
+;; for (long int j = 1; j <= m; j++) {
+;; A[10*i + j] = ...
+;; ... = A[10*i + j - 1];
+
+define void @banerjee1(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
+entry:
+ %cmp4 = icmp sgt i64 %n, 0
+ br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end9
+
+for.cond1.preheader.preheader: ; preds = %entry
+ %0 = add i64 %n, 1
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc7
+ %B.addr.06 = phi i64* [ %B.addr.1.lcssa, %for.inc7 ], [ %B, %for.cond1.preheader.preheader ]
+ %i.05 = phi i64 [ %inc8, %for.inc7 ], [ 1, %for.cond1.preheader.preheader ]
+ %1 = add i64 %m, 1
+ %cmp21 = icmp sgt i64 %m, 0
+ br i1 %cmp21, label %for.body3.preheader, label %for.inc7
+
+for.body3.preheader: ; preds = %for.cond1.preheader
+ br label %for.body3
+
+for.body3: ; preds = %for.body3.preheader, %for.body3
+ %j.03 = phi i64 [ %inc, %for.body3 ], [ 1, %for.body3.preheader ]
+ %B.addr.12 = phi i64* [ %incdec.ptr, %for.body3 ], [ %B.addr.06, %for.body3.preheader ]
+ %mul = mul nsw i64 %i.05, 10
+ %add = add nsw i64 %mul, %j.03
+ %arrayidx = getelementptr inbounds i64* %A, i64 %add
+ store i64 0, i64* %arrayidx, align 8
+ %mul4 = mul nsw i64 %i.05, 10
+ %add5 = add nsw i64 %mul4, %j.03
+ %sub = add nsw i64 %add5, -1
+ %arrayidx6 = getelementptr inbounds i64* %A, i64 %sub
+ %2 = load i64* %arrayidx6, align 8
+; CHECK: da analyze - flow [* <>]!
+ %incdec.ptr = getelementptr inbounds i64* %B.addr.12, i64 1
+ store i64 %2, i64* %B.addr.12, align 8
+ %inc = add nsw i64 %j.03, 1
+ %exitcond = icmp eq i64 %inc, %1
+ br i1 %exitcond, label %for.inc7.loopexit, label %for.body3
+
+for.inc7.loopexit: ; preds = %for.body3
+ %scevgep = getelementptr i64* %B.addr.06, i64 %m
+ br label %for.inc7
+
+for.inc7: ; preds = %for.inc7.loopexit, %for.cond1.preheader
+ %B.addr.1.lcssa = phi i64* [ %B.addr.06, %for.cond1.preheader ], [ %scevgep, %for.inc7.loopexit ]
+ %inc8 = add nsw i64 %i.05, 1
+ %exitcond7 = icmp eq i64 %inc8, %0
+ br i1 %exitcond7, label %for.end9.loopexit, label %for.cond1.preheader
+
+for.end9.loopexit: ; preds = %for.inc7
+ br label %for.end9
+
+for.end9: ; preds = %for.end9.loopexit, %entry
+ ret void
+}
+
+
+;; for (long int i = 0; i < 10; i++)
+;; for (long int j = 0; j < 10; j++) {
+;; A[10*i + j] = 0;
+;; *B++ = A[10*i + j + 100];
+
+define void @banerjee2(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc8
+ %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.cond1.preheader, %for.body3
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i64* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %mul = mul nsw i64 %i.03, 10
+ %add = add nsw i64 %mul, %j.02
+ %arrayidx = getelementptr inbounds i64* %A, i64 %add
+ store i64 0, i64* %arrayidx, align 8
+ %mul4 = mul nsw i64 %i.03, 10
+ %add5 = add nsw i64 %mul4, %j.02
+ %add6 = add nsw i64 %add5, 100
+ %arrayidx7 = getelementptr inbounds i64* %A, i64 %add6
+ %0 = load i64* %arrayidx7, align 8
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
+ store i64 %0, i64* %B.addr.11, align 8
+ %inc = add nsw i64 %j.02, 1
+ %exitcond = icmp ne i64 %inc, 10
+ br i1 %exitcond, label %for.body3, label %for.inc8
+
+for.inc8: ; preds = %for.body3
+ %scevgep = getelementptr i64* %B.addr.04, i64 10
+ %inc9 = add nsw i64 %i.03, 1
+ %exitcond5 = icmp ne i64 %inc9, 10
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end10
+
+for.end10: ; preds = %for.inc8
+ ret void
+}
+
+
+;; for (long int i = 0; i < 10; i++)
+;; for (long int j = 0; j < 10; j++) {
+;; A[10*i + j] = ...
+;; ... = A[10*i + j + 99];
+
+define void @banerjee3(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc8
+ %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.cond1.preheader, %for.body3
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i64* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %mul = mul nsw i64 %i.03, 10
+ %add = add nsw i64 %mul, %j.02
+ %arrayidx = getelementptr inbounds i64* %A, i64 %add
+ store i64 0, i64* %arrayidx, align 8
+ %mul4 = mul nsw i64 %i.03, 10
+ %add5 = add nsw i64 %mul4, %j.02
+ %add6 = add nsw i64 %add5, 99
+ %arrayidx7 = getelementptr inbounds i64* %A, i64 %add6
+ %0 = load i64* %arrayidx7, align 8
+; CHECK: da analyze - flow [> >]!
+ %incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
+ store i64 %0, i64* %B.addr.11, align 8
+ %inc = add nsw i64 %j.02, 1
+ %exitcond = icmp ne i64 %inc, 10
+ br i1 %exitcond, label %for.body3, label %for.inc8
+
+for.inc8: ; preds = %for.body3
+ %scevgep = getelementptr i64* %B.addr.04, i64 10
+ %inc9 = add nsw i64 %i.03, 1
+ %exitcond5 = icmp ne i64 %inc9, 10
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end10
+
+for.end10: ; preds = %for.inc8
+ ret void
+}
+
+
+;; for (long int i = 0; i < 10; i++)
+;; for (long int j = 0; j < 10; j++) {
+;; A[10*i + j] = ...
+;; ... = A[10*i + j - 100];
+
+define void @banerjee4(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc7
+ %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.cond1.preheader, %for.body3
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i64* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %mul = mul nsw i64 %i.03, 10
+ %add = add nsw i64 %mul, %j.02
+ %arrayidx = getelementptr inbounds i64* %A, i64 %add
+ store i64 0, i64* %arrayidx, align 8
+ %mul4 = mul nsw i64 %i.03, 10
+ %add5 = add nsw i64 %mul4, %j.02
+ %sub = add nsw i64 %add5, -100
+ %arrayidx6 = getelementptr inbounds i64* %A, i64 %sub
+ %0 = load i64* %arrayidx6, align 8
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
+ store i64 %0, i64* %B.addr.11, align 8
+ %inc = add nsw i64 %j.02, 1
+ %exitcond = icmp ne i64 %inc, 10
+ br i1 %exitcond, label %for.body3, label %for.inc7
+
+for.inc7: ; preds = %for.body3
+ %scevgep = getelementptr i64* %B.addr.04, i64 10
+ %inc8 = add nsw i64 %i.03, 1
+ %exitcond5 = icmp ne i64 %inc8, 10
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end9
+
+for.end9: ; preds = %for.inc7
+ ret void
+}
+
+
+;; for (long int i = 0; i < 10; i++)
+;; for (long int j = 0; j < 10; j++) {
+;; A[10*i + j] = ...
+;; ... = A[10*i + j - 99];
+
+define void @banerjee5(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc7
+ %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.cond1.preheader, %for.body3
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i64* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %mul = mul nsw i64 %i.03, 10
+ %add = add nsw i64 %mul, %j.02
+ %arrayidx = getelementptr inbounds i64* %A, i64 %add
+ store i64 0, i64* %arrayidx, align 8
+ %mul4 = mul nsw i64 %i.03, 10
+ %add5 = add nsw i64 %mul4, %j.02
+ %sub = add nsw i64 %add5, -99
+ %arrayidx6 = getelementptr inbounds i64* %A, i64 %sub
+ %0 = load i64* %arrayidx6, align 8
+; CHECK: da analyze - flow [< <]!
+ %incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
+ store i64 %0, i64* %B.addr.11, align 8
+ %inc = add nsw i64 %j.02, 1
+ %exitcond = icmp ne i64 %inc, 10
+ br i1 %exitcond, label %for.body3, label %for.inc7
+
+for.inc7: ; preds = %for.body3
+ %scevgep = getelementptr i64* %B.addr.04, i64 10
+ %inc8 = add nsw i64 %i.03, 1
+ %exitcond5 = icmp ne i64 %inc8, 10
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end9
+
+for.end9: ; preds = %for.inc7
+ ret void
+}
+
+
+;; for (long int i = 0; i < 10; i++)
+;; for (long int j = 0; j < 10; j++) {
+;; A[10*i + j] = ...
+;; ... = A[10*i + j + 9];
+
+define void @banerjee6(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc8
+ %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.cond1.preheader, %for.body3
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i64* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %mul = mul nsw i64 %i.03, 10
+ %add = add nsw i64 %mul, %j.02
+ %arrayidx = getelementptr inbounds i64* %A, i64 %add
+ store i64 0, i64* %arrayidx, align 8
+ %mul4 = mul nsw i64 %i.03, 10
+ %add5 = add nsw i64 %mul4, %j.02
+ %add6 = add nsw i64 %add5, 9
+ %arrayidx7 = getelementptr inbounds i64* %A, i64 %add6
+ %0 = load i64* %arrayidx7, align 8
+; CHECK: da analyze - flow [=> <>]!
+ %incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
+ store i64 %0, i64* %B.addr.11, align 8
+ %inc = add nsw i64 %j.02, 1
+ %exitcond = icmp ne i64 %inc, 10
+ br i1 %exitcond, label %for.body3, label %for.inc8
+
+for.inc8: ; preds = %for.body3
+ %scevgep = getelementptr i64* %B.addr.04, i64 10
+ %inc9 = add nsw i64 %i.03, 1
+ %exitcond5 = icmp ne i64 %inc9, 10
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end10
+
+for.end10: ; preds = %for.inc8
+ ret void
+}
+
+
+;; for (long int i = 0; i < 10; i++)
+;; for (long int j = 0; j < 10; j++) {
+;; A[10*i + j] = ...
+;; ... = A[10*i + j + 10];
+
+define void @banerjee7(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc8
+ %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.cond1.preheader, %for.body3
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i64* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %mul = mul nsw i64 %i.03, 10
+ %add = add nsw i64 %mul, %j.02
+ %arrayidx = getelementptr inbounds i64* %A, i64 %add
+ store i64 0, i64* %arrayidx, align 8
+ %mul4 = mul nsw i64 %i.03, 10
+ %add5 = add nsw i64 %mul4, %j.02
+ %add6 = add nsw i64 %add5, 10
+ %arrayidx7 = getelementptr inbounds i64* %A, i64 %add6
+ %0 = load i64* %arrayidx7, align 8
+; CHECK: da analyze - flow [> <=]!
+ %incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
+ store i64 %0, i64* %B.addr.11, align 8
+ %inc = add nsw i64 %j.02, 1
+ %exitcond = icmp ne i64 %inc, 10
+ br i1 %exitcond, label %for.body3, label %for.inc8
+
+for.inc8: ; preds = %for.body3
+ %scevgep = getelementptr i64* %B.addr.04, i64 10
+ %inc9 = add nsw i64 %i.03, 1
+ %exitcond5 = icmp ne i64 %inc9, 10
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end10
+
+for.end10: ; preds = %for.inc8
+ ret void
+}
+
+
+;; for (long int i = 0; i < 10; i++)
+;; for (long int j = 0; j < 10; j++) {
+;; A[10*i + j] = ...
+;; ... = A[10*i + j + 11];
+
+define void @banerjee8(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc8
+ %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.cond1.preheader, %for.body3
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i64* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %mul = mul nsw i64 %i.03, 10
+ %add = add nsw i64 %mul, %j.02
+ %arrayidx = getelementptr inbounds i64* %A, i64 %add
+ store i64 0, i64* %arrayidx, align 8
+ %mul4 = mul nsw i64 %i.03, 10
+ %add5 = add nsw i64 %mul4, %j.02
+ %add6 = add nsw i64 %add5, 11
+ %arrayidx7 = getelementptr inbounds i64* %A, i64 %add6
+ %0 = load i64* %arrayidx7, align 8
+; CHECK: da analyze - flow [> <>]!
+ %incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
+ store i64 %0, i64* %B.addr.11, align 8
+ %inc = add nsw i64 %j.02, 1
+ %exitcond = icmp ne i64 %inc, 10
+ br i1 %exitcond, label %for.body3, label %for.inc8
+
+for.inc8: ; preds = %for.body3
+ %scevgep = getelementptr i64* %B.addr.04, i64 10
+ %inc9 = add nsw i64 %i.03, 1
+ %exitcond5 = icmp ne i64 %inc9, 10
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end10
+
+for.end10: ; preds = %for.inc8
+ ret void
+}
+
+
+;; for (long int i = 0; i < 20; i++)
+;; for (long int j = 0; j < 20; j++) {
+;; A[30*i + 500*j] = ...
+;; ... = A[i - 500*j + 11];
+
+define void @banerjee9(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc8
+ %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.cond1.preheader, %for.body3
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i64* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %mul = mul nsw i64 %i.03, 30
+ %mul4 = mul nsw i64 %j.02, 500
+ %add = add nsw i64 %mul, %mul4
+ %arrayidx = getelementptr inbounds i64* %A, i64 %add
+ store i64 0, i64* %arrayidx, align 8
+ %0 = mul i64 %j.02, -500
+ %sub = add i64 %i.03, %0
+ %add6 = add nsw i64 %sub, 11
+ %arrayidx7 = getelementptr inbounds i64* %A, i64 %add6
+ %1 = load i64* %arrayidx7, align 8
+; CHECK: da analyze - flow [<= =|<]!
+ %incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
+ store i64 %1, i64* %B.addr.11, align 8
+ %inc = add nsw i64 %j.02, 1
+ %exitcond = icmp ne i64 %inc, 20
+ br i1 %exitcond, label %for.body3, label %for.inc8
+
+for.inc8: ; preds = %for.body3
+ %scevgep = getelementptr i64* %B.addr.04, i64 20
+ %inc9 = add nsw i64 %i.03, 1
+ %exitcond5 = icmp ne i64 %inc9, 20
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end10
+
+for.end10: ; preds = %for.inc8
+ ret void
+}
+
+
+;; for (long int i = 0; i < 20; i++)
+;; for (long int j = 0; j < 20; j++) {
+;; A[i + 500*j] = ...
+;; ... = A[i - 500*j + 11];
+
+define void @banerjee10(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc7
+ %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.cond1.preheader, %for.body3
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i64* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %mul = mul nsw i64 %j.02, 500
+ %add = add nsw i64 %i.03, %mul
+ %arrayidx = getelementptr inbounds i64* %A, i64 %add
+ store i64 0, i64* %arrayidx, align 8
+ %0 = mul i64 %j.02, -500
+ %sub = add i64 %i.03, %0
+ %add5 = add nsw i64 %sub, 11
+ %arrayidx6 = getelementptr inbounds i64* %A, i64 %add5
+ %1 = load i64* %arrayidx6, align 8
+; CHECK: da analyze - flow [<> =]!
+ %incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
+ store i64 %1, i64* %B.addr.11, align 8
+ %inc = add nsw i64 %j.02, 1
+ %exitcond = icmp ne i64 %inc, 20
+ br i1 %exitcond, label %for.body3, label %for.inc7
+
+for.inc7: ; preds = %for.body3
+ %scevgep = getelementptr i64* %B.addr.04, i64 20
+ %inc8 = add nsw i64 %i.03, 1
+ %exitcond5 = icmp ne i64 %inc8, 20
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end9
+
+for.end9: ; preds = %for.inc7
+ ret void
+}
+
+
+;; for (long int i = 0; i < 20; i++)
+;; for (long int j = 0; j < 20; j++) {
+;; A[300*i + j] = ...
+;; ... = A[250*i - j + 11];
+
+define void @banerjee11(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc7
+ %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.cond1.preheader, %for.body3
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i64* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %mul = mul nsw i64 %i.03, 300
+ %add = add nsw i64 %mul, %j.02
+ %arrayidx = getelementptr inbounds i64* %A, i64 %add
+ store i64 0, i64* %arrayidx, align 8
+ %mul4 = mul nsw i64 %i.03, 250
+ %sub = sub nsw i64 %mul4, %j.02
+ %add5 = add nsw i64 %sub, 11
+ %arrayidx6 = getelementptr inbounds i64* %A, i64 %add5
+ %0 = load i64* %arrayidx6, align 8
+; CHECK: da analyze - flow [<= <>]!
+ %incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
+ store i64 %0, i64* %B.addr.11, align 8
+ %inc = add nsw i64 %j.02, 1
+ %exitcond = icmp ne i64 %inc, 20
+ br i1 %exitcond, label %for.body3, label %for.inc7
+
+for.inc7: ; preds = %for.body3
+ %scevgep = getelementptr i64* %B.addr.04, i64 20
+ %inc8 = add nsw i64 %i.03, 1
+ %exitcond5 = icmp ne i64 %inc8, 20
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end9
+
+for.end9: ; preds = %for.inc7
+ ret void
+}
+
+
+;; for (long int i = 0; i < 20; i++)
+;; for (long int j = 0; j < 20; j++) {
+;; A[100*i + j] = ...
+;; ... = A[100*i - j + 11];
+
+define void @banerjee12(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc7
+ %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.cond1.preheader, %for.body3
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i64* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %mul = mul nsw i64 %i.03, 100
+ %add = add nsw i64 %mul, %j.02
+ %arrayidx = getelementptr inbounds i64* %A, i64 %add
+ store i64 0, i64* %arrayidx, align 8
+ %mul4 = mul nsw i64 %i.03, 100
+ %sub = sub nsw i64 %mul4, %j.02
+ %add5 = add nsw i64 %sub, 11
+ %arrayidx6 = getelementptr inbounds i64* %A, i64 %add5
+ %0 = load i64* %arrayidx6, align 8
+; CHECK: da analyze - flow [= <>]!
+ %incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
+ store i64 %0, i64* %B.addr.11, align 8
+ %inc = add nsw i64 %j.02, 1
+ %exitcond = icmp ne i64 %inc, 20
+ br i1 %exitcond, label %for.body3, label %for.inc7
+
+for.inc7: ; preds = %for.body3
+ %scevgep = getelementptr i64* %B.addr.04, i64 20
+ %inc8 = add nsw i64 %i.03, 1
+ %exitcond5 = icmp ne i64 %inc8, 20
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end9
+
+for.end9: ; preds = %for.inc7
+ ret void
+}
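
For banerjee12, equating A[100*i + j] with A[100*i' - j' + 11] forces
100*(i' - i) = j + j' - 11; with 0 <= j, j' <= 19 the right side lies in
[-11, 27], so i' = i and j' = 11 - j. The difference j' - j = 11 - 2*j is
positive for j <= 5, negative for j >= 6, and never zero since 11 is odd,
which is exactly the reported [= <>].
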
diff --git a/test/Analysis/DependenceAnalysis/Coupled.ll b/test/Analysis/DependenceAnalysis/Coupled.ll
new file mode 100644
index 00000000000..60163fe7c2d
--- /dev/null
+++ b/test/Analysis/DependenceAnalysis/Coupled.ll
@@ -0,0 +1,509 @@
+; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
+
+; ModuleID = 'Coupled.bc'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+
+;; for (long int i = 0; i < 50; i++)
+;; A[i][i] = ...
+;; ... = A[i + 10][i + 9]
+
+define void @couple0([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %arrayidx1 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
+ store i32 %conv, i32* %arrayidx1, align 4
+ %add = add nsw i64 %i.02, 9
+ %add2 = add nsw i64 %i.02, 10
+ %arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %add2, i64 %add
+ %0 = load i32* %arrayidx4, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add nsw i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, 50
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long int i = 0; i < 50; i++)
+;; A[i][i] = ...
+;; ... = A[i + 9][i + 9]
+
+define void @couple1([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %arrayidx1 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
+ store i32 %conv, i32* %arrayidx1, align 4
+ %add = add nsw i64 %i.02, 9
+ %add2 = add nsw i64 %i.02, 9
+ %arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %add2, i64 %add
+ %0 = load i32* %arrayidx4, align 4
+; CHECK: da analyze - consistent flow [-9]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add nsw i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, 50
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long int i = 0; i < 50; i++)
+;; A[3*i - 6][3*i - 6] = ...
+;; ... = A[i][i]
+
+define void @couple2([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = mul nsw i64 %i.02, 3
+ %sub = add nsw i64 %mul, -6
+ %mul1 = mul nsw i64 %i.02, 3
+ %sub2 = add nsw i64 %mul1, -6
+ %arrayidx3 = getelementptr inbounds [100 x i32]* %A, i64 %sub2, i64 %sub
+ store i32 %conv, i32* %arrayidx3, align 4
+ %arrayidx5 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
+ %0 = load i32* %arrayidx5, align 4
+; CHECK: da analyze - flow [*|<]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add nsw i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, 50
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long int i = 0; i < 50; i++)
+;; A[3*i - 6][3*i - 5] = ...
+;; ... = A[i][i]
+
+define void @couple3([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = mul nsw i64 %i.02, 3
+ %sub = add nsw i64 %mul, -5
+ %mul1 = mul nsw i64 %i.02, 3
+ %sub2 = add nsw i64 %mul1, -6
+ %arrayidx3 = getelementptr inbounds [100 x i32]* %A, i64 %sub2, i64 %sub
+ store i32 %conv, i32* %arrayidx3, align 4
+ %arrayidx5 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
+ %0 = load i32* %arrayidx5, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add nsw i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, 50
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long int i = 0; i < 50; i++)
+;; A[3*i - 6][3*i - n] = ...
+;; ... = A[i][i]
+
+define void @couple4([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = mul nsw i64 %i.02, 3
+ %conv1 = sext i32 %n to i64
+ %sub = sub nsw i64 %mul, %conv1
+ %mul2 = mul nsw i64 %i.02, 3
+ %sub3 = add nsw i64 %mul2, -6
+ %arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %sub3, i64 %sub
+ store i32 %conv, i32* %arrayidx4, align 4
+ %arrayidx6 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
+ %0 = load i32* %arrayidx6, align 4
+; CHECK: da analyze - flow [*|<]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add nsw i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, 50
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long int i = 0; i < 50; i++)
+;; A[3*i - n + 1][3*i - n] = ...
+;; ... = A[i][i]
+
+define void @couple5([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = mul nsw i64 %i.02, 3
+ %conv1 = sext i32 %n to i64
+ %sub = sub nsw i64 %mul, %conv1
+ %mul2 = mul nsw i64 %i.02, 3
+ %conv3 = sext i32 %n to i64
+ %sub4 = sub nsw i64 %mul2, %conv3
+ %add = add nsw i64 %sub4, 1
+ %arrayidx5 = getelementptr inbounds [100 x i32]* %A, i64 %add, i64 %sub
+ store i32 %conv, i32* %arrayidx5, align 4
+ %arrayidx7 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
+ %0 = load i32* %arrayidx7, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add nsw i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, 50
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long int i = 0; i < 50; i++)
+;; A[i][3*i - 6] = ...
+;; ... = A[i][i]
+
+define void @couple6([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = mul nsw i64 %i.02, 3
+ %sub = add nsw i64 %mul, -6
+ %arrayidx1 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %sub
+ store i32 %conv, i32* %arrayidx1, align 4
+ %arrayidx3 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
+ %0 = load i32* %arrayidx3, align 4
+; CHECK: da analyze - flow [=|<]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add nsw i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, 50
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long int i = 0; i < 50; i++)
+;; A[i][3*i - 5] = ...
+;; ... = A[i][i]
+
+define void @couple7([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = mul nsw i64 %i.02, 3
+ %sub = add nsw i64 %mul, -5
+ %arrayidx1 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %sub
+ store i32 %conv, i32* %arrayidx1, align 4
+ %arrayidx3 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
+ %0 = load i32* %arrayidx3, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add nsw i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, 50
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long int i = 0; i <= 15; i++)
+;; A[3*i - 18][3 - i] = ...
+;; ... = A[i][i]
+
+define void @couple8([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %sub = sub nsw i64 3, %i.02
+ %mul = mul nsw i64 %i.02, 3
+ %sub1 = add nsw i64 %mul, -18
+ %arrayidx2 = getelementptr inbounds [100 x i32]* %A, i64 %sub1, i64 %sub
+ store i32 %conv, i32* %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
+ %0 = load i32* %arrayidx4, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add nsw i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, 16
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long int i = 0; i <= 15; i++)
+;; A[3*i - 18][2 - i] = ...
+;; ... = A[i][i]
+
+define void @couple9([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %sub = sub nsw i64 2, %i.02
+ %mul = mul nsw i64 %i.02, 3
+ %sub1 = add nsw i64 %mul, -18
+ %arrayidx2 = getelementptr inbounds [100 x i32]* %A, i64 %sub1, i64 %sub
+ store i32 %conv, i32* %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
+ %0 = load i32* %arrayidx4, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add nsw i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, 16
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long int i = 0; i <= 15; i++)
+;; A[3*i - 18][6 - i] = ...
+;; ... = A[i][i]
+
+define void @couple10([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %sub = sub nsw i64 6, %i.02
+ %mul = mul nsw i64 %i.02, 3
+ %sub1 = add nsw i64 %mul, -18
+ %arrayidx2 = getelementptr inbounds [100 x i32]* %A, i64 %sub1, i64 %sub
+ store i32 %conv, i32* %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
+ %0 = load i32* %arrayidx4, align 4
+; CHECK: da analyze - flow [>] splitable!
+; CHECK: da analyze - split level = 1, iteration = 3!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add nsw i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, 16
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
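The split reported for couple10 falls out of the arithmetic: equating the
store A[3*i - 18][6 - i] with the load A[i'][i'] gives 3*i - 18 = 6 - i, so
4*i = 24 and the only conflicting pair is i = 6 (store) with i' = 0 (load).
Splitting the loop at the midpoint iteration (6 + 0) / 2 = 3 separates the two
references, matching "split level = 1, iteration = 3".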
+
+;; for (long int i = 0; i <= 15; i++)
+;; A[3*i - 18][18 - i] = ...
+;; ... = A[i][i]
+
+define void @couple11([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %sub = sub nsw i64 18, %i.02
+ %mul = mul nsw i64 %i.02, 3
+ %sub1 = add nsw i64 %mul, -18
+ %arrayidx2 = getelementptr inbounds [100 x i32]* %A, i64 %sub1, i64 %sub
+ store i32 %conv, i32* %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
+ %0 = load i32* %arrayidx4, align 4
+; CHECK: da analyze - flow [=|<] splitable!
+; CHECK: da analyze - split level = 1, iteration = 9!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add nsw i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, 16
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long int i = 0; i <= 12; i++)
+;; A[3*i - 18][22 - i] = ...
+;; ... = A[i][i]
+
+define void @couple12([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %sub = sub nsw i64 22, %i.02
+ %mul = mul nsw i64 %i.02, 3
+ %sub1 = add nsw i64 %mul, -18
+ %arrayidx2 = getelementptr inbounds [100 x i32]* %A, i64 %sub1, i64 %sub
+ store i32 %conv, i32* %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
+ %0 = load i32* %arrayidx4, align 4
+; CHECK: da analyze - flow [<] splitable!
+; CHECK: da analyze - split level = 1, iteration = 11!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add nsw i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, 13
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long int i = 0; i < 12; i++)
+;; A[3*i - 18][22 - i] = ...
+;; ... = A[i][i]
+
+define void @couple13([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %sub = sub nsw i64 22, %i.02
+ %mul = mul nsw i64 %i.02, 3
+ %sub1 = add nsw i64 %mul, -18
+ %arrayidx2 = getelementptr inbounds [100 x i32]* %A, i64 %sub1, i64 %sub
+ store i32 %conv, i32* %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
+ %0 = load i32* %arrayidx4, align 4
+; CHECK: da analyze - none!
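+; Same subscripts as couple12, but the tighter bound i < 12 excludes the
+; lone solution (i, i') = (10, 12), so no dependence remains.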
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add nsw i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, 12
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long int i = 0; i < 100; i++)
+;; A[3*i - 18][18 - i][i] = ...
+;; ... = A[i][i][i]
+
+define void @couple14([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %sub = sub nsw i64 18, %i.02
+ %mul = mul nsw i64 %i.02, 3
+ %sub1 = add nsw i64 %mul, -18
+ %arrayidx3 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %sub1, i64 %sub, i64 %i.02
+ store i32 %conv, i32* %arrayidx3, align 4
+ %arrayidx6 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %i.02, i64 %i.02, i64 %i.02
+ %0 = load i32* %arrayidx6, align 4
+; CHECK: da analyze - flow [=|<] splitable!
+; CHECK: da analyze - split level = 1, iteration = 9!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add nsw i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, 100
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long int i = 0; i < 100; i++)
+;; A[3*i - 18][22 - i][i] = ...
+;; ... = A[i][i][i]
+
+define void @couple15([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %sub = sub nsw i64 22, %i.02
+ %mul = mul nsw i64 %i.02, 3
+ %sub1 = add nsw i64 %mul, -18
+ %arrayidx3 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %sub1, i64 %sub, i64 %i.02
+ store i32 %conv, i32* %arrayidx3, align 4
+ %arrayidx6 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %i.02, i64 %i.02, i64 %i.02
+ %0 = load i32* %arrayidx6, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add nsw i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, 100
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Analysis/DependenceAnalysis/ExactRDIV.ll b/test/Analysis/DependenceAnalysis/ExactRDIV.ll
new file mode 100644
index 00000000000..aa5d254a0ce
--- /dev/null
+++ b/test/Analysis/DependenceAnalysis/ExactRDIV.ll
@@ -0,0 +1,508 @@
+; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
+
+; ModuleID = 'ExactRDIV.bc'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+
+;; for (long int i = 0; i < 10; i++)
+;; A[4*i + 10] = ...
+;; for (long int j = 0; j < 10; j++)
+;; ... = A[2*j + 1];
+
+define void @rdiv0(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = shl nsw i64 %i.03, 2
+ %add = add nsw i64 %mul, 10
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %inc = add nsw i64 %i.03, 1
+ %cmp = icmp slt i64 %inc, 10
+ br i1 %cmp, label %for.body, label %for.body4
+
+for.body4: ; preds = %for.body4, %for.body
+ %j.02 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.body ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+ %mul5 = shl nsw i64 %j.02, 1
+ %add64 = or i64 %mul5, 1
+ %arrayidx7 = getelementptr inbounds i32* %A, i64 %add64
+ %0 = load i32* %arrayidx7, align 4
+; CHECK: da analyze - none!
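+; 4*i + 10 is always even while 2*j + 1 is always odd; equivalently,
+; gcd(4, 2) = 2 does not divide 9, so the accesses never overlap.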
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc9 = add nsw i64 %j.02, 1
+ %cmp2 = icmp slt i64 %inc9, 10
+ br i1 %cmp2, label %for.body4, label %for.end10
+
+for.end10: ; preds = %for.body4
+ ret void
+}
+
+
+;; for (long int i = 0; i < 5; i++)
+;; A[11*i - 45] = ...
+;; for (long int j = 0; j < 10; j++)
+;; ... = A[j];
+
+define void @rdiv1(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = mul nsw i64 %i.03, 11
+ %sub = add nsw i64 %mul, -45
+ %arrayidx = getelementptr inbounds i32* %A, i64 %sub
+ store i32 %conv, i32* %arrayidx, align 4
+ %inc = add nsw i64 %i.03, 1
+ %cmp = icmp slt i64 %inc, 5
+ br i1 %cmp, label %for.body, label %for.body4
+
+for.body4: ; preds = %for.body4, %for.body
+ %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+ %arrayidx5 = getelementptr inbounds i32* %A, i64 %j.02
+ %0 = load i32* %arrayidx5, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc7 = add nsw i64 %j.02, 1
+ %cmp2 = icmp slt i64 %inc7, 10
+ br i1 %cmp2, label %for.body4, label %for.end8
+
+for.end8: ; preds = %for.body4
+ ret void
+}
+
+
+;; for (long int i = 0; i <= 5; i++)
+;; A[11*i - 45] = ...
+;; for (long int j = 0; j < 10; j++)
+;; ... = A[j];
+
+define void @rdiv2(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = mul nsw i64 %i.03, 11
+ %sub = add nsw i64 %mul, -45
+ %arrayidx = getelementptr inbounds i32* %A, i64 %sub
+ store i32 %conv, i32* %arrayidx, align 4
+ %inc = add nsw i64 %i.03, 1
+ %cmp = icmp slt i64 %inc, 6
+ br i1 %cmp, label %for.body, label %for.body4
+
+for.body4: ; preds = %for.body4, %for.body
+ %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+ %arrayidx5 = getelementptr inbounds i32* %A, i64 %j.02
+ %0 = load i32* %arrayidx5, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc7 = add nsw i64 %j.02, 1
+ %cmp2 = icmp slt i64 %inc7, 10
+ br i1 %cmp2, label %for.body4, label %for.end8
+
+for.end8: ; preds = %for.body4
+ ret void
+}
+
+
+;; for (long int i = 0; i < 5; i++)
+;; A[11*i - 45] = ...
+;; for (long int j = 0; j <= 10; j++)
+;; ... = A[j];
+
+define void @rdiv3(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = mul nsw i64 %i.03, 11
+ %sub = add nsw i64 %mul, -45
+ %arrayidx = getelementptr inbounds i32* %A, i64 %sub
+ store i32 %conv, i32* %arrayidx, align 4
+ %inc = add nsw i64 %i.03, 1
+ %cmp = icmp slt i64 %inc, 5
+ br i1 %cmp, label %for.body, label %for.body4
+
+for.body4: ; preds = %for.body4, %for.body
+ %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+ %arrayidx5 = getelementptr inbounds i32* %A, i64 %j.02
+ %0 = load i32* %arrayidx5, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc7 = add nsw i64 %j.02, 1
+ %cmp2 = icmp slt i64 %inc7, 11
+ br i1 %cmp2, label %for.body4, label %for.end8
+
+for.end8: ; preds = %for.body4
+ ret void
+}
+
+
+;; for (long int i = 0; i <= 5; i++)
+;; A[11*i - 45] = ...
+;; for (long int j = 0; j <= 10; j++)
+;; ... = A[j];
+
+define void @rdiv4(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = mul nsw i64 %i.03, 11
+ %sub = add nsw i64 %mul, -45
+ %arrayidx = getelementptr inbounds i32* %A, i64 %sub
+ store i32 %conv, i32* %arrayidx, align 4
+ %inc = add nsw i64 %i.03, 1
+ %cmp = icmp slt i64 %inc, 6
+ br i1 %cmp, label %for.body, label %for.body4
+
+for.body4: ; preds = %for.body4, %for.body
+ %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+ %arrayidx5 = getelementptr inbounds i32* %A, i64 %j.02
+ %0 = load i32* %arrayidx5, align 4
+; CHECK: da analyze - flow!
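+; 11*i - 45 = j has its only in-range solution at i = 5, j = 10, reached
+; only because both loops run to their inclusive upper bounds; rdiv1-rdiv3
+; each trim one bound and so report no dependence.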
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc7 = add nsw i64 %j.02, 1
+ %cmp2 = icmp slt i64 %inc7, 11
+ br i1 %cmp2, label %for.body4, label %for.end8
+
+for.end8: ; preds = %for.body4
+ ret void
+}
+
+
+;; for (long int i = 0; i < 5; i++)
+;; A[-11*i + 45] = ...
+;; for (long int j = 0; j < 10; j++)
+;; ... = A[-j];
+
+define void @rdiv5(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = mul nsw i64 %i.03, -11
+ %add = add nsw i64 %mul, 45
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %inc = add nsw i64 %i.03, 1
+ %cmp = icmp slt i64 %inc, 5
+ br i1 %cmp, label %for.body, label %for.body4
+
+for.body4: ; preds = %for.body4, %for.body
+ %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+ %sub = sub nsw i64 0, %j.02
+ %arrayidx5 = getelementptr inbounds i32* %A, i64 %sub
+ %0 = load i32* %arrayidx5, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc7 = add nsw i64 %j.02, 1
+ %cmp2 = icmp slt i64 %inc7, 10
+ br i1 %cmp2, label %for.body4, label %for.end8
+
+for.end8: ; preds = %for.body4
+ ret void
+}
+
+
+;; for (long int i = 0; i <= 5; i++)
+;; A[-11*i + 45] = ...
+;; for (long int j = 0; j < 10; j++)
+;; ... = A[-j];
+
+define void @rdiv6(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = mul nsw i64 %i.03, -11
+ %add = add nsw i64 %mul, 45
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %inc = add nsw i64 %i.03, 1
+ %cmp = icmp slt i64 %inc, 6
+ br i1 %cmp, label %for.body, label %for.body4
+
+for.body4: ; preds = %for.body4, %for.body
+ %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+ %sub = sub nsw i64 0, %j.02
+ %arrayidx5 = getelementptr inbounds i32* %A, i64 %sub
+ %0 = load i32* %arrayidx5, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc7 = add nsw i64 %j.02, 1
+ %cmp2 = icmp slt i64 %inc7, 10
+ br i1 %cmp2, label %for.body4, label %for.end8
+
+for.end8: ; preds = %for.body4
+ ret void
+}
+
+
+;; for (long int i = 0; i < 5; i++)
+;; A[-11*i + 45] = ...
+;; for (long int j = 0; j <= 10; j++)
+;; ... = A[-j];
+
+define void @rdiv7(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = mul nsw i64 %i.03, -11
+ %add = add nsw i64 %mul, 45
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %inc = add nsw i64 %i.03, 1
+ %cmp = icmp slt i64 %inc, 5
+ br i1 %cmp, label %for.body, label %for.body4
+
+for.body4: ; preds = %for.body4, %for.body
+ %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+ %sub = sub nsw i64 0, %j.02
+ %arrayidx5 = getelementptr inbounds i32* %A, i64 %sub
+ %0 = load i32* %arrayidx5, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc7 = add nsw i64 %j.02, 1
+ %cmp2 = icmp slt i64 %inc7, 11
+ br i1 %cmp2, label %for.body4, label %for.end8
+
+for.end8: ; preds = %for.body4
+ ret void
+}
+
+
+;; for (long int i = 0; i <= 5; i++)
+;; A[-11*i + 45] = ...
+;; for (long int j = 0; j <= 10; j++)
+;; ... = A[-j];
+
+define void @rdiv8(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = mul nsw i64 %i.03, -11
+ %add = add nsw i64 %mul, 45
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %inc = add nsw i64 %i.03, 1
+ %cmp = icmp slt i64 %inc, 6
+ br i1 %cmp, label %for.body, label %for.body4
+
+for.body4: ; preds = %for.body4, %for.body
+ %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+ %sub = sub nsw i64 0, %j.02
+ %arrayidx5 = getelementptr inbounds i32* %A, i64 %sub
+ %0 = load i32* %arrayidx5, align 4
+; CHECK: da analyze - flow!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc7 = add nsw i64 %j.02, 1
+ %cmp2 = icmp slt i64 %inc7, 11
+ br i1 %cmp2, label %for.body4, label %for.end8
+
+for.end8: ; preds = %for.body4
+ ret void
+}
+
+
+;; for (long int i = 0; i < 5; i++)
+;; for (long int j = 0; j < 10; j++)
+;; A[11*i - j] = ...
+;; ... = A[45];
+
+define void @rdiv9(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc5, %entry
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc5 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc6, %for.inc5 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.cond1.preheader
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = mul nsw i64 %i.03, 11
+ %sub = sub nsw i64 %mul, %j.02
+ %arrayidx = getelementptr inbounds i32* %A, i64 %sub
+ store i32 %conv, i32* %arrayidx, align 4
+ %arrayidx4 = getelementptr inbounds i32* %A, i64 45
+ %0 = load i32* %arrayidx4, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
+ store i32 %0, i32* %B.addr.11, align 4
+ %inc = add nsw i64 %j.02, 1
+ %cmp2 = icmp slt i64 %inc, 10
+ br i1 %cmp2, label %for.body3, label %for.inc5
+
+for.inc5: ; preds = %for.body3
+ %inc6 = add nsw i64 %i.03, 1
+ %cmp = icmp slt i64 %inc6, 5
+ br i1 %cmp, label %for.cond1.preheader, label %for.end7
+
+for.end7: ; preds = %for.inc5
+ ret void
+}
+
+
+;; for (long int i = 0; i <= 5; i++)
+;; for (long int j = 0; j < 10; j++)
+;; A[11*i - j] = ...
+;; ... = A[45];
+
+define void @rdiv10(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc5, %entry
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc5 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc6, %for.inc5 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.cond1.preheader
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = mul nsw i64 %i.03, 11
+ %sub = sub nsw i64 %mul, %j.02
+ %arrayidx = getelementptr inbounds i32* %A, i64 %sub
+ store i32 %conv, i32* %arrayidx, align 4
+ %arrayidx4 = getelementptr inbounds i32* %A, i64 45
+ %0 = load i32* %arrayidx4, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
+ store i32 %0, i32* %B.addr.11, align 4
+ %inc = add nsw i64 %j.02, 1
+ %cmp2 = icmp slt i64 %inc, 10
+ br i1 %cmp2, label %for.body3, label %for.inc5
+
+for.inc5: ; preds = %for.body3
+ %inc6 = add nsw i64 %i.03, 1
+ %cmp = icmp slt i64 %inc6, 6
+ br i1 %cmp, label %for.cond1.preheader, label %for.end7
+
+for.end7: ; preds = %for.inc5
+ ret void
+}
+
+
+;; for (long int i = 0; i < 5; i++)
+;; for (long int j = 0; j <= 10; j++)
+;; A[11*i - j] = ...
+;; ... = A[45];
+
+define void @rdiv11(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc5, %entry
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc5 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc6, %for.inc5 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.cond1.preheader
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = mul nsw i64 %i.03, 11
+ %sub = sub nsw i64 %mul, %j.02
+ %arrayidx = getelementptr inbounds i32* %A, i64 %sub
+ store i32 %conv, i32* %arrayidx, align 4
+ %arrayidx4 = getelementptr inbounds i32* %A, i64 45
+ %0 = load i32* %arrayidx4, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
+ store i32 %0, i32* %B.addr.11, align 4
+ %inc = add nsw i64 %j.02, 1
+ %cmp2 = icmp slt i64 %inc, 11
+ br i1 %cmp2, label %for.body3, label %for.inc5
+
+for.inc5: ; preds = %for.body3
+ %inc6 = add nsw i64 %i.03, 1
+ %cmp = icmp slt i64 %inc6, 5
+ br i1 %cmp, label %for.cond1.preheader, label %for.end7
+
+for.end7: ; preds = %for.inc5
+ ret void
+}
+
+
+;; for (long int i = 0; i <= 5; i++)
+;; for (long int j = 0; j <= 10; j++)
+;; A[11*i - j] = ...
+;; ... = A[45];
+
+define void @rdiv12(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc5, %entry
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc5 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc6, %for.inc5 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.cond1.preheader
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = mul nsw i64 %i.03, 11
+ %sub = sub nsw i64 %mul, %j.02
+ %arrayidx = getelementptr inbounds i32* %A, i64 %sub
+ store i32 %conv, i32* %arrayidx, align 4
+ %arrayidx4 = getelementptr inbounds i32* %A, i64 45
+ %0 = load i32* %arrayidx4, align 4
+; CHECK: da analyze - flow [* *|<]!
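+; 11*i - j = 45 is solvable in range only at the corner i = 5, j = 10
+; (contrast rdiv9-rdiv11); the test appears to prove existence without
+; refining the directions, hence the [* *] entries.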
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
+ store i32 %0, i32* %B.addr.11, align 4
+ %inc = add nsw i64 %j.02, 1
+ %cmp2 = icmp slt i64 %inc, 11
+ br i1 %cmp2, label %for.body3, label %for.inc5
+
+for.inc5: ; preds = %for.body3
+ %inc6 = add nsw i64 %i.03, 1
+ %cmp = icmp slt i64 %inc6, 6
+ br i1 %cmp, label %for.cond1.preheader, label %for.end7
+
+for.end7: ; preds = %for.inc5
+ ret void
+}
diff --git a/test/Analysis/DependenceAnalysis/ExactSIV.ll b/test/Analysis/DependenceAnalysis/ExactSIV.ll
new file mode 100644
index 00000000000..71e05024629
--- /dev/null
+++ b/test/Analysis/DependenceAnalysis/ExactSIV.ll
@@ -0,0 +1,428 @@
+; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
+
+; ModuleID = 'ExactSIV.bc'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+
+;; for (long unsigned i = 0; i < 10; i++) {
+;; A[i + 10] = ...
+;; ... = A[2*i + 1];
+
+define void @exact0(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %add = add i64 %i.02, 10
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul = shl i64 %i.02, 1
+ %add13 = or i64 %mul, 1
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %add13
+ %0 = load i32* %arrayidx2, align 4
+; CHECK: da analyze - flow [<=|<]!
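+; i + 10 = 2*i' + 1 gives i = 2*i' - 9, so the in-range pairs are (1,5),
+; (3,6), (5,7), (7,8), and (9,9): always i <= i', and the equal pair
+; presumably accounts for the loop-independent "|<" component.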
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 10
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 10; i++) {
+;; A[4*i + 10] = ...
+;; ... = A[2*i + 1];
+
+define void @exact1(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = shl i64 %i.02, 2
+ %add = add i64 %mul, 10
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul1 = shl i64 %i.02, 1
+ %add23 = or i64 %mul1, 1
+ %arrayidx3 = getelementptr inbounds i32* %A, i64 %add23
+ %0 = load i32* %arrayidx3, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 10
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 10; i++) {
+;; A[6*i] = ...
+;; ... = A[i + 60];
+
+define void @exact2(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = mul i64 %i.02, 6
+ %arrayidx = getelementptr inbounds i32* %A, i64 %mul
+ store i32 %conv, i32* %arrayidx, align 4
+ %add = add i64 %i.02, 60
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 %add
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 10
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i <= 10; i++) {
+;; A[6*i] = ...
+;; ... = A[i + 60];
+
+define void @exact3(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = mul i64 %i.02, 6
+ %arrayidx = getelementptr inbounds i32* %A, i64 %mul
+ store i32 %conv, i32* %arrayidx, align 4
+ %add = add i64 %i.02, 60
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 %add
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - flow [>]!
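+; 6*i = i' + 60 needs i' = 6*i - 60 >= 0, so the only in-range pair is
+; (i, i') = (10, 0), available only with the inclusive bound i <= 10;
+; the store's iteration 10 exceeds the load's iteration 0, hence [>].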
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 11
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 12; i++) {
+;; A[6*i] = ...
+;; ... = A[i + 60];
+
+define void @exact4(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = mul i64 %i.02, 6
+ %arrayidx = getelementptr inbounds i32* %A, i64 %mul
+ store i32 %conv, i32* %arrayidx, align 4
+ %add = add i64 %i.02, 60
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 %add
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - flow [>]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 12
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i <= 12; i++) {
+;; A[6*i] = ...
+;; ... = A[i + 60];
+
+define void @exact5(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = mul i64 %i.02, 6
+ %arrayidx = getelementptr inbounds i32* %A, i64 %mul
+ store i32 %conv, i32* %arrayidx, align 4
+ %add = add i64 %i.02, 60
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 %add
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - flow [=>|<]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 13
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 18; i++) {
+;; A[6*i] = ...
+;; ... = A[i + 60];
+
+define void @exact6(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = mul i64 %i.02, 6
+ %arrayidx = getelementptr inbounds i32* %A, i64 %mul
+ store i32 %conv, i32* %arrayidx, align 4
+ %add = add i64 %i.02, 60
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 %add
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - flow [=>|<]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 18
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i <= 18; i++) {
+;; A[6*i] = ...
+;; ... = A[i + 60];
+
+define void @exact7(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = mul i64 %i.02, 6
+ %arrayidx = getelementptr inbounds i32* %A, i64 %mul
+ store i32 %conv, i32* %arrayidx, align 4
+ %add = add i64 %i.02, 60
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 %add
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - flow [*|<]!
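+; With i <= 18, i' = 6*i - 60 stays in range for i = 10..13, giving the
+; pairs (10,0), (11,6), (12,12), (13,18); directions >, =, and < all
+; occur, which collapses to [*], with (12,12) the equal pair.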
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 19
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 10; i++) {
+;; A[-6*i] = ...
+;; ... = A[-i - 60];
+
+define void @exact8(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = mul i64 %i.02, -6
+ %arrayidx = getelementptr inbounds i32* %A, i64 %mul
+ store i32 %conv, i32* %arrayidx, align 4
+ %sub1 = sub i64 -60, %i.02
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %sub1
+ %0 = load i32* %arrayidx2, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 10
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i <= 10; i++) {
+;; A[-6*i] = ...
+;; ... = A[-i - 60];
+
+define void @exact9(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = mul i64 %i.02, -6
+ %arrayidx = getelementptr inbounds i32* %A, i64 %mul
+ store i32 %conv, i32* %arrayidx, align 4
+ %sub1 = sub i64 -60, %i.02
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %sub1
+ %0 = load i32* %arrayidx2, align 4
+; CHECK: da analyze - flow [>]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 11
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 12; i++) {
+;; A[-6*i] = ...
+;; ... = A[-i - 60];
+
+define void @exact10(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = mul i64 %i.02, -6
+ %arrayidx = getelementptr inbounds i32* %A, i64 %mul
+ store i32 %conv, i32* %arrayidx, align 4
+ %sub1 = sub i64 -60, %i.02
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %sub1
+ %0 = load i32* %arrayidx2, align 4
+; CHECK: da analyze - flow [>]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 12
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i <= 12; i++) {
+;; A[-6*i] = ...
+;; ... = A[-i - 60];
+
+define void @exact11(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = mul i64 %i.02, -6
+ %arrayidx = getelementptr inbounds i32* %A, i64 %mul
+ store i32 %conv, i32* %arrayidx, align 4
+ %sub1 = sub i64 -60, %i.02
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %sub1
+ %0 = load i32* %arrayidx2, align 4
+; CHECK: da analyze - flow [=>|<]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 13
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 18; i++) {
+;; A[-6*i] = ...
+;; ... = A[-i - 60];
+
+define void @exact12(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = mul i64 %i.02, -6
+ %arrayidx = getelementptr inbounds i32* %A, i64 %mul
+ store i32 %conv, i32* %arrayidx, align 4
+ %sub1 = sub i64 -60, %i.02
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %sub1
+ %0 = load i32* %arrayidx2, align 4
+; CHECK: da analyze - flow [=>|<]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 18
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i <= 18; i++) {
+;; A[-6*i] = ...
+;; ... = A[-i - 60];
+
+define void @exact13(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = mul i64 %i.02, -6
+ %arrayidx = getelementptr inbounds i32* %A, i64 %mul
+ store i32 %conv, i32* %arrayidx, align 4
+ %sub1 = sub i64 -60, %i.02
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %sub1
+ %0 = load i32* %arrayidx2, align 4
+; CHECK: da analyze - flow [*|<]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 19
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Analysis/DependenceAnalysis/GCD.ll b/test/Analysis/DependenceAnalysis/GCD.ll
new file mode 100644
index 00000000000..94c93a8a0dd
--- /dev/null
+++ b/test/Analysis/DependenceAnalysis/GCD.ll
@@ -0,0 +1,597 @@
+; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
+
+; ModuleID = 'GCD.bc'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+
+;; for (long int i = 0; i < 100; i++)
+;; for (long int j = 0; j < 100; j++)
+;; A[2*i - 4*j] = ...
+;; ... = A[6*i + 8*j];
+
+define void @gcd0(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc8
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc8 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.cond1.preheader, %for.body3
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = shl nsw i64 %i.03, 1
+ %mul4 = shl nsw i64 %j.02, 2
+ %sub = sub nsw i64 %mul, %mul4
+ %arrayidx = getelementptr inbounds i32* %A, i64 %sub
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul5 = mul nsw i64 %i.03, 6
+ %mul6 = shl nsw i64 %j.02, 3
+ %add = add nsw i64 %mul5, %mul6
+ %arrayidx7 = getelementptr inbounds i32* %A, i64 %add
+ %0 = load i32* %arrayidx7, align 4
+; CHECK: da analyze - flow [=> *|<]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
+ store i32 %0, i32* %B.addr.11, align 4
+ %inc = add nsw i64 %j.02, 1
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body3, label %for.inc8
+
+for.inc8: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 100
+ %inc9 = add nsw i64 %i.03, 1
+ %exitcond5 = icmp ne i64 %inc9, 100
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end10
+
+for.end10: ; preds = %for.inc8
+ ret void
+}
+
+
+;; for (long int i = 0; i < 100; i++)
+;; for (long int j = 0; j < 100; j++)
+;; A[2*i - 4*j] = ...
+;; ... = A[6*i + 8*j + 1];
+
+define void @gcd1(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc9
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc9 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc10, %for.inc9 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.cond1.preheader, %for.body3
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = shl nsw i64 %i.03, 1
+ %mul4 = shl nsw i64 %j.02, 2
+ %sub = sub nsw i64 %mul, %mul4
+ %arrayidx = getelementptr inbounds i32* %A, i64 %sub
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul5 = mul nsw i64 %i.03, 6
+ %mul6 = shl nsw i64 %j.02, 3
+ %add = add nsw i64 %mul5, %mul6
+ %add7 = or i64 %add, 1
+ %arrayidx8 = getelementptr inbounds i32* %A, i64 %add7
+ %0 = load i32* %arrayidx8, align 4
+; CHECK: da analyze - none!
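+; The dependence equation 2*i - 4*j - 6*i' - 8*j' = 1 has every
+; coefficient divisible by 2 but an odd constant, so no integer solution
+; exists; gcd0 above differs only in the constant (0), which 2 divides.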
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
+ store i32 %0, i32* %B.addr.11, align 4
+ %inc = add nsw i64 %j.02, 1
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body3, label %for.inc9
+
+for.inc9: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 100
+ %inc10 = add nsw i64 %i.03, 1
+ %exitcond5 = icmp ne i64 %inc10, 100
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end11
+
+for.end11: ; preds = %for.inc9
+ ret void
+}
+
+
+;; for (long int i = 0; i < 100; i++)
+;; for (long int j = 0; j < 100; j++)
+;; A[2*i - 4*j + 1] = ...
+;; ... = A[6*i + 8*j];
+
+define void @gcd2(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc9
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc9 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc10, %for.inc9 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.cond1.preheader, %for.body3
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = shl nsw i64 %i.03, 1
+ %mul4 = shl nsw i64 %j.02, 2
+ %sub = sub nsw i64 %mul, %mul4
+ %add5 = or i64 %sub, 1
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add5
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul5 = mul nsw i64 %i.03, 6
+ %mul6 = shl nsw i64 %j.02, 3
+ %add7 = add nsw i64 %mul5, %mul6
+ %arrayidx8 = getelementptr inbounds i32* %A, i64 %add7
+ %0 = load i32* %arrayidx8, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
+ store i32 %0, i32* %B.addr.11, align 4
+ %inc = add nsw i64 %j.02, 1
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body3, label %for.inc9
+
+for.inc9: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 100
+ %inc10 = add nsw i64 %i.03, 1
+ %exitcond6 = icmp ne i64 %inc10, 100
+ br i1 %exitcond6, label %for.cond1.preheader, label %for.end11
+
+for.end11: ; preds = %for.inc9
+ ret void
+}
+
+
+;; for (long int i = 0; i < 100; i++)
+;; for (long int j = 0; j < 100; j++)
+;; A[i + 2*j] = ...
+;; ... = A[i + 2*j - 1];
+
+define void @gcd3(i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc7
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc7 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.cond1.preheader, %for.body3
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = shl nsw i64 %j.02, 1
+ %add = add nsw i64 %i.03, %mul
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul4 = shl nsw i64 %j.02, 1
+ %add5 = add nsw i64 %i.03, %mul4
+ %sub = add nsw i64 %add5, -1
+ %arrayidx6 = getelementptr inbounds i32* %A, i64 %sub
+ %0 = load i32* %arrayidx6, align 4
+; CHECK: da analyze - flow [<> *]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
+ store i32 %0, i32* %B.addr.11, align 4
+ %inc = add nsw i64 %j.02, 1
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body3, label %for.inc7
+
+for.inc7: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 100
+ %inc8 = add nsw i64 %i.03, 1
+ %exitcond5 = icmp ne i64 %inc8, 100
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end9
+
+for.end9: ; preds = %for.inc7
+ ret void
+}
+
+
+;; void gcd4(int *A, int *B, long int M, long int N) {
+;; for (long int i = 0; i < 100; i++)
+;; for (long int j = 0; j < 100; j++) {
+;; A[5*i + 10*j*M + 9*M*N] = i;
+;; *B++ = A[15*i + 20*j*M - 21*N*M + 4];
+
+define void @gcd4(i32* %A, i32* %B, i64 %M, i64 %N) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc17
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc17 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc18, %for.inc17 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.cond1.preheader, %for.body3
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = mul nsw i64 %i.03, 5
+ %mul4 = mul nsw i64 %j.02, 10
+ %mul5 = mul nsw i64 %mul4, %M
+ %add = add nsw i64 %mul, %mul5
+ %mul6 = mul nsw i64 %M, 9
+ %mul7 = mul nsw i64 %mul6, %N
+ %add8 = add nsw i64 %add, %mul7
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add8
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul9 = mul nsw i64 %i.03, 15
+ %mul10 = mul nsw i64 %j.02, 20
+ %mul11 = mul nsw i64 %mul10, %M
+ %add12 = add nsw i64 %mul9, %mul11
+ %mul13 = mul nsw i64 %N, 21
+ %mul14 = mul nsw i64 %mul13, %M
+ %sub = sub nsw i64 %add12, %mul14
+ %add15 = add nsw i64 %sub, 4
+ %arrayidx16 = getelementptr inbounds i32* %A, i64 %add15
+ %0 = load i32* %arrayidx16, align 4
+; CHECK: da analyze - none!
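+; Rearranged, 5*i + 10*j*M - 15*i' - 20*j'*M + 30*M*N = 4: every term on
+; the left is a multiple of 5, but 5 does not divide 4, so no solution
+; exists; gcd5 below replaces the 4 with a 5 and the dependence returns.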
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
+ store i32 %0, i32* %B.addr.11, align 4
+ %inc = add nsw i64 %j.02, 1
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body3, label %for.inc17
+
+for.inc17: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 100
+ %inc18 = add nsw i64 %i.03, 1
+ %exitcond5 = icmp ne i64 %inc18, 100
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end19
+
+for.end19: ; preds = %for.inc17
+ ret void
+}
+
+
+;; void gcd5(int *A, int *B, long int M, long int N) {
+;; for (long int i = 0; i < 100; i++)
+;; for (long int j = 0; j < 100; j++) {
+;; A[5*i + 10*j*M + 9*M*N] = i;
+;; *B++ = A[15*i + 20*j*M - 21*N*M + 5];
+
+define void @gcd5(i32* %A, i32* %B, i64 %M, i64 %N) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc17
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc17 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc18, %for.inc17 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.cond1.preheader, %for.body3
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = mul nsw i64 %i.03, 5
+ %mul4 = mul nsw i64 %j.02, 10
+ %mul5 = mul nsw i64 %mul4, %M
+ %add = add nsw i64 %mul, %mul5
+ %mul6 = mul nsw i64 %M, 9
+ %mul7 = mul nsw i64 %mul6, %N
+ %add8 = add nsw i64 %add, %mul7
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add8
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul9 = mul nsw i64 %i.03, 15
+ %mul10 = mul nsw i64 %j.02, 20
+ %mul11 = mul nsw i64 %mul10, %M
+ %add12 = add nsw i64 %mul9, %mul11
+ %mul13 = mul nsw i64 %N, 21
+ %mul14 = mul nsw i64 %mul13, %M
+ %sub = sub nsw i64 %add12, %mul14
+ %add15 = add nsw i64 %sub, 5
+ %arrayidx16 = getelementptr inbounds i32* %A, i64 %add15
+ %0 = load i32* %arrayidx16, align 4
+; CHECK: da analyze - flow [<> *]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
+ store i32 %0, i32* %B.addr.11, align 4
+ %inc = add nsw i64 %j.02, 1
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body3, label %for.inc17
+
+for.inc17: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 100
+ %inc18 = add nsw i64 %i.03, 1
+ %exitcond5 = icmp ne i64 %inc18, 100
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end19
+
+for.end19: ; preds = %for.inc17
+ ret void
+}
+
+
+;; void gcd6(long int n, int A[][n], int *B) {
+;; for (long int i = 0; i < n; i++)
+;; for (long int j = 0; j < n; j++) {
+;; A[2*i][4*j] = i;
+;; *B++ = A[8*i][6*j + 1];
+
+define void @gcd6(i64 %n, i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ %cmp4 = icmp sgt i64 %n, 0
+ br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end12
+
+for.cond1.preheader.preheader: ; preds = %entry
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc10
+ %i.06 = phi i64 [ %inc11, %for.inc10 ], [ 0, %for.cond1.preheader.preheader ]
+ %B.addr.05 = phi i32* [ %B.addr.1.lcssa, %for.inc10 ], [ %B, %for.cond1.preheader.preheader ]
+ %cmp21 = icmp sgt i64 %n, 0
+ br i1 %cmp21, label %for.body3.preheader, label %for.inc10
+
+for.body3.preheader: ; preds = %for.cond1.preheader
+ br label %for.body3
+
+for.body3: ; preds = %for.body3.preheader, %for.body3
+ %j.03 = phi i64 [ %inc, %for.body3 ], [ 0, %for.body3.preheader ]
+ %B.addr.12 = phi i32* [ %incdec.ptr, %for.body3 ], [ %B.addr.05, %for.body3.preheader ]
+ %conv = trunc i64 %i.06 to i32
+ %mul = shl nsw i64 %j.03, 2
+ %mul4 = shl nsw i64 %i.06, 1
+ %0 = mul nsw i64 %mul4, %n
+ %arrayidx.sum = add i64 %0, %mul
+ %arrayidx5 = getelementptr inbounds i32* %A, i64 %arrayidx.sum
+ store i32 %conv, i32* %arrayidx5, align 4
+ %mul6 = mul nsw i64 %j.03, 6
+ %add7 = or i64 %mul6, 1
+ %mul7 = shl nsw i64 %i.06, 3
+ %1 = mul nsw i64 %mul7, %n
+ %arrayidx8.sum = add i64 %1, %add7
+ %arrayidx9 = getelementptr inbounds i32* %A, i64 %arrayidx8.sum
+ %2 = load i32* %arrayidx9, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.12, i64 1
+ store i32 %2, i32* %B.addr.12, align 4
+ %inc = add nsw i64 %j.03, 1
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body3, label %for.inc10.loopexit
+
+for.inc10.loopexit: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.05, i64 %n
+ br label %for.inc10
+
+for.inc10: ; preds = %for.inc10.loopexit, %for.cond1.preheader
+ %B.addr.1.lcssa = phi i32* [ %B.addr.05, %for.cond1.preheader ], [ %scevgep, %for.inc10.loopexit ]
+ %inc11 = add nsw i64 %i.06, 1
+ %exitcond8 = icmp ne i64 %inc11, %n
+ br i1 %exitcond8, label %for.cond1.preheader, label %for.end12.loopexit
+
+for.end12.loopexit: ; preds = %for.inc10
+ br label %for.end12
+
+for.end12: ; preds = %for.end12.loopexit, %entry
+ ret void
+}
+
+
+;; void gcd7(int n, int A[][n], int *B) {
+;; for (int i = 0; i < n; i++)
+;; for (int j = 0; j < n; j++) {
+;; A[2*i][4*j] = i;
+;; *B++ = A[8*i][6*j + 1];
+
+define void @gcd7(i32 %n, i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ %0 = zext i32 %n to i64
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end15
+
+for.cond1.preheader.preheader: ; preds = %entry
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc13
+ %indvars.iv8 = phi i64 [ 0, %for.cond1.preheader.preheader ], [ %indvars.iv.next9, %for.inc13 ]
+ %B.addr.05 = phi i32* [ %B.addr.1.lcssa, %for.inc13 ], [ %B, %for.cond1.preheader.preheader ]
+ %1 = add i32 %n, -1
+ %2 = zext i32 %1 to i64
+ %3 = add i64 %2, 1
+ %cmp21 = icmp sgt i32 %n, 0
+ br i1 %cmp21, label %for.body3.preheader, label %for.inc13
+
+for.body3.preheader: ; preds = %for.cond1.preheader
+ br label %for.body3
+
+for.body3: ; preds = %for.body3.preheader, %for.body3
+ %indvars.iv = phi i64 [ 0, %for.body3.preheader ], [ %indvars.iv.next, %for.body3 ]
+ %B.addr.12 = phi i32* [ %incdec.ptr, %for.body3 ], [ %B.addr.05, %for.body3.preheader ]
+ %4 = trunc i64 %indvars.iv to i32
+ %mul = shl nsw i32 %4, 2
+ %idxprom = sext i32 %mul to i64
+ %5 = trunc i64 %indvars.iv8 to i32
+ %mul4 = shl nsw i32 %5, 1
+ %idxprom5 = sext i32 %mul4 to i64
+ %6 = mul nsw i64 %idxprom5, %0
+ %arrayidx.sum = add i64 %6, %idxprom
+ %arrayidx6 = getelementptr inbounds i32* %A, i64 %arrayidx.sum
+ %7 = trunc i64 %indvars.iv8 to i32
+ store i32 %7, i32* %arrayidx6, align 4
+ %8 = trunc i64 %indvars.iv to i32
+ %mul7 = mul nsw i32 %8, 6
+ %add7 = or i32 %mul7, 1
+ %idxprom8 = sext i32 %add7 to i64
+ %9 = trunc i64 %indvars.iv8 to i32
+ %mul9 = shl nsw i32 %9, 3
+ %idxprom10 = sext i32 %mul9 to i64
+ %10 = mul nsw i64 %idxprom10, %0
+ %arrayidx11.sum = add i64 %10, %idxprom8
+ %arrayidx12 = getelementptr inbounds i32* %A, i64 %arrayidx11.sum
+ %11 = load i32* %arrayidx12, align 4
+; CHECK: da analyze - flow [* *|<]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.12, i64 1
+ store i32 %11, i32* %B.addr.12, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body3, label %for.inc13.loopexit
+
+for.inc13.loopexit: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.05, i64 %3
+ br label %for.inc13
+
+for.inc13: ; preds = %for.inc13.loopexit, %for.cond1.preheader
+ %B.addr.1.lcssa = phi i32* [ %B.addr.05, %for.cond1.preheader ], [ %scevgep, %for.inc13.loopexit ]
+ %indvars.iv.next9 = add i64 %indvars.iv8, 1
+ %lftr.wideiv10 = trunc i64 %indvars.iv.next9 to i32
+ %exitcond11 = icmp ne i32 %lftr.wideiv10, %n
+ br i1 %exitcond11, label %for.cond1.preheader, label %for.end15.loopexit
+
+for.end15.loopexit: ; preds = %for.inc13
+ br label %for.end15
+
+for.end15: ; preds = %for.end15.loopexit, %entry
+ ret void
+}
+
+
+;; void gcd8(int n, int *A, int *B) {
+;; for (int i = 0; i < n; i++)
+;; for (int j = 0; j < n; j++) {
+;; A[n*2*i + 4*j] = i;
+;; *B++ = A[n*8*i + 6*j + 1];
+
+define void @gcd8(i32 %n, i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end15
+
+for.cond1.preheader.preheader: ; preds = %entry
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc13
+ %i.06 = phi i32 [ %inc14, %for.inc13 ], [ 0, %for.cond1.preheader.preheader ]
+ %B.addr.05 = phi i32* [ %B.addr.1.lcssa, %for.inc13 ], [ %B, %for.cond1.preheader.preheader ]
+ %0 = add i32 %n, -1
+ %1 = zext i32 %0 to i64
+ %2 = add i64 %1, 1
+ %cmp21 = icmp sgt i32 %n, 0
+ br i1 %cmp21, label %for.body3.preheader, label %for.inc13
+
+for.body3.preheader: ; preds = %for.cond1.preheader
+ br label %for.body3
+
+for.body3: ; preds = %for.body3.preheader, %for.body3
+ %indvars.iv = phi i64 [ 0, %for.body3.preheader ], [ %indvars.iv.next, %for.body3 ]
+ %B.addr.12 = phi i32* [ %incdec.ptr, %for.body3 ], [ %B.addr.05, %for.body3.preheader ]
+ %mul = shl nsw i32 %n, 1
+ %mul4 = mul nsw i32 %mul, %i.06
+ %3 = trunc i64 %indvars.iv to i32
+ %mul5 = shl nsw i32 %3, 2
+ %add = add nsw i32 %mul4, %mul5
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
+ store i32 %i.06, i32* %arrayidx, align 4
+ %mul6 = shl nsw i32 %n, 3
+ %mul7 = mul nsw i32 %mul6, %i.06
+ %4 = trunc i64 %indvars.iv to i32
+ %mul8 = mul nsw i32 %4, 6
+ %add9 = add nsw i32 %mul7, %mul8
+ %add10 = or i32 %add9, 1
+ %idxprom11 = sext i32 %add10 to i64
+ %arrayidx12 = getelementptr inbounds i32* %A, i64 %idxprom11
+ %5 = load i32* %arrayidx12, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.12, i64 1
+ store i32 %5, i32* %B.addr.12, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body3, label %for.inc13.loopexit
+
+for.inc13.loopexit: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.05, i64 %2
+ br label %for.inc13
+
+for.inc13: ; preds = %for.inc13.loopexit, %for.cond1.preheader
+ %B.addr.1.lcssa = phi i32* [ %B.addr.05, %for.cond1.preheader ], [ %scevgep, %for.inc13.loopexit ]
+ %inc14 = add nsw i32 %i.06, 1
+ %exitcond7 = icmp ne i32 %inc14, %n
+ br i1 %exitcond7, label %for.cond1.preheader, label %for.end15.loopexit
+
+for.end15.loopexit: ; preds = %for.inc13
+ br label %for.end15
+
+for.end15: ; preds = %for.end15.loopexit, %entry
+ ret void
+}
+
+
+;; void gcd9(unsigned n, int A[][n], int *B) {
+;; for (unsigned i = 0; i < n; i++)
+;; for (unsigned j = 0; j < n; j++) {
+;; A[2*i][4*j] = i;
+;; *B++ = A[8*i][6*j + 1];
+
+define void @gcd9(i32 %n, i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ %0 = zext i32 %n to i64
+ %cmp4 = icmp eq i32 %n, 0
+ br i1 %cmp4, label %for.end15, label %for.cond1.preheader.preheader
+
+for.cond1.preheader.preheader: ; preds = %entry
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc13
+ %indvars.iv8 = phi i64 [ 0, %for.cond1.preheader.preheader ], [ %indvars.iv.next9, %for.inc13 ]
+ %B.addr.05 = phi i32* [ %B.addr.1.lcssa, %for.inc13 ], [ %B, %for.cond1.preheader.preheader ]
+ %1 = add i32 %n, -1
+ %2 = zext i32 %1 to i64
+ %3 = add i64 %2, 1
+ %cmp21 = icmp eq i32 %n, 0
+ br i1 %cmp21, label %for.inc13, label %for.body3.preheader
+
+for.body3.preheader: ; preds = %for.cond1.preheader
+ br label %for.body3
+
+for.body3: ; preds = %for.body3.preheader, %for.body3
+ %indvars.iv = phi i64 [ 0, %for.body3.preheader ], [ %indvars.iv.next, %for.body3 ]
+ %B.addr.12 = phi i32* [ %incdec.ptr, %for.body3 ], [ %B.addr.05, %for.body3.preheader ]
+ %4 = trunc i64 %indvars.iv to i32
+ %mul = shl i32 %4, 2
+ %idxprom = zext i32 %mul to i64
+ %5 = trunc i64 %indvars.iv8 to i32
+ %mul4 = shl i32 %5, 1
+ %idxprom5 = zext i32 %mul4 to i64
+ %6 = mul nsw i64 %idxprom5, %0
+ %arrayidx.sum = add i64 %6, %idxprom
+ %arrayidx6 = getelementptr inbounds i32* %A, i64 %arrayidx.sum
+ %7 = trunc i64 %indvars.iv8 to i32
+ store i32 %7, i32* %arrayidx6, align 4
+ %8 = trunc i64 %indvars.iv to i32
+ %mul7 = mul i32 %8, 6
+ %add7 = or i32 %mul7, 1
+ %idxprom8 = zext i32 %add7 to i64
+ %9 = trunc i64 %indvars.iv8 to i32
+ %mul9 = shl i32 %9, 3
+ %idxprom10 = zext i32 %mul9 to i64
+ %10 = mul nsw i64 %idxprom10, %0
+ %arrayidx11.sum = add i64 %10, %idxprom8
+ %arrayidx12 = getelementptr inbounds i32* %A, i64 %arrayidx11.sum
+ %11 = load i32* %arrayidx12, align 4
+; CHECK: da analyze - flow [* *|<]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.12, i64 1
+ store i32 %11, i32* %B.addr.12, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body3, label %for.inc13.loopexit
+
+for.inc13.loopexit: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.05, i64 %3
+ br label %for.inc13
+
+for.inc13: ; preds = %for.inc13.loopexit, %for.cond1.preheader
+ %B.addr.1.lcssa = phi i32* [ %B.addr.05, %for.cond1.preheader ], [ %scevgep, %for.inc13.loopexit ]
+ %indvars.iv.next9 = add i64 %indvars.iv8, 1
+ %lftr.wideiv10 = trunc i64 %indvars.iv.next9 to i32
+ %exitcond11 = icmp ne i32 %lftr.wideiv10, %n
+ br i1 %exitcond11, label %for.cond1.preheader, label %for.end15.loopexit
+
+for.end15.loopexit: ; preds = %for.inc13
+ br label %for.end15
+
+for.end15: ; preds = %for.end15.loopexit, %entry
+ ret void
+}
diff --git a/test/Analysis/DependenceAnalysis/Preliminary.ll b/test/Analysis/DependenceAnalysis/Preliminary.ll
new file mode 100644
index 00000000000..3ef63fd5592
--- /dev/null
+++ b/test/Analysis/DependenceAnalysis/Preliminary.ll
@@ -0,0 +1,469 @@
+; RUN: opt < %s -analyze -basicaa -indvars -da | FileCheck %s
+
+; This series of tests is more interesting when debugging is enabled.
+
+; ModuleID = 'Preliminary.bc'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+
+;; may alias
+;; int p0(int n, int *A, int *B) {
+;; A[0] = n;
+;; return B[1];
+
+define i32 @p0(i32 %n, i32* %A, i32* %B) nounwind uwtable ssp {
+entry:
+ store i32 %n, i32* %A, align 4
+ %arrayidx1 = getelementptr inbounds i32* %B, i64 1
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - confused!
+ ret i32 %0
+}
+
+
+;; no alias
+;; int p1(int n, int *restrict A, int *restrict B) {
+;; A[0] = n;
+;; return B[1];
+
+define i32 @p1(i32 %n, i32* noalias %A, i32* noalias %B) nounwind uwtable ssp {
+entry:
+ store i32 %n, i32* %A, align 4
+ %arrayidx1 = getelementptr inbounds i32* %B, i64 1
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - none!
+ ret i32 %0
+}
+
+;; check loop nesting levels
+;; for (long int i = 0; i < n; i++)
+;; for (long int j = 0; j < n; j++)
+;; for (long int k = 0; k < n; k++)
+;; A[i][j][k] = ...
+;; for (long int k = 0; k < n; k++)
+;; ... = A[i + 3][j + 2][k + 1];
+
+define void @p2(i64 %n, [100 x [100 x i64]]* %A, i64* %B) nounwind uwtable ssp {
+entry:
+ %cmp10 = icmp sgt i64 %n, 0
+ br i1 %cmp10, label %for.cond1.preheader, label %for.end26
+
+for.cond1.preheader: ; preds = %for.inc24, %entry
+ %B.addr.012 = phi i64* [ %B.addr.1.lcssa, %for.inc24 ], [ %B, %entry ]
+ %i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %entry ]
+ %cmp26 = icmp sgt i64 %n, 0
+ br i1 %cmp26, label %for.cond4.preheader, label %for.inc24
+
+for.cond4.preheader: ; preds = %for.inc21, %for.cond1.preheader
+ %B.addr.18 = phi i64* [ %B.addr.2.lcssa, %for.inc21 ], [ %B.addr.012, %for.cond1.preheader ]
+ %j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond1.preheader ]
+ %cmp51 = icmp sgt i64 %n, 0
+ br i1 %cmp51, label %for.body6, label %for.cond10.loopexit
+
+for.body6: ; preds = %for.body6, %for.cond4.preheader
+ %k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.cond4.preheader ]
+ %arrayidx8 = getelementptr inbounds [100 x [100 x i64]]* %A, i64 %i.011, i64 %j.07, i64 %k.02
+ store i64 %i.011, i64* %arrayidx8, align 8
+ %inc = add nsw i64 %k.02, 1
+ %cmp5 = icmp slt i64 %inc, %n
+ br i1 %cmp5, label %for.body6, label %for.cond10.loopexit
+
+for.cond10.loopexit: ; preds = %for.body6, %for.cond4.preheader
+ %cmp113 = icmp sgt i64 %n, 0
+ br i1 %cmp113, label %for.body12, label %for.inc21
+
+for.body12: ; preds = %for.body12, %for.cond10.loopexit
+ %k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.cond10.loopexit ]
+ %B.addr.24 = phi i64* [ %incdec.ptr, %for.body12 ], [ %B.addr.18, %for.cond10.loopexit ]
+ %add = add nsw i64 %k9.05, 1
+ %add13 = add nsw i64 %j.07, 2
+ %add14 = add nsw i64 %i.011, 3
+ %arrayidx17 = getelementptr inbounds [100 x [100 x i64]]* %A, i64 %add14, i64 %add13, i64 %add
+ %0 = load i64* %arrayidx17, align 8
+; CHECK: da analyze - flow [-3 -2]!
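+; Worked check: the store A[i][j][k] and the load A[i+3][j+2][k+1] touch
+; the same element when i' = i - 3 and j' = j - 2. The two k loops are
+; siblings, not common, so only the two common levels are printed.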
+ %incdec.ptr = getelementptr inbounds i64* %B.addr.24, i64 1
+ store i64 %0, i64* %B.addr.24, align 8
+ %inc19 = add nsw i64 %k9.05, 1
+ %cmp11 = icmp slt i64 %inc19, %n
+ br i1 %cmp11, label %for.body12, label %for.inc21
+
+for.inc21: ; preds = %for.body12, %for.cond10.loopexit
+ %B.addr.2.lcssa = phi i64* [ %B.addr.18, %for.cond10.loopexit ], [ %incdec.ptr, %for.body12 ]
+ %inc22 = add nsw i64 %j.07, 1
+ %cmp2 = icmp slt i64 %inc22, %n
+ br i1 %cmp2, label %for.cond4.preheader, label %for.inc24
+
+for.inc24: ; preds = %for.inc21, %for.cond1.preheader
+ %B.addr.1.lcssa = phi i64* [ %B.addr.012, %for.cond1.preheader ], [ %B.addr.2.lcssa, %for.inc21 ]
+ %inc25 = add nsw i64 %i.011, 1
+ %cmp = icmp slt i64 %inc25, %n
+ br i1 %cmp, label %for.cond1.preheader, label %for.end26
+
+for.end26: ; preds = %for.inc24, %entry
+ ret void
+}
+
+
+;; classify subscripts
+;; for (long int i = 0; i < n; i++)
+;; for (long int j = 0; j < n; j++)
+;; for (long int k = 0; k < n; k++)
+;; for (long int l = 0; l < n; l++)
+;; for (long int m = 0; m < n; m++)
+;; for (long int o = 0; o < n; o++)
+;; for (long int p = 0; p < n; p++)
+;; for (long int q = 0; q < n; q++)
+;; for (long int r = 0; r < n; r++)
+;; for (long int s = 0; s < n; s++)
+;; for (long int u = 0; u < n; u++)
+;; for (long int t = 0; t < n; t++) {
+;; A[i - 3][j][2][k - 1][2*l + 1][m][p + q][r + s] = ...
+;; ... = A[i + 3][2][u][1 - k][3*l - 1][o][1 + n][t + 2];
+
+define void @p3(i64 %n, [100 x [100 x [100 x [100 x [100 x [100 x [100 x i64]]]]]]]* %A, i64* %B) nounwind uwtable ssp {
+entry:
+ %cmp44 = icmp sgt i64 %n, 0
+ br i1 %cmp44, label %for.cond1.preheader, label %for.end90
+
+for.cond1.preheader: ; preds = %for.inc88, %entry
+ %B.addr.046 = phi i64* [ %B.addr.1.lcssa, %for.inc88 ], [ %B, %entry ]
+ %i.045 = phi i64 [ %inc89, %for.inc88 ], [ 0, %entry ]
+ %cmp240 = icmp sgt i64 %n, 0
+ br i1 %cmp240, label %for.cond4.preheader, label %for.inc88
+
+for.cond4.preheader: ; preds = %for.inc85, %for.cond1.preheader
+ %B.addr.142 = phi i64* [ %B.addr.2.lcssa, %for.inc85 ], [ %B.addr.046, %for.cond1.preheader ]
+ %j.041 = phi i64 [ %inc86, %for.inc85 ], [ 0, %for.cond1.preheader ]
+ %cmp536 = icmp sgt i64 %n, 0
+ br i1 %cmp536, label %for.cond7.preheader, label %for.inc85
+
+for.cond7.preheader: ; preds = %for.inc82, %for.cond4.preheader
+ %B.addr.238 = phi i64* [ %B.addr.3.lcssa, %for.inc82 ], [ %B.addr.142, %for.cond4.preheader ]
+ %k.037 = phi i64 [ %inc83, %for.inc82 ], [ 0, %for.cond4.preheader ]
+ %cmp832 = icmp sgt i64 %n, 0
+ br i1 %cmp832, label %for.cond10.preheader, label %for.inc82
+
+for.cond10.preheader: ; preds = %for.inc79, %for.cond7.preheader
+ %B.addr.334 = phi i64* [ %B.addr.4.lcssa, %for.inc79 ], [ %B.addr.238, %for.cond7.preheader ]
+ %l.033 = phi i64 [ %inc80, %for.inc79 ], [ 0, %for.cond7.preheader ]
+ %cmp1128 = icmp sgt i64 %n, 0
+ br i1 %cmp1128, label %for.cond13.preheader, label %for.inc79
+
+for.cond13.preheader: ; preds = %for.inc76, %for.cond10.preheader
+ %B.addr.430 = phi i64* [ %B.addr.5.lcssa, %for.inc76 ], [ %B.addr.334, %for.cond10.preheader ]
+ %m.029 = phi i64 [ %inc77, %for.inc76 ], [ 0, %for.cond10.preheader ]
+ %cmp1424 = icmp sgt i64 %n, 0
+ br i1 %cmp1424, label %for.cond16.preheader, label %for.inc76
+
+for.cond16.preheader: ; preds = %for.inc73, %for.cond13.preheader
+ %B.addr.526 = phi i64* [ %B.addr.6.lcssa, %for.inc73 ], [ %B.addr.430, %for.cond13.preheader ]
+ %o.025 = phi i64 [ %inc74, %for.inc73 ], [ 0, %for.cond13.preheader ]
+ %cmp1720 = icmp sgt i64 %n, 0
+ br i1 %cmp1720, label %for.cond19.preheader, label %for.inc73
+
+for.cond19.preheader: ; preds = %for.inc70, %for.cond16.preheader
+ %B.addr.622 = phi i64* [ %B.addr.7.lcssa, %for.inc70 ], [ %B.addr.526, %for.cond16.preheader ]
+ %p.021 = phi i64 [ %inc71, %for.inc70 ], [ 0, %for.cond16.preheader ]
+ %cmp2016 = icmp sgt i64 %n, 0
+ br i1 %cmp2016, label %for.cond22.preheader, label %for.inc70
+
+for.cond22.preheader: ; preds = %for.inc67, %for.cond19.preheader
+ %B.addr.718 = phi i64* [ %B.addr.8.lcssa, %for.inc67 ], [ %B.addr.622, %for.cond19.preheader ]
+ %q.017 = phi i64 [ %inc68, %for.inc67 ], [ 0, %for.cond19.preheader ]
+ %cmp2312 = icmp sgt i64 %n, 0
+ br i1 %cmp2312, label %for.cond25.preheader, label %for.inc67
+
+for.cond25.preheader: ; preds = %for.inc64, %for.cond22.preheader
+ %B.addr.814 = phi i64* [ %B.addr.9.lcssa, %for.inc64 ], [ %B.addr.718, %for.cond22.preheader ]
+ %r.013 = phi i64 [ %inc65, %for.inc64 ], [ 0, %for.cond22.preheader ]
+ %cmp268 = icmp sgt i64 %n, 0
+ br i1 %cmp268, label %for.cond28.preheader, label %for.inc64
+
+for.cond28.preheader: ; preds = %for.inc61, %for.cond25.preheader
+ %B.addr.910 = phi i64* [ %B.addr.10.lcssa, %for.inc61 ], [ %B.addr.814, %for.cond25.preheader ]
+ %s.09 = phi i64 [ %inc62, %for.inc61 ], [ 0, %for.cond25.preheader ]
+ %cmp294 = icmp sgt i64 %n, 0
+ br i1 %cmp294, label %for.cond31.preheader, label %for.inc61
+
+for.cond31.preheader: ; preds = %for.inc58, %for.cond28.preheader
+ %u.06 = phi i64 [ %inc59, %for.inc58 ], [ 0, %for.cond28.preheader ]
+ %B.addr.105 = phi i64* [ %B.addr.11.lcssa, %for.inc58 ], [ %B.addr.910, %for.cond28.preheader ]
+ %cmp321 = icmp sgt i64 %n, 0
+ br i1 %cmp321, label %for.body33, label %for.inc58
+
+for.body33: ; preds = %for.body33, %for.cond31.preheader
+ %t.03 = phi i64 [ %inc, %for.body33 ], [ 0, %for.cond31.preheader ]
+ %B.addr.112 = phi i64* [ %incdec.ptr, %for.body33 ], [ %B.addr.105, %for.cond31.preheader ]
+ %add = add nsw i64 %r.013, %s.09
+ %add34 = add nsw i64 %p.021, %q.017
+ %mul = shl nsw i64 %l.033, 1
+ %add3547 = or i64 %mul, 1
+ %sub = add nsw i64 %k.037, -1
+ %sub36 = add nsw i64 %i.045, -3
+ %arrayidx43 = getelementptr inbounds [100 x [100 x [100 x [100 x [100 x [100 x [100 x i64]]]]]]]* %A, i64 %sub36, i64 %j.041, i64 2, i64 %sub, i64 %add3547, i64 %m.029, i64 %add34, i64 %add
+ store i64 %i.045, i64* %arrayidx43, align 8
+ %add44 = add nsw i64 %t.03, 2
+ %add45 = add nsw i64 %n, 1
+ %mul46 = mul nsw i64 %l.033, 3
+ %sub47 = add nsw i64 %mul46, -1
+ %sub48 = sub nsw i64 1, %k.037
+ %add49 = add nsw i64 %i.045, 3
+ %arrayidx57 = getelementptr inbounds [100 x [100 x [100 x [100 x [100 x [100 x [100 x i64]]]]]]]* %A, i64 %add49, i64 2, i64 %u.06, i64 %sub48, i64 %sub47, i64 %o.025, i64 %add45, i64 %add44
+ %0 = load i64* %arrayidx57, align 8
+; CHECK: da analyze - flow [-6 * * => * * * * * * * *] splitable!
+; CHECK: da analyze - split level = 3, iteration = 1!
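+; The first subscript gives i' = i - 6, the leading -6. The fourth,
+; k - 1 = 1 - k', gives k + k' = 2, so splitting the k loop (level 3) at
+; iteration 1 separates the conflicting endpoints; 2*l + 1 = 3*l' - 1 has
+; integer solutions only with l' <= l, the => at level 4.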
+ %incdec.ptr = getelementptr inbounds i64* %B.addr.112, i64 1
+ store i64 %0, i64* %B.addr.112, align 8
+ %inc = add nsw i64 %t.03, 1
+ %cmp32 = icmp slt i64 %inc, %n
+ br i1 %cmp32, label %for.body33, label %for.inc58
+
+for.inc58: ; preds = %for.body33, %for.cond31.preheader
+ %B.addr.11.lcssa = phi i64* [ %B.addr.105, %for.cond31.preheader ], [ %incdec.ptr, %for.body33 ]
+ %inc59 = add nsw i64 %u.06, 1
+ %cmp29 = icmp slt i64 %inc59, %n
+ br i1 %cmp29, label %for.cond31.preheader, label %for.inc61
+
+for.inc61: ; preds = %for.inc58, %for.cond28.preheader
+ %B.addr.10.lcssa = phi i64* [ %B.addr.910, %for.cond28.preheader ], [ %B.addr.11.lcssa, %for.inc58 ]
+ %inc62 = add nsw i64 %s.09, 1
+ %cmp26 = icmp slt i64 %inc62, %n
+ br i1 %cmp26, label %for.cond28.preheader, label %for.inc64
+
+for.inc64: ; preds = %for.inc61, %for.cond25.preheader
+ %B.addr.9.lcssa = phi i64* [ %B.addr.814, %for.cond25.preheader ], [ %B.addr.10.lcssa, %for.inc61 ]
+ %inc65 = add nsw i64 %r.013, 1
+ %cmp23 = icmp slt i64 %inc65, %n
+ br i1 %cmp23, label %for.cond25.preheader, label %for.inc67
+
+for.inc67: ; preds = %for.inc64, %for.cond22.preheader
+ %B.addr.8.lcssa = phi i64* [ %B.addr.718, %for.cond22.preheader ], [ %B.addr.9.lcssa, %for.inc64 ]
+ %inc68 = add nsw i64 %q.017, 1
+ %cmp20 = icmp slt i64 %inc68, %n
+ br i1 %cmp20, label %for.cond22.preheader, label %for.inc70
+
+for.inc70: ; preds = %for.inc67, %for.cond19.preheader
+ %B.addr.7.lcssa = phi i64* [ %B.addr.622, %for.cond19.preheader ], [ %B.addr.8.lcssa, %for.inc67 ]
+ %inc71 = add nsw i64 %p.021, 1
+ %cmp17 = icmp slt i64 %inc71, %n
+ br i1 %cmp17, label %for.cond19.preheader, label %for.inc73
+
+for.inc73: ; preds = %for.inc70, %for.cond16.preheader
+ %B.addr.6.lcssa = phi i64* [ %B.addr.526, %for.cond16.preheader ], [ %B.addr.7.lcssa, %for.inc70 ]
+ %inc74 = add nsw i64 %o.025, 1
+ %cmp14 = icmp slt i64 %inc74, %n
+ br i1 %cmp14, label %for.cond16.preheader, label %for.inc76
+
+for.inc76: ; preds = %for.inc73, %for.cond13.preheader
+ %B.addr.5.lcssa = phi i64* [ %B.addr.430, %for.cond13.preheader ], [ %B.addr.6.lcssa, %for.inc73 ]
+ %inc77 = add nsw i64 %m.029, 1
+ %cmp11 = icmp slt i64 %inc77, %n
+ br i1 %cmp11, label %for.cond13.preheader, label %for.inc79
+
+for.inc79: ; preds = %for.inc76, %for.cond10.preheader
+ %B.addr.4.lcssa = phi i64* [ %B.addr.334, %for.cond10.preheader ], [ %B.addr.5.lcssa, %for.inc76 ]
+ %inc80 = add nsw i64 %l.033, 1
+ %cmp8 = icmp slt i64 %inc80, %n
+ br i1 %cmp8, label %for.cond10.preheader, label %for.inc82
+
+for.inc82: ; preds = %for.inc79, %for.cond7.preheader
+ %B.addr.3.lcssa = phi i64* [ %B.addr.238, %for.cond7.preheader ], [ %B.addr.4.lcssa, %for.inc79 ]
+ %inc83 = add nsw i64 %k.037, 1
+ %cmp5 = icmp slt i64 %inc83, %n
+ br i1 %cmp5, label %for.cond7.preheader, label %for.inc85
+
+for.inc85: ; preds = %for.inc82, %for.cond4.preheader
+ %B.addr.2.lcssa = phi i64* [ %B.addr.142, %for.cond4.preheader ], [ %B.addr.3.lcssa, %for.inc82 ]
+ %inc86 = add nsw i64 %j.041, 1
+ %cmp2 = icmp slt i64 %inc86, %n
+ br i1 %cmp2, label %for.cond4.preheader, label %for.inc88
+
+for.inc88: ; preds = %for.inc85, %for.cond1.preheader
+ %B.addr.1.lcssa = phi i64* [ %B.addr.046, %for.cond1.preheader ], [ %B.addr.2.lcssa, %for.inc85 ]
+ %inc89 = add nsw i64 %i.045, 1
+ %cmp = icmp slt i64 %inc89, %n
+ br i1 %cmp, label %for.cond1.preheader, label %for.end90
+
+for.end90: ; preds = %for.inc88, %entry
+ ret void
+}
+
+
+;; cleanup around chars, shorts, ints
+;; void p4(int *A, int *B, long int n)
+;; for (char i = 0; i < n; i++)
+;; A[i + 2] = ...
+;; ... = A[i];
+
+define void @p4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ %cmp1 = icmp sgt i64 %n, 0
+ br i1 %cmp1, label %for.body, label %for.end
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i8 [ %inc, %for.body ], [ 0, %entry ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+ %conv2 = sext i8 %i.03 to i32
+ %conv3 = sext i8 %i.03 to i64
+ %add = add i64 %conv3, 2
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv2, i32* %arrayidx, align 4
+ %idxprom4 = sext i8 %i.03 to i64
+ %arrayidx5 = getelementptr inbounds i32* %A, i64 %idxprom4
+ %0 = load i32* %arrayidx5, align 4
+; CHECK: da analyze - flow [*|<]!
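+; Presumably the i8 counter, sign-extended on every use, can wrap before
+; reaching %n, so the subscripts are not provably affine and only a
+; conservative unknown direction is reported; p5 is the i16 analogue.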
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc = add i8 %i.03, 1
+ %conv = sext i8 %inc to i64
+ %cmp = icmp slt i64 %conv, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+;; void p5(int *A, int *B, long int n)
+;; for (short i = 0; i < n; i++)
+;; A[i + 2] = ...
+;; ... = A[i];
+
+define void @p5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ %cmp1 = icmp sgt i64 %n, 0
+ br i1 %cmp1, label %for.body, label %for.end
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i16 [ %inc, %for.body ], [ 0, %entry ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+ %conv2 = sext i16 %i.03 to i32
+ %conv3 = sext i16 %i.03 to i64
+ %add = add i64 %conv3, 2
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv2, i32* %arrayidx, align 4
+ %idxprom4 = sext i16 %i.03 to i64
+ %arrayidx5 = getelementptr inbounds i32* %A, i64 %idxprom4
+ %0 = load i32* %arrayidx5, align 4
+; CHECK: da analyze - flow [*|<]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc = add i16 %i.03, 1
+ %conv = sext i16 %inc to i64
+ %cmp = icmp slt i64 %conv, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+;; void p6(int *A, int *B, long int n)
+;; for (int i = 0; i < n; i++)
+;; A[i + 2] = ...
+;; ... = A[i];
+
+define void @p6(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ %cmp1 = icmp sgt i64 %n, 0
+ br i1 %cmp1, label %for.body, label %for.end
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+ %add = add nsw i32 %i.03, 2
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
+ store i32 %i.03, i32* %arrayidx, align 4
+ %idxprom2 = sext i32 %i.03 to i64
+ %arrayidx3 = getelementptr inbounds i32* %A, i64 %idxprom2
+ %0 = load i32* %arrayidx3, align 4
+; CHECK: da analyze - consistent flow [2]!
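+; With a plain int counter the store A[i+2] and the load A[i] overlap when
+; i' = i + 2, a constant flow distance of 2.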
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc = add nsw i32 %i.03, 1
+ %conv = sext i32 %inc to i64
+ %cmp = icmp slt i64 %conv, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+;; void p7(unsigned *A, unsigned *B, char n)
+;; A[n] = ...
+;; ... = A[n + 1];
+
+define void @p7(i32* %A, i32* %B, i8 signext %n) nounwind uwtable ssp {
+entry:
+ %idxprom = sext i8 %n to i64
+ %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
+ store i32 0, i32* %arrayidx, align 4
+ %conv = sext i8 %n to i64
+ %add = add i64 %conv, 1
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %add
+ %0 = load i32* %arrayidx2, align 4
+; CHECK: da analyze - none!
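+; A[n] and A[n+1] are one element apart for every n, so the accesses can
+; never overlap; p8, p9, and p10 below repeat this for short, int, and
+; unsigned subscripts.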
+ store i32 %0, i32* %B, align 4
+ ret void
+}
+
+
+;; void p8(unsigned *A, unsigned *B, short n)
+;; A[n] = ...
+;; ... = A[n + 1];
+
+define void @p8(i32* %A, i32* %B, i16 signext %n) nounwind uwtable ssp {
+entry:
+ %idxprom = sext i16 %n to i64
+ %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
+ store i32 0, i32* %arrayidx, align 4
+ %conv = sext i16 %n to i64
+ %add = add i64 %conv, 1
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %add
+ %0 = load i32* %arrayidx2, align 4
+; CHECK: da analyze - none!
+ store i32 %0, i32* %B, align 4
+ ret void
+}
+
+
+;; void p9(unsigned *A, unsigned *B, int n)
+;; A[n] = ...
+;; ... = A[n + 1];
+
+define void @p9(i32* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ %idxprom = sext i32 %n to i64
+ %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
+ store i32 0, i32* %arrayidx, align 4
+ %add = add nsw i32 %n, 1
+ %idxprom1 = sext i32 %add to i64
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1
+ %0 = load i32* %arrayidx2, align 4
+; CHECK: da analyze - none!
+ store i32 %0, i32* %B, align 4
+ ret void
+}
+
+
+;; void p10(unsigned *A, unsigned *B, unsigned n)
+;; A[n] = ...
+;; ... = A[n + 1];
+
+define void @p10(i32* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ %idxprom = zext i32 %n to i64
+ %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
+ store i32 0, i32* %arrayidx, align 4
+ %add = add i32 %n, 1
+ %idxprom1 = zext i32 %add to i64
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1
+ %0 = load i32* %arrayidx2, align 4
+; CHECK: da analyze - none!
+ store i32 %0, i32* %B, align 4
+ ret void
+}
diff --git a/test/Analysis/DependenceAnalysis/Propagating.ll b/test/Analysis/DependenceAnalysis/Propagating.ll
new file mode 100644
index 00000000000..076348c68dc
--- /dev/null
+++ b/test/Analysis/DependenceAnalysis/Propagating.ll
@@ -0,0 +1,467 @@
+; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
+
+; ModuleID = 'Propagating.bc'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+
+;; for (long int i = 0; i < 100; i++)
+;; for (long int j = 0; j < 100; j++)
+;; A[i + 1][i + j] = i;
+;; *B++ = A[i][i + j];
+
+define void @prop0([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc9, %entry
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc9 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc10, %for.inc9 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.cond1.preheader
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %conv = trunc i64 %i.03 to i32
+ %add = add nsw i64 %i.03, %j.02
+ %add4 = add nsw i64 %i.03, 1
+ %arrayidx5 = getelementptr inbounds [100 x i32]* %A, i64 %add4, i64 %add
+ store i32 %conv, i32* %arrayidx5, align 4
+ %add6 = add nsw i64 %i.03, %j.02
+ %arrayidx8 = getelementptr inbounds [100 x i32]* %A, i64 %i.03, i64 %add6
+ %0 = load i32* %arrayidx8, align 4
+; CHECK: da analyze - consistent flow [1 -1]!
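+; Equating subscripts: i + 1 = i' and i + j = i' + j' give i' = i + 1 and
+; j' = j - 1, the constant distance vector [1 -1].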
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
+ store i32 %0, i32* %B.addr.11, align 4
+ %inc = add nsw i64 %j.02, 1
+ %cmp2 = icmp slt i64 %inc, 100
+ br i1 %cmp2, label %for.body3, label %for.inc9
+
+for.inc9: ; preds = %for.body3
+ %inc10 = add nsw i64 %i.03, 1
+ %cmp = icmp slt i64 %inc10, 100
+ br i1 %cmp, label %for.cond1.preheader, label %for.end11
+
+for.end11: ; preds = %for.inc9
+ ret void
+}
+
+
+;; for (long int i = 0; i < 100; i++)
+;; for (long int j = 0; j < 100; j++)
+;; for (long int k = 0; k < 100; k++)
+;; A[j - i][i + 1][j + k] = ...
+;; ... = A[j - i][i][j + k];
+
+define void @prop1([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc18, %entry
+ %B.addr.06 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc18 ]
+ %i.05 = phi i64 [ 0, %entry ], [ %inc19, %for.inc18 ]
+ br label %for.cond4.preheader
+
+for.cond4.preheader: ; preds = %for.inc15, %for.cond1.preheader
+ %B.addr.14 = phi i32* [ %B.addr.06, %for.cond1.preheader ], [ %incdec.ptr, %for.inc15 ]
+ %j.03 = phi i64 [ 0, %for.cond1.preheader ], [ %inc16, %for.inc15 ]
+ br label %for.body6
+
+for.body6: ; preds = %for.body6, %for.cond4.preheader
+ %k.02 = phi i64 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ]
+ %B.addr.21 = phi i32* [ %B.addr.14, %for.cond4.preheader ], [ %incdec.ptr, %for.body6 ]
+ %conv = trunc i64 %i.05 to i32
+ %add = add nsw i64 %j.03, %k.02
+ %add7 = add nsw i64 %i.05, 1
+ %sub = sub nsw i64 %j.03, %i.05
+ %arrayidx9 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %sub, i64 %add7, i64 %add
+ store i32 %conv, i32* %arrayidx9, align 4
+ %add10 = add nsw i64 %j.03, %k.02
+ %sub11 = sub nsw i64 %j.03, %i.05
+ %arrayidx14 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %sub11, i64 %i.05, i64 %add10
+ %0 = load i32* %arrayidx14, align 4
+; CHECK: da analyze - consistent flow [1 1 -1]!
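+; From i + 1 = i' and j - i = j' - i' we get j' = j + 1, and then
+; j + k = j' + k' gives k' = k - 1, matching [1 1 -1].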
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.21, i64 1
+ store i32 %0, i32* %B.addr.21, align 4
+ %inc = add nsw i64 %k.02, 1
+ %cmp5 = icmp slt i64 %inc, 100
+ br i1 %cmp5, label %for.body6, label %for.inc15
+
+for.inc15: ; preds = %for.body6
+ %inc16 = add nsw i64 %j.03, 1
+ %cmp2 = icmp slt i64 %inc16, 100
+ br i1 %cmp2, label %for.cond4.preheader, label %for.inc18
+
+for.inc18: ; preds = %for.inc15
+ %inc19 = add nsw i64 %i.05, 1
+ %cmp = icmp slt i64 %inc19, 100
+ br i1 %cmp, label %for.cond1.preheader, label %for.end20
+
+for.end20: ; preds = %for.inc18
+ ret void
+}
+
+
+;; for (long int i = 0; i < 100; i++)
+;; for (long int j = 0; j < 100; j++)
+;; A[i - 1][2*i] = ...
+;; ... = A[i][i + j + 110];
+
+define void @prop2([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc8, %entry
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc8 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.cond1.preheader
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = shl nsw i64 %i.03, 1
+ %sub = add nsw i64 %i.03, -1
+ %arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %sub, i64 %mul
+ store i32 %conv, i32* %arrayidx4, align 4
+ %add = add nsw i64 %i.03, %j.02
+ %add5 = add nsw i64 %add, 110
+ %arrayidx7 = getelementptr inbounds [100 x i32]* %A, i64 %i.03, i64 %add5
+ %0 = load i32* %arrayidx7, align 4
+; CHECK: da analyze - none!
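+; Propagating i - 1 = i' into 2*i = i' + j' + 110 forces j' = i - 109,
+; which is negative for all 0 <= i < 100, so no dependence exists.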
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
+ store i32 %0, i32* %B.addr.11, align 4
+ %inc = add nsw i64 %j.02, 1
+ %cmp2 = icmp slt i64 %inc, 100
+ br i1 %cmp2, label %for.body3, label %for.inc8
+
+for.inc8: ; preds = %for.body3
+ %inc9 = add nsw i64 %i.03, 1
+ %cmp = icmp slt i64 %inc9, 100
+ br i1 %cmp, label %for.cond1.preheader, label %for.end10
+
+for.end10: ; preds = %for.inc8
+ ret void
+}
+
+
+;; for (long int i = 0; i < 100; i++)
+;; for (long int j = 0; j < 100; j++)
+;; A[i][2*j + i] = ...
+;; ... = A[i][2*j - i + 5];
+
+define void @prop3([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc9, %entry
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc9 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc10, %for.inc9 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.cond1.preheader
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = shl nsw i64 %j.02, 1
+ %add = add nsw i64 %mul, %i.03
+ %arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.03, i64 %add
+ store i32 %conv, i32* %arrayidx4, align 4
+ %mul5 = shl nsw i64 %j.02, 1
+ %sub = sub nsw i64 %mul5, %i.03
+ %add6 = add nsw i64 %sub, 5
+ %arrayidx8 = getelementptr inbounds [100 x i32]* %A, i64 %i.03, i64 %add6
+ %0 = load i32* %arrayidx8, align 4
+; CHECK: da analyze - none!
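+; With i = i', the columns require 2*(j' - j) = 2*i - 5; the right side is
+; odd, so there is no integer solution and no dependence.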
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
+ store i32 %0, i32* %B.addr.11, align 4
+ %inc = add nsw i64 %j.02, 1
+ %cmp2 = icmp slt i64 %inc, 100
+ br i1 %cmp2, label %for.body3, label %for.inc9
+
+for.inc9: ; preds = %for.body3
+ %inc10 = add nsw i64 %i.03, 1
+ %cmp = icmp slt i64 %inc10, 100
+ br i1 %cmp, label %for.cond1.preheader, label %for.end11
+
+for.end11: ; preds = %for.inc9
+ ret void
+}
+
+
+;; propagate Distance
+;; for (long int i = 0; i < 100; i++)
+;; for (long int j = 0; j < 100; j++)
+;; A[i + 2][2*i + j + 1] = ...
+;; ... = A[i][2*i + j];
+
+define void @prop4([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc11, %entry
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc11 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc12, %for.inc11 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.cond1.preheader
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = shl nsw i64 %i.03, 1
+ %add = add nsw i64 %mul, %j.02
+ %add4 = add nsw i64 %add, 1
+ %add5 = add nsw i64 %i.03, 2
+ %arrayidx6 = getelementptr inbounds [100 x i32]* %A, i64 %add5, i64 %add4
+ store i32 %conv, i32* %arrayidx6, align 4
+ %mul7 = shl nsw i64 %i.03, 1
+ %add8 = add nsw i64 %mul7, %j.02
+ %arrayidx10 = getelementptr inbounds [100 x i32]* %A, i64 %i.03, i64 %add8
+ %0 = load i32* %arrayidx10, align 4
+; CHECK: da analyze - consistent flow [2 -3]!
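+; From i + 2 = i', substituting into 2*i + j + 1 = 2*i' + j' yields
+; j' = j - 3, the constant distance vector [2 -3].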
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
+ store i32 %0, i32* %B.addr.11, align 4
+ %inc = add nsw i64 %j.02, 1
+ %cmp2 = icmp slt i64 %inc, 100
+ br i1 %cmp2, label %for.body3, label %for.inc11
+
+for.inc11: ; preds = %for.body3
+ %inc12 = add nsw i64 %i.03, 1
+ %cmp = icmp slt i64 %inc12, 100
+ br i1 %cmp, label %for.cond1.preheader, label %for.end13
+
+for.end13: ; preds = %for.inc11
+ ret void
+}
+
+
+;; propagate Point
+;; for (long int i = 0; i < 100; i++)
+;; for (long int j = 0; j < 100; j++)
+;; A[3*i - 18][22 - i][2*i + j] = ...
+;; ... = A[i][i][3*i + j];
+
+define void @prop5([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc13, %entry
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc13 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc14, %for.inc13 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.cond1.preheader
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = shl nsw i64 %i.03, 1
+ %add = add nsw i64 %mul, %j.02
+ %sub = sub nsw i64 22, %i.03
+ %mul4 = mul nsw i64 %i.03, 3
+ %sub5 = add nsw i64 %mul4, -18
+ %arrayidx7 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %sub5, i64 %sub, i64 %add
+ store i32 %conv, i32* %arrayidx7, align 4
+ %mul8 = mul nsw i64 %i.03, 3
+ %add9 = add nsw i64 %mul8, %j.02
+ %arrayidx12 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %i.03, i64 %i.03, i64 %add9
+ %0 = load i32* %arrayidx12, align 4
+; CHECK: da analyze - flow [< -16] splitable!
+; CHECK: da analyze - split level = 1, iteration = 11!
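+; The first two subscripts force 3*i - 18 = 22 - i, i.e. i = 10 and
+; i' = 12; the third then gives j' = j - 16. Splitting the i loop at
+; iteration 11 separates the only conflicting pair of iterations.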
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
+ store i32 %0, i32* %B.addr.11, align 4
+ %inc = add nsw i64 %j.02, 1
+ %cmp2 = icmp slt i64 %inc, 100
+ br i1 %cmp2, label %for.body3, label %for.inc13
+
+for.inc13: ; preds = %for.body3
+ %inc14 = add nsw i64 %i.03, 1
+ %cmp = icmp slt i64 %inc14, 100
+ br i1 %cmp, label %for.cond1.preheader, label %for.end15
+
+for.end15: ; preds = %for.inc13
+ ret void
+}
+
+
+;; propagate Line
+;; for (long int i = 0; i < 100; i++)
+;; for (long int j = 0; j < 100; j++)
+;; A[i + 1][4*i + j + 2] = ...
+;; ... = A[2*i][8*i + j];
+
+define void @prop6([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc12, %entry
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc12 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc13, %for.inc12 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.cond1.preheader
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = shl nsw i64 %i.03, 2
+ %add = add nsw i64 %mul, %j.02
+ %add4 = add nsw i64 %add, 2
+ %add5 = add nsw i64 %i.03, 1
+ %arrayidx6 = getelementptr inbounds [100 x i32]* %A, i64 %add5, i64 %add4
+ store i32 %conv, i32* %arrayidx6, align 4
+ %mul7 = shl nsw i64 %i.03, 3
+ %add8 = add nsw i64 %mul7, %j.02
+ %mul9 = shl nsw i64 %i.03, 1
+ %arrayidx11 = getelementptr inbounds [100 x i32]* %A, i64 %mul9, i64 %add8
+ %0 = load i32* %arrayidx11, align 4
+; CHECK: da analyze - flow [=> -2]!
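+; Substituting i + 1 = 2*i' into 4*i + j + 2 = 8*i' + j' gives j' = j - 2;
+; since i' = (i + 1)/2 <= i for every solution, the outer direction is
+; = or >, printed =>.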
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
+ store i32 %0, i32* %B.addr.11, align 4
+ %inc = add nsw i64 %j.02, 1
+ %cmp2 = icmp slt i64 %inc, 100
+ br i1 %cmp2, label %for.body3, label %for.inc12
+
+for.inc12: ; preds = %for.body3
+ %inc13 = add nsw i64 %i.03, 1
+ %cmp = icmp slt i64 %inc13, 100
+ br i1 %cmp, label %for.cond1.preheader, label %for.end14
+
+for.end14: ; preds = %for.inc12
+ ret void
+}
+
+
+;; for (long int i = 0; i < 100; i++)
+;; for (long int j = 0; j < 100; j++)
+;; A[2*i + 4][-5*i + j + 2] = ...
+;; ... = A[-2*i + 20][5*i + j];
+
+define void @prop7([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc14, %entry
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc14 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc15, %for.inc14 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.cond1.preheader
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = mul nsw i64 %i.03, -5
+ %add = add nsw i64 %mul, %j.02
+ %add4 = add nsw i64 %add, 2
+ %mul5 = shl nsw i64 %i.03, 1
+ %add6 = add nsw i64 %mul5, 4
+ %arrayidx7 = getelementptr inbounds [100 x i32]* %A, i64 %add6, i64 %add4
+ store i32 %conv, i32* %arrayidx7, align 4
+ %mul8 = mul nsw i64 %i.03, 5
+ %add9 = add nsw i64 %mul8, %j.02
+ %mul10 = mul nsw i64 %i.03, -2
+ %add11 = add nsw i64 %mul10, 20
+ %arrayidx13 = getelementptr inbounds [100 x i32]* %A, i64 %add11, i64 %add9
+ %0 = load i32* %arrayidx13, align 4
+; CHECK: da analyze - flow [* -38] splitable!
+; CHECK: da analyze - split level = 1, iteration = 4!
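+; The rows require 2*i + 4 = -2*i' + 20, i.e. i + i' = 8, and then
+; j' = j + 2 - 5*(i + i') = j - 38. All solutions lie on the line
+; i + i' = 8, so splitting the outer loop at its midpoint, iteration 4,
+; separates the endpoints.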
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
+ store i32 %0, i32* %B.addr.11, align 4
+ %inc = add nsw i64 %j.02, 1
+ %cmp2 = icmp slt i64 %inc, 100
+ br i1 %cmp2, label %for.body3, label %for.inc14
+
+for.inc14: ; preds = %for.body3
+ %inc15 = add nsw i64 %i.03, 1
+ %cmp = icmp slt i64 %inc15, 100
+ br i1 %cmp, label %for.cond1.preheader, label %for.end16
+
+for.end16: ; preds = %for.inc14
+ ret void
+}
+
+
+;; for (long int i = 0; i < 100; i++)
+;; for (long int j = 0; j < 100; j++)
+;; A[4][j + 2] = ...
+;; ... = A[-2*i + 4][5*i + j];
+
+define void @prop8([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc10, %entry
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc10 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc11, %for.inc10 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.cond1.preheader
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %conv = trunc i64 %i.03 to i32
+ %add = add nsw i64 %j.02, 2
+ %arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 4, i64 %add
+ store i32 %conv, i32* %arrayidx4, align 4
+ %mul = mul nsw i64 %i.03, 5
+ %add5 = add nsw i64 %mul, %j.02
+ %mul6 = mul nsw i64 %i.03, -2
+ %add7 = add nsw i64 %mul6, 4
+ %arrayidx9 = getelementptr inbounds [100 x i32]* %A, i64 %add7, i64 %add5
+ %0 = load i32* %arrayidx9, align 4
+; CHECK: da analyze - flow [p<= 2]!
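+; The rows force -2*i' + 4 = 4, i.e. i' = 0 on the load side, and the
+; columns then give j' = j + 2, the distance 2 at the inner level.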
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
+ store i32 %0, i32* %B.addr.11, align 4
+ %inc = add nsw i64 %j.02, 1
+ %cmp2 = icmp slt i64 %inc, 100
+ br i1 %cmp2, label %for.body3, label %for.inc10
+
+for.inc10: ; preds = %for.body3
+ %inc11 = add nsw i64 %i.03, 1
+ %cmp = icmp slt i64 %inc11, 100
+ br i1 %cmp, label %for.cond1.preheader, label %for.end12
+
+for.end12: ; preds = %for.inc10
+ ret void
+}
+
+
+;; for (long int i = 0; i < 100; i++)
+;; for (long int j = 0; j < 100; j++)
+;; A[2*i + 4][5*i + j + 2] = ...
+;; ... = A[4][j];
+
+define void @prop9([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc10, %entry
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc10 ]
+ %i.03 = phi i64 [ 0, %entry ], [ %inc11, %for.inc10 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.cond1.preheader
+ %j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = mul nsw i64 %i.03, 5
+ %add = add nsw i64 %mul, %j.02
+ %add4 = add nsw i64 %add, 2
+ %mul5 = shl nsw i64 %i.03, 1
+ %add6 = add nsw i64 %mul5, 4
+ %arrayidx7 = getelementptr inbounds [100 x i32]* %A, i64 %add6, i64 %add4
+ store i32 %conv, i32* %arrayidx7, align 4
+ %arrayidx9 = getelementptr inbounds [100 x i32]* %A, i64 4, i64 %j.02
+ %0 = load i32* %arrayidx9, align 4
+; CHECK: da analyze - flow [p<= 2]!
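+; The mirror image of prop8: the store's row 2*i + 4 must equal 4, so
+; i = 0, and the columns again give j' = j + 2.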
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
+ store i32 %0, i32* %B.addr.11, align 4
+ %inc = add nsw i64 %j.02, 1
+ %cmp2 = icmp slt i64 %inc, 100
+ br i1 %cmp2, label %for.body3, label %for.inc10
+
+for.inc10: ; preds = %for.body3
+ %inc11 = add nsw i64 %i.03, 1
+ %cmp = icmp slt i64 %inc11, 100
+ br i1 %cmp, label %for.cond1.preheader, label %for.end12
+
+for.end12: ; preds = %for.inc10
+ ret void
+}
diff --git a/test/Analysis/DependenceAnalysis/Separability.ll b/test/Analysis/DependenceAnalysis/Separability.ll
new file mode 100644
index 00000000000..d42d3cdb39e
--- /dev/null
+++ b/test/Analysis/DependenceAnalysis/Separability.ll
@@ -0,0 +1,267 @@
+; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
+
+; ModuleID = 'Separability.bc'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+
+;; for (long int i = 0; i < 50; i++)
+;; for (long int j = 0; j < 50; j++)
+;; for (long int k = 0; k < 50; k++)
+;; for (long int l = 0; l < 50; l++)
+;; A[n][i][j + k] = ...
+;; ... = A[10][i + 10][2*j - l];
+
+define void @sep0([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc22, %entry
+ %B.addr.08 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc22 ]
+ %i.07 = phi i64 [ 0, %entry ], [ %inc23, %for.inc22 ]
+ br label %for.cond4.preheader
+
+for.cond4.preheader: ; preds = %for.inc19, %for.cond1.preheader
+ %B.addr.16 = phi i32* [ %B.addr.08, %for.cond1.preheader ], [ %incdec.ptr, %for.inc19 ]
+ %j.05 = phi i64 [ 0, %for.cond1.preheader ], [ %inc20, %for.inc19 ]
+ br label %for.cond7.preheader
+
+for.cond7.preheader: ; preds = %for.inc16, %for.cond4.preheader
+ %B.addr.24 = phi i32* [ %B.addr.16, %for.cond4.preheader ], [ %incdec.ptr, %for.inc16 ]
+ %k.03 = phi i64 [ 0, %for.cond4.preheader ], [ %inc17, %for.inc16 ]
+ br label %for.body9
+
+for.body9: ; preds = %for.body9, %for.cond7.preheader
+ %l.02 = phi i64 [ 0, %for.cond7.preheader ], [ %inc, %for.body9 ]
+ %B.addr.31 = phi i32* [ %B.addr.24, %for.cond7.preheader ], [ %incdec.ptr, %for.body9 ]
+ %conv = trunc i64 %i.07 to i32
+ %add = add nsw i64 %j.05, %k.03
+ %idxprom = sext i32 %n to i64
+ %arrayidx11 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %idxprom, i64 %i.07, i64 %add
+ store i32 %conv, i32* %arrayidx11, align 4
+ %mul = shl nsw i64 %j.05, 1
+ %sub = sub nsw i64 %mul, %l.02
+ %add12 = add nsw i64 %i.07, 10
+ %arrayidx15 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 10, i64 %add12, i64 %sub
+ %0 = load i32* %arrayidx15, align 4
+; CHECK: da analyze - flow [-10 * * *]!
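+; The second subscript is separable: i = i' + 10 gives the exact -10 at
+; level 1. The symbolic n vs. 10 pair proves nothing, and the coupled
+; j + k vs. 2*j' - l' pair leaves the remaining levels as *.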
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.31, i64 1
+ store i32 %0, i32* %B.addr.31, align 4
+ %inc = add nsw i64 %l.02, 1
+ %cmp8 = icmp slt i64 %inc, 50
+ br i1 %cmp8, label %for.body9, label %for.inc16
+
+for.inc16: ; preds = %for.body9
+ %inc17 = add nsw i64 %k.03, 1
+ %cmp5 = icmp slt i64 %inc17, 50
+ br i1 %cmp5, label %for.cond7.preheader, label %for.inc19
+
+for.inc19: ; preds = %for.inc16
+ %inc20 = add nsw i64 %j.05, 1
+ %cmp2 = icmp slt i64 %inc20, 50
+ br i1 %cmp2, label %for.cond4.preheader, label %for.inc22
+
+for.inc22: ; preds = %for.inc19
+ %inc23 = add nsw i64 %i.07, 1
+ %cmp = icmp slt i64 %inc23, 50
+ br i1 %cmp, label %for.cond1.preheader, label %for.end24
+
+for.end24: ; preds = %for.inc22
+ ret void
+}
+
+
+;; for (long int i = 0; i < 50; i++)
+;; for (long int j = 0; j < 50; j++)
+;; for (long int k = 0; k < 50; k++)
+;; for (long int l = 0; l < 50; l++)
+;; A[i][i][j + k] = ...
+;; ... = A[10][i + 10][2*j - l];
+
+define void @sep1([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc22, %entry
+ %B.addr.08 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc22 ]
+ %i.07 = phi i64 [ 0, %entry ], [ %inc23, %for.inc22 ]
+ br label %for.cond4.preheader
+
+for.cond4.preheader: ; preds = %for.inc19, %for.cond1.preheader
+ %B.addr.16 = phi i32* [ %B.addr.08, %for.cond1.preheader ], [ %incdec.ptr, %for.inc19 ]
+ %j.05 = phi i64 [ 0, %for.cond1.preheader ], [ %inc20, %for.inc19 ]
+ br label %for.cond7.preheader
+
+for.cond7.preheader: ; preds = %for.inc16, %for.cond4.preheader
+ %B.addr.24 = phi i32* [ %B.addr.16, %for.cond4.preheader ], [ %incdec.ptr, %for.inc16 ]
+ %k.03 = phi i64 [ 0, %for.cond4.preheader ], [ %inc17, %for.inc16 ]
+ br label %for.body9
+
+for.body9: ; preds = %for.body9, %for.cond7.preheader
+ %l.02 = phi i64 [ 0, %for.cond7.preheader ], [ %inc, %for.body9 ]
+ %B.addr.31 = phi i32* [ %B.addr.24, %for.cond7.preheader ], [ %incdec.ptr, %for.body9 ]
+ %conv = trunc i64 %i.07 to i32
+ %add = add nsw i64 %j.05, %k.03
+ %arrayidx11 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %i.07, i64 %i.07, i64 %add
+ store i32 %conv, i32* %arrayidx11, align 4
+ %mul = shl nsw i64 %j.05, 1
+ %sub = sub nsw i64 %mul, %l.02
+ %add12 = add nsw i64 %i.07, 10
+ %arrayidx15 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 10, i64 %add12, i64 %sub
+ %0 = load i32* %arrayidx15, align 4
+; CHECK: da analyze - flow [> * * *]!
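+; Here the first subscript pins i = 10 and the second pins i' = 0, so any
+; conflicting store iteration follows the load iteration at level 1,
+; hence the > direction.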
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.31, i64 1
+ store i32 %0, i32* %B.addr.31, align 4
+ %inc = add nsw i64 %l.02, 1
+ %cmp8 = icmp slt i64 %inc, 50
+ br i1 %cmp8, label %for.body9, label %for.inc16
+
+for.inc16: ; preds = %for.body9
+ %inc17 = add nsw i64 %k.03, 1
+ %cmp5 = icmp slt i64 %inc17, 50
+ br i1 %cmp5, label %for.cond7.preheader, label %for.inc19
+
+for.inc19: ; preds = %for.inc16
+ %inc20 = add nsw i64 %j.05, 1
+ %cmp2 = icmp slt i64 %inc20, 50
+ br i1 %cmp2, label %for.cond4.preheader, label %for.inc22
+
+for.inc22: ; preds = %for.inc19
+ %inc23 = add nsw i64 %i.07, 1
+ %cmp = icmp slt i64 %inc23, 50
+ br i1 %cmp, label %for.cond1.preheader, label %for.end24
+
+for.end24: ; preds = %for.inc22
+ ret void
+}
+
+
+;; for (long int i = 0; i < 50; i++)
+;; for (long int j = 0; j < 50; j++)
+;; for (long int k = 0; k < 50; k++)
+;; for (long int l = 0; l < 50; l++)
+;; A[i][i][i + k][l] = ...
+;; ... = A[10][i + 10][j + k][l + 10];
+
+define void @sep2([100 x [100 x [100 x i32]]]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc26, %entry
+ %B.addr.08 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc26 ]
+ %i.07 = phi i64 [ 0, %entry ], [ %inc27, %for.inc26 ]
+ br label %for.cond4.preheader
+
+for.cond4.preheader: ; preds = %for.inc23, %for.cond1.preheader
+ %B.addr.16 = phi i32* [ %B.addr.08, %for.cond1.preheader ], [ %incdec.ptr, %for.inc23 ]
+ %j.05 = phi i64 [ 0, %for.cond1.preheader ], [ %inc24, %for.inc23 ]
+ br label %for.cond7.preheader
+
+for.cond7.preheader: ; preds = %for.inc20, %for.cond4.preheader
+ %B.addr.24 = phi i32* [ %B.addr.16, %for.cond4.preheader ], [ %incdec.ptr, %for.inc20 ]
+ %k.03 = phi i64 [ 0, %for.cond4.preheader ], [ %inc21, %for.inc20 ]
+ br label %for.body9
+
+for.body9: ; preds = %for.body9, %for.cond7.preheader
+ %l.02 = phi i64 [ 0, %for.cond7.preheader ], [ %inc, %for.body9 ]
+ %B.addr.31 = phi i32* [ %B.addr.24, %for.cond7.preheader ], [ %incdec.ptr, %for.body9 ]
+ %conv = trunc i64 %i.07 to i32
+ %add = add nsw i64 %i.07, %k.03
+ %arrayidx12 = getelementptr inbounds [100 x [100 x [100 x i32]]]* %A, i64 %i.07, i64 %i.07, i64 %add, i64 %l.02
+ store i32 %conv, i32* %arrayidx12, align 4
+ %add13 = add nsw i64 %l.02, 10
+ %add14 = add nsw i64 %j.05, %k.03
+ %add15 = add nsw i64 %i.07, 10
+ %arrayidx19 = getelementptr inbounds [100 x [100 x [100 x i32]]]* %A, i64 10, i64 %add15, i64 %add14, i64 %add13
+ %0 = load i32* %arrayidx19, align 4
+; CHECK: da analyze - flow [> * * -10]!
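+; As in sep1, i = 10 and i' = 0 give > at level 1; the separable last
+; subscript l = l' + 10 adds the exact distance -10 at level 4.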
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.31, i64 1
+ store i32 %0, i32* %B.addr.31, align 4
+ %inc = add nsw i64 %l.02, 1
+ %cmp8 = icmp slt i64 %inc, 50
+ br i1 %cmp8, label %for.body9, label %for.inc20
+
+for.inc20: ; preds = %for.body9
+ %inc21 = add nsw i64 %k.03, 1
+ %cmp5 = icmp slt i64 %inc21, 50
+ br i1 %cmp5, label %for.cond7.preheader, label %for.inc23
+
+for.inc23: ; preds = %for.inc20
+ %inc24 = add nsw i64 %j.05, 1
+ %cmp2 = icmp slt i64 %inc24, 50
+ br i1 %cmp2, label %for.cond4.preheader, label %for.inc26
+
+for.inc26: ; preds = %for.inc23
+ %inc27 = add nsw i64 %i.07, 1
+ %cmp = icmp slt i64 %inc27, 50
+ br i1 %cmp, label %for.cond1.preheader, label %for.end28
+
+for.end28: ; preds = %for.inc26
+ ret void
+}
+
+
+;; for (long int i = 0; i < 50; i++)
+;; for (long int j = 0; j < 50; j++)
+;; for (long int k = 0; k < 50; k++)
+;; for (long int l = 0; l < 50; l++)
+;; A[i][i][i + k][l + k] = ...
+;; ... = A[10][i + 10][j + k][l + 10];
+
+define void @sep3([100 x [100 x [100 x i32]]]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc27, %entry
+ %B.addr.08 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc27 ]
+ %i.07 = phi i64 [ 0, %entry ], [ %inc28, %for.inc27 ]
+ br label %for.cond4.preheader
+
+for.cond4.preheader: ; preds = %for.inc24, %for.cond1.preheader
+ %B.addr.16 = phi i32* [ %B.addr.08, %for.cond1.preheader ], [ %incdec.ptr, %for.inc24 ]
+ %j.05 = phi i64 [ 0, %for.cond1.preheader ], [ %inc25, %for.inc24 ]
+ br label %for.cond7.preheader
+
+for.cond7.preheader: ; preds = %for.inc21, %for.cond4.preheader
+ %B.addr.24 = phi i32* [ %B.addr.16, %for.cond4.preheader ], [ %incdec.ptr, %for.inc21 ]
+ %k.03 = phi i64 [ 0, %for.cond4.preheader ], [ %inc22, %for.inc21 ]
+ br label %for.body9
+
+for.body9: ; preds = %for.body9, %for.cond7.preheader
+ %l.02 = phi i64 [ 0, %for.cond7.preheader ], [ %inc, %for.body9 ]
+ %B.addr.31 = phi i32* [ %B.addr.24, %for.cond7.preheader ], [ %incdec.ptr, %for.body9 ]
+ %conv = trunc i64 %i.07 to i32
+ %add = add nsw i64 %l.02, %k.03
+ %add10 = add nsw i64 %i.07, %k.03
+ %arrayidx13 = getelementptr inbounds [100 x [100 x [100 x i32]]]* %A, i64 %i.07, i64 %i.07, i64 %add10, i64 %add
+ store i32 %conv, i32* %arrayidx13, align 4
+ %add14 = add nsw i64 %l.02, 10
+ %add15 = add nsw i64 %j.05, %k.03
+ %add16 = add nsw i64 %i.07, 10
+ %arrayidx20 = getelementptr inbounds [100 x [100 x [100 x i32]]]* %A, i64 10, i64 %add16, i64 %add15, i64 %add14
+ %0 = load i32* %arrayidx20, align 4
+; CHECK: da analyze - flow [> * * *]!
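+; Adding k to the last subscript couples it with the k loop, so the exact
+; -10 from sep2 degrades to * at level 4.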
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.31, i64 1
+ store i32 %0, i32* %B.addr.31, align 4
+ %inc = add nsw i64 %l.02, 1
+ %cmp8 = icmp slt i64 %inc, 50
+ br i1 %cmp8, label %for.body9, label %for.inc21
+
+for.inc21: ; preds = %for.body9
+ %inc22 = add nsw i64 %k.03, 1
+ %cmp5 = icmp slt i64 %inc22, 50
+ br i1 %cmp5, label %for.cond7.preheader, label %for.inc24
+
+for.inc24: ; preds = %for.inc21
+ %inc25 = add nsw i64 %j.05, 1
+ %cmp2 = icmp slt i64 %inc25, 50
+ br i1 %cmp2, label %for.cond4.preheader, label %for.inc27
+
+for.inc27: ; preds = %for.inc24
+ %inc28 = add nsw i64 %i.07, 1
+ %cmp = icmp slt i64 %inc28, 50
+ br i1 %cmp, label %for.cond1.preheader, label %for.end29
+
+for.end29: ; preds = %for.inc27
+ ret void
+}
diff --git a/test/Analysis/DependenceAnalysis/StrongSIV.ll b/test/Analysis/DependenceAnalysis/StrongSIV.ll
new file mode 100644
index 00000000000..be336c3580c
--- /dev/null
+++ b/test/Analysis/DependenceAnalysis/StrongSIV.ll
@@ -0,0 +1,342 @@
+; RUN: opt < %s -analyze -basicaa -indvars -da | FileCheck %s
+
+; ModuleID = 'StrongSIV.bc'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+
+;; for (int i = 0; i < n; i++)
+;; A[i + 2] = ...
+;; ... = A[i];
+
+define void @strong0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ %cmp1 = icmp sgt i64 %n, 0
+ br i1 %cmp1, label %for.body, label %for.end
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+ %add = add nsw i32 %i.03, 2
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
+ store i32 %i.03, i32* %arrayidx, align 4
+ %idxprom2 = sext i32 %i.03 to i64
+ %arrayidx3 = getelementptr inbounds i32* %A, i64 %idxprom2
+ %0 = load i32* %arrayidx3, align 4
+; CHECK: da analyze - consistent flow [2]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc = add nsw i32 %i.03, 1
+ %conv = sext i32 %inc to i64
+ %cmp = icmp slt i64 %conv, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+;; for (long int i = 0; i < n; i++)
+;; A[i + 2] = ...
+;; ... = A[i];
+
+define void @strong1(i32* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ %conv = sext i32 %n to i64
+ %cmp1 = icmp sgt i32 %n, 0
+ br i1 %cmp1, label %for.body, label %for.end
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+ %conv2 = trunc i64 %i.03 to i32
+ %add = add nsw i64 %i.03, 2
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv2, i32* %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds i32* %A, i64 %i.03
+ %0 = load i32* %arrayidx3, align 4
+; CHECK: da analyze - consistent flow [2]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc = add nsw i64 %i.03, 1
+ %cmp = icmp slt i64 %inc, %conv
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < n; i++)
+;; A[i + 2] = ...
+;; ... = A[i];
+
+define void @strong2(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ %cmp1 = icmp eq i64 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+ %conv = trunc i64 %i.03 to i32
+ %add = add i64 %i.03, 2
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 %i.03
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - consistent flow [2]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc = add i64 %i.03, 1
+ %cmp = icmp ult i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+;; for (int i = 0; i < n; i++)
+;; A[i + 2] = ...
+;; ... = A[i];
+
+define void @strong3(i32* %A, i32* %B, i32 %n) nounwind uwtable ssp {
+entry:
+ %cmp1 = icmp sgt i32 %n, 0
+ br i1 %cmp1, label %for.body, label %for.end
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+ %add = add nsw i32 %i.03, 2
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
+ store i32 %i.03, i32* %arrayidx, align 4
+ %idxprom1 = sext i32 %i.03 to i64
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1
+ %0 = load i32* %arrayidx2, align 4
+; CHECK: da analyze - consistent flow [2]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc = add nsw i32 %i.03, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 19; i++)
+;; A[i + 19] = ...
+;; ... = A[i];
+
+define void @strong4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %add = add i64 %i.02, 19
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 %i.02
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - none!
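+; With only 19 iterations (0 <= i <= 18), i' = i + 19 never lands inside
+; the loop, so the accesses are independent.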
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 19
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 20; i++)
+;; A[i + 19] = ...
+;; ... = A[i];
+
+define void @strong5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %add = add i64 %i.02, 19
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 %i.02
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - consistent flow [19]!
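+; One more iteration makes the boundary pair feasible: the store at i = 0
+; writes A[19], which the load reads at i' = 19, a distance of exactly 19.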
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 20
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 20; i++)
+;; A[2*i + 6] = ...
+;; ... = A[2*i];
+
+define void @strong6(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = shl i64 %i.02, 1
+ %add = add i64 %mul, 6
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul1 = shl i64 %i.02, 1
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %mul1
+ %0 = load i32* %arrayidx2, align 4
+; CHECK: da analyze - consistent flow [3]!
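+; 2*i + 6 = 2*i' gives i' = i + 3, a constant distance of 3.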
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 20
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 20; i++)
+;; A[2*i + 7] = ...
+;; ... = A[2*i];
+
+define void @strong7(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = shl i64 %i.02, 1
+ %add = add i64 %mul, 7
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul1 = shl i64 %i.02, 1
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %mul1
+ %0 = load i32* %arrayidx2, align 4
+; CHECK: da analyze - none!
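+; 2*i + 7 = 2*i' would need 2*(i' - i) = 7; even cannot equal odd, so
+; there is no dependence.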
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 20
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 20; i++)
+;; A[i + n] = ...
+;; ... = A[i];
+
+define void @strong8(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %add = add i64 %i.02, %n
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 %i.02
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - consistent flow [%n|<]!
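+; i + n = i' gives a distance of exactly n, which the analysis reports
+; symbolically as %n.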
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 20
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < n; i++)
+;; A[i + n] = ...
+;; ... = A[i + 2*n];
+
+define void @strong9(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ %cmp1 = icmp eq i64 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+ %conv = trunc i64 %i.03 to i32
+ %add = add i64 %i.03, %n
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul = shl i64 %n, 1
+ %add1 = add i64 %i.03, %mul
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %add1
+ %0 = load i32* %arrayidx2, align 4
+; CHECK: da analyze - none!
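+; i + n = i' + 2*n requires i = i' + n, but i < n and i' >= 0, so the
+; address ranges cannot meet.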
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc = add i64 %i.03, 1
+ %cmp = icmp ult i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 1000; i++)
+;; A[n*i + 5] = ...
+;; ... = A[n*i + 5];
+
+define void @strong10(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = mul i64 %i.02, %n
+ %add = add i64 %mul, 5
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul1 = mul i64 %i.02, %n
+ %add2 = add i64 %mul1, 5
+ %arrayidx3 = getelementptr inbounds i32* %A, i64 %add2
+ %0 = load i32* %arrayidx3, align 4
+; CHECK: da analyze - consistent flow [0|<]!
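+; Both references use the identical subscript n*i + 5, so the store and
+; the following load hit the same element in every iteration: distance 0.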
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 1000
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll b/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll
new file mode 100644
index 00000000000..2a1b4e7e971
--- /dev/null
+++ b/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll
@@ -0,0 +1,312 @@
+; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
+
+; ModuleID = 'SymbolicRDIV.bc'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+
+;; for (long int i = 0; i < n1; i++)
+;; A[2*i + n1] = ...
+;; for (long int j = 0; j < n2; j++)
+;; ... = A[3*j + 3*n1];
+
+define void @symbolicrdiv0(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
+entry:
+ %cmp4 = icmp eq i64 %n1, 0
+ br i1 %cmp4, label %for.cond1.preheader, label %for.body
+
+for.cond1.preheader: ; preds = %for.body, %entry
+ %cmp21 = icmp eq i64 %n2, 0
+ br i1 %cmp21, label %for.end11, label %for.body4
+
+for.body: ; preds = %for.body, %entry
+ %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %conv = trunc i64 %i.05 to i32
+ %mul = shl nsw i64 %i.05, 1
+ %add = add i64 %mul, %n1
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %inc = add nsw i64 %i.05, 1
+ %cmp = icmp ult i64 %inc, %n1
+ br i1 %cmp, label %for.body, label %for.cond1.preheader
+
+for.body4: ; preds = %for.body4, %for.cond1.preheader
+ %j.03 = phi i64 [ %inc10, %for.body4 ], [ 0, %for.cond1.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.cond1.preheader ]
+ %mul56 = add i64 %j.03, %n1
+ %add7 = mul i64 %mul56, 3
+ %arrayidx8 = getelementptr inbounds i32* %A, i64 %add7
+ %0 = load i32* %arrayidx8, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc10 = add nsw i64 %j.03, 1
+ %cmp2 = icmp ult i64 %inc10, %n2
+ br i1 %cmp2, label %for.body4, label %for.end11
+
+for.end11: ; preds = %for.body4, %for.cond1.preheader
+ ret void
+}
+
+
+;; for (long int i = 0; i < n1; i++)
+;; A[2*i + 5*n2] = ...
+;; for (long int j = 0; j < n2; j++)
+;; ... = A[3*j + 2*n2];
+
+define void @symbolicrdiv1(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
+entry:
+ %cmp4 = icmp eq i64 %n1, 0
+ br i1 %cmp4, label %for.cond2.preheader, label %for.body
+
+for.cond2.preheader: ; preds = %for.body, %entry
+ %cmp31 = icmp eq i64 %n2, 0
+ br i1 %cmp31, label %for.end12, label %for.body5
+
+for.body: ; preds = %for.body, %entry
+ %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %conv = trunc i64 %i.05 to i32
+ %mul = shl nsw i64 %i.05, 1
+ %mul1 = mul i64 %n2, 5
+ %add = add i64 %mul, %mul1
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %inc = add nsw i64 %i.05, 1
+ %cmp = icmp ult i64 %inc, %n1
+ br i1 %cmp, label %for.body, label %for.cond2.preheader
+
+for.body5: ; preds = %for.body5, %for.cond2.preheader
+ %j.03 = phi i64 [ %inc11, %for.body5 ], [ 0, %for.cond2.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body5 ], [ %B, %for.cond2.preheader ]
+ %mul6 = mul nsw i64 %j.03, 3
+ %mul7 = shl i64 %n2, 1
+ %add8 = add i64 %mul6, %mul7
+ %arrayidx9 = getelementptr inbounds i32* %A, i64 %add8
+ %0 = load i32* %arrayidx9, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc11 = add nsw i64 %j.03, 1
+ %cmp3 = icmp ult i64 %inc11, %n2
+ br i1 %cmp3, label %for.body5, label %for.end12
+
+for.end12: ; preds = %for.body5, %for.cond2.preheader
+ ret void
+}
+
+
+;; for (long int i = 0; i < n1; i++)
+;; A[2*i - n2] = ...
+;; for (long int j = 0; j < n2; j++)
+;; ... = A[-j + 2*n1];
+
+define void @symbolicrdiv2(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
+entry:
+ %cmp4 = icmp eq i64 %n1, 0
+ br i1 %cmp4, label %for.cond1.preheader, label %for.body
+
+for.cond1.preheader: ; preds = %for.body, %entry
+ %cmp21 = icmp eq i64 %n2, 0
+ br i1 %cmp21, label %for.end10, label %for.body4
+
+for.body: ; preds = %for.body, %entry
+ %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %conv = trunc i64 %i.05 to i32
+ %mul = shl nsw i64 %i.05, 1
+ %sub = sub i64 %mul, %n2
+ %arrayidx = getelementptr inbounds i32* %A, i64 %sub
+ store i32 %conv, i32* %arrayidx, align 4
+ %inc = add nsw i64 %i.05, 1
+ %cmp = icmp ult i64 %inc, %n1
+ br i1 %cmp, label %for.body, label %for.cond1.preheader
+
+for.body4: ; preds = %for.body4, %for.cond1.preheader
+ %j.03 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.cond1.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.cond1.preheader ]
+ %mul6 = shl i64 %n1, 1
+ %add = sub i64 %mul6, %j.03
+ %arrayidx7 = getelementptr inbounds i32* %A, i64 %add
+ %0 = load i32* %arrayidx7, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc9 = add nsw i64 %j.03, 1
+ %cmp2 = icmp ult i64 %inc9, %n2
+ br i1 %cmp2, label %for.body4, label %for.end10
+
+for.end10: ; preds = %for.body4, %for.cond1.preheader
+ ret void
+}
+
+
+;; for (long int i = 0; i < n1; i++)
+;; A[-i + n2] = ...
+;; for (long int j = 0; j < n2; j++)
+;; ... = A[j - n1];
+
+define void @symbolicrdiv3(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
+entry:
+ %cmp4 = icmp eq i64 %n1, 0
+ br i1 %cmp4, label %for.cond1.preheader, label %for.body
+
+for.cond1.preheader: ; preds = %for.body, %entry
+ %cmp21 = icmp eq i64 %n2, 0
+ br i1 %cmp21, label %for.end9, label %for.body4
+
+for.body: ; preds = %for.body, %entry
+ %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %conv = trunc i64 %i.05 to i32
+ %add = sub i64 %n2, %i.05
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %inc = add nsw i64 %i.05, 1
+ %cmp = icmp ult i64 %inc, %n1
+ br i1 %cmp, label %for.body, label %for.cond1.preheader
+
+for.body4: ; preds = %for.body4, %for.cond1.preheader
+ %j.03 = phi i64 [ %inc8, %for.body4 ], [ 0, %for.cond1.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.cond1.preheader ]
+ %sub5 = sub i64 %j.03, %n1
+ %arrayidx6 = getelementptr inbounds i32* %A, i64 %sub5
+ %0 = load i32* %arrayidx6, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc8 = add nsw i64 %j.03, 1
+ %cmp2 = icmp ult i64 %inc8, %n2
+ br i1 %cmp2, label %for.body4, label %for.end9
+
+for.end9: ; preds = %for.body4, %for.cond1.preheader
+ ret void
+}
+
+
+;; for (long int i = 0; i < n1; i++)
+;; A[-i + 2*n1] = ...
+;; for (long int j = 0; j < n2; j++)
+;; ... = A[-j + n1];
+
+define void @symbolicrdiv4(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
+entry:
+ %cmp4 = icmp eq i64 %n1, 0
+ br i1 %cmp4, label %for.cond1.preheader, label %for.body
+
+for.cond1.preheader: ; preds = %for.body, %entry
+ %cmp21 = icmp eq i64 %n2, 0
+ br i1 %cmp21, label %for.end10, label %for.body4
+
+for.body: ; preds = %for.body, %entry
+ %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %conv = trunc i64 %i.05 to i32
+ %mul = shl i64 %n1, 1
+ %add = sub i64 %mul, %i.05
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %inc = add nsw i64 %i.05, 1
+ %cmp = icmp ult i64 %inc, %n1
+ br i1 %cmp, label %for.body, label %for.cond1.preheader
+
+for.body4: ; preds = %for.body4, %for.cond1.preheader
+ %j.03 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.cond1.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.cond1.preheader ]
+ %add6 = sub i64 %n1, %j.03
+ %arrayidx7 = getelementptr inbounds i32* %A, i64 %add6
+ %0 = load i32* %arrayidx7, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc9 = add nsw i64 %j.03, 1
+ %cmp2 = icmp ult i64 %inc9, %n2
+ br i1 %cmp2, label %for.body4, label %for.end10
+
+for.end10: ; preds = %for.body4, %for.cond1.preheader
+ ret void
+}
+
+
+;; for (long int i = 0; i < n1; i++)
+;; A[-i + n2] = ...
+;; for (long int j = 0; j < n2; j++)
+;; ... = A[-j + 2*n2];
+
+define void @symbolicrdiv5(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
+entry:
+ %cmp4 = icmp eq i64 %n1, 0
+ br i1 %cmp4, label %for.cond1.preheader, label %for.body
+
+for.cond1.preheader: ; preds = %for.body, %entry
+ %cmp21 = icmp eq i64 %n2, 0
+ br i1 %cmp21, label %for.end10, label %for.body4
+
+for.body: ; preds = %for.body, %entry
+ %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %conv = trunc i64 %i.05 to i32
+ %add = sub i64 %n2, %i.05
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %inc = add nsw i64 %i.05, 1
+ %cmp = icmp ult i64 %inc, %n1
+ br i1 %cmp, label %for.body, label %for.cond1.preheader
+
+for.body4: ; preds = %for.body4, %for.cond1.preheader
+ %j.03 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.cond1.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.cond1.preheader ]
+ %mul = shl i64 %n2, 1
+ %add6 = sub i64 %mul, %j.03
+ %arrayidx7 = getelementptr inbounds i32* %A, i64 %add6
+ %0 = load i32* %arrayidx7, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc9 = add nsw i64 %j.03, 1
+ %cmp2 = icmp ult i64 %inc9, %n2
+ br i1 %cmp2, label %for.body4, label %for.end10
+
+for.end10: ; preds = %for.body4, %for.cond1.preheader
+ ret void
+}
+
+
+;; for (long int i = 0; i < n1; i++)
+;; for (long int j = 0; j < n2; j++)
+;; A[j - i + n2] = ...
+;; ... = A[2*n2];
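+;; No dependence is expected: it would require j - i + n2 == 2*n2, i.e.
+;; j - i == n2, but j <= n2 - 1 and i >= 0, so j - i never reaches n2.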
+
+define void @symbolicrdiv6(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
+entry:
+ %cmp4 = icmp eq i64 %n1, 0
+ br i1 %cmp4, label %for.end7, label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc5, %entry
+ %B.addr.06 = phi i32* [ %B.addr.1.lcssa, %for.inc5 ], [ %B, %entry ]
+ %i.05 = phi i64 [ %inc6, %for.inc5 ], [ 0, %entry ]
+ %cmp21 = icmp eq i64 %n2, 0
+ br i1 %cmp21, label %for.inc5, label %for.body3
+
+for.body3: ; preds = %for.body3, %for.cond1.preheader
+ %j.03 = phi i64 [ %inc, %for.body3 ], [ 0, %for.cond1.preheader ]
+ %B.addr.12 = phi i32* [ %incdec.ptr, %for.body3 ], [ %B.addr.06, %for.cond1.preheader ]
+ %conv = trunc i64 %i.05 to i32
+ %sub = sub nsw i64 %j.03, %i.05
+ %add = add i64 %sub, %n2
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul = shl i64 %n2, 1
+ %arrayidx4 = getelementptr inbounds i32* %A, i64 %mul
+ %0 = load i32* %arrayidx4, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.12, i64 1
+ store i32 %0, i32* %B.addr.12, align 4
+ %inc = add nsw i64 %j.03, 1
+ %cmp2 = icmp ult i64 %inc, %n2
+ br i1 %cmp2, label %for.body3, label %for.inc5
+
+for.inc5: ; preds = %for.body3, %for.cond1.preheader
+ %B.addr.1.lcssa = phi i32* [ %B.addr.06, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+ %inc6 = add nsw i64 %i.05, 1
+ %cmp = icmp ult i64 %inc6, %n1
+ br i1 %cmp, label %for.cond1.preheader, label %for.end7
+
+for.end7: ; preds = %for.inc5, %entry
+ ret void
+}
diff --git a/test/Analysis/DependenceAnalysis/SymbolicSIV.ll b/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
new file mode 100644
index 00000000000..ee2343fa51e
--- /dev/null
+++ b/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
@@ -0,0 +1,330 @@
+; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
+
+; ModuleID = 'SymbolicSIV.bc'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+
+;; for (long int i = 0; i < n; i++)
+;; A[2*i + n] = ...
+;; ... = A[3*i + 3*n];
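+;; No dependence is expected: 2*i1 + n == 3*i2 + 3*n would force
+;; 2*i1 - 3*i2 == 2*n, yet with 0 <= i1, i2 < n the left side is at most
+;; 2*n - 2.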
+
+define void @symbolicsiv0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ %cmp1 = icmp eq i64 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = shl nsw i64 %i.03, 1
+ %add = add i64 %mul, %n
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul14 = add i64 %i.03, %n
+ %add3 = mul i64 %mul14, 3
+ %arrayidx4 = getelementptr inbounds i32* %A, i64 %add3
+ %0 = load i32* %arrayidx4, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc = add nsw i64 %i.03, 1
+ %cmp = icmp ult i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+;; for (long int i = 0; i < n; i++)
+;; A[2*i + 5*n] = ...
+;; ... = A[3*i + 2*n];
+
+define void @symbolicsiv1(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ %cmp1 = icmp eq i64 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = shl nsw i64 %i.03, 1
+ %mul1 = mul i64 %n, 5
+ %add = add i64 %mul, %mul1
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul2 = mul nsw i64 %i.03, 3
+ %mul3 = shl i64 %n, 1
+ %add4 = add i64 %mul2, %mul3
+ %arrayidx5 = getelementptr inbounds i32* %A, i64 %add4
+ %0 = load i32* %arrayidx5, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc = add nsw i64 %i.03, 1
+ %cmp = icmp ult i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+;; for (long int i = 0; i < n; i++)
+;; A[2*i - n] = ...
+;; ... = A[-i + 2*n];
+
+define void @symbolicsiv2(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ %cmp1 = icmp eq i64 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = shl nsw i64 %i.03, 1
+ %sub = sub i64 %mul, %n
+ %arrayidx = getelementptr inbounds i32* %A, i64 %sub
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul2 = shl i64 %n, 1
+ %add = sub i64 %mul2, %i.03
+ %arrayidx3 = getelementptr inbounds i32* %A, i64 %add
+ %0 = load i32* %arrayidx3, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc = add nsw i64 %i.03, 1
+ %cmp = icmp ult i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+;; for (long int i = 0; i < n; i++)
+;; A[-2*i + n + 1] = ...
+;; ... = A[i - 2*n];
+
+define void @symbolicsiv3(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ %cmp1 = icmp eq i64 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = mul nsw i64 %i.03, -2
+ %add = add i64 %mul, %n
+ %add1 = add i64 %add, 1
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add1
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul2 = shl i64 %n, 1
+ %sub = sub i64 %i.03, %mul2
+ %arrayidx3 = getelementptr inbounds i32* %A, i64 %sub
+ %0 = load i32* %arrayidx3, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc = add nsw i64 %i.03, 1
+ %cmp = icmp ult i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+;; for (long int i = 0; i < n; i++)
+;; A[-2*i + 3*n] = ...
+;; ... = A[-i + n];
+
+define void @symbolicsiv4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ %cmp1 = icmp eq i64 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = mul nsw i64 %i.03, -2
+ %mul1 = mul i64 %n, 3
+ %add = add i64 %mul, %mul1
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %add2 = sub i64 %n, %i.03
+ %arrayidx3 = getelementptr inbounds i32* %A, i64 %add2
+ %0 = load i32* %arrayidx3, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc = add nsw i64 %i.03, 1
+ %cmp = icmp ult i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+;; for (long int i = 0; i < n; i++)
+;; A[-2*i - 2*n] = ...
+;; ... = A[-i - n];
+
+define void @symbolicsiv5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ %cmp1 = icmp eq i64 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = mul nsw i64 %i.03, -2
+ %mul1 = shl i64 %n, 1
+ %sub = sub i64 %mul, %mul1
+ %arrayidx = getelementptr inbounds i32* %A, i64 %sub
+ store i32 %conv, i32* %arrayidx, align 4
+ %sub2 = sub nsw i64 0, %i.03
+ %sub3 = sub i64 %sub2, %n
+ %arrayidx4 = getelementptr inbounds i32* %A, i64 %sub3
+ %0 = load i32* %arrayidx4, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc = add nsw i64 %i.03, 1
+ %cmp = icmp ult i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+;; why doesn't the SCEV package understand that n >= 0?
+;; void weaktest(int *A, int *B, long unsigned n)
+;; for (long unsigned i = 0; i < n; i++)
+;; A[i + n + 1] = ...
+;; ... = A[-i];
+
+define void @weaktest(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ %cmp1 = icmp eq i64 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+ %conv = trunc i64 %i.03 to i32
+ %add = add i64 %i.03, %n
+ %add1 = add i64 %add, 1
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add1
+ store i32 %conv, i32* %arrayidx, align 4
+ %sub = sub i64 0, %i.03
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %sub
+ %0 = load i32* %arrayidx2, align 4
+; CHECK: da analyze - flow [*|<] splitable!
+; CHECK: da analyze - split level = 1, iteration = ((0 smax (-1 + (-1 * %n))) /u 2)!
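+; One reading of the result above: the references can only meet where
+; i1 + i2 == -n - 1 in 64-bit arithmetic, and splitting the loop at the
+; computed iteration separates source and sink. If SCEV could exploit
+; n >= 0 here, the smax guard would presumably fold away.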
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc = add i64 %i.03, 1
+ %cmp = icmp ult i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+;; void symbolicsiv6(int *A, int *B, long unsigned n, long unsigned N, long unsigned M) {
+;; for (long int i = 0; i < n; i++) {
+;; A[4*N*i + M] = i;
+;; *B++ = A[4*N*i + 3*M + 1];
+
+define void @symbolicsiv6(i32* %A, i32* %B, i64 %n, i64 %N, i64 %M) nounwind uwtable ssp {
+entry:
+ %cmp1 = icmp eq i64 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = shl i64 %N, 2
+ %mul1 = mul i64 %mul, %i.03
+ %add = add i64 %mul1, %M
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul2 = shl i64 %N, 2
+ %mul3 = mul i64 %mul2, %i.03
+ %mul4 = mul i64 %M, 3
+ %add5 = add i64 %mul3, %mul4
+ %add6 = add i64 %add5, 1
+ %arrayidx7 = getelementptr inbounds i32* %A, i64 %add6
+ %0 = load i32* %arrayidx7, align 4
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+; CHECK: da analyze - none!
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc = add nsw i64 %i.03, 1
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}
+
+
+;; void symbolicsiv7(int *A, int *B, long unsigned n, long unsigned N, long unsigned M) {
+;; for (long int i = 0; i < n; i++) {
+;; A[2*N*i + M] = i;
+;; *B++ = A[2*N*i - 3*M + 2];
+
+define void @symbolicsiv7(i32* %A, i32* %B, i64 %n, i64 %N, i64 %M) nounwind uwtable ssp {
+entry:
+ %cmp1 = icmp eq i64 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = shl i64 %N, 1
+ %mul1 = mul i64 %mul, %i.03
+ %add = add i64 %mul1, %M
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul2 = shl i64 %N, 1
+ %mul3 = mul i64 %mul2, %i.03
+ %0 = mul i64 %M, -3
+ %sub = add i64 %mul3, %0
+ %add5 = add i64 %sub, 2
+ %arrayidx6 = getelementptr inbounds i32* %A, i64 %add5
+ %1 = load i32* %arrayidx6, align 4
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+; CHECK: da analyze - flow [<>]!
+ store i32 %1, i32* %B.addr.02, align 4
+ %inc = add nsw i64 %i.03, 1
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}
diff --git a/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll b/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll
new file mode 100644
index 00000000000..343e8f49bf9
--- /dev/null
+++ b/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll
@@ -0,0 +1,220 @@
+; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
+
+; ModuleID = 'WeakCrossingSIV.bc'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+
+;; for (long unsigned i = 0; i < n; i++)
+;; A[1 + n*i] = ...
+;; ... = A[1 - n*i];
+
+define void @weakcrossing0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ %cmp1 = icmp eq i64 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = mul i64 %i.03, %n
+ %add = add i64 %mul, 1
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul1 = mul i64 %i.03, %n
+ %sub = sub i64 1, %mul1
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %sub
+ %0 = load i32* %arrayidx2, align 4
+; CHECK: da analyze - flow [0|<]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc = add i64 %i.03, 1
+ %cmp = icmp ult i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < n; i++)
+;; A[n + i] = ...
+;; ... = A[1 + n - i];
+
+define void @weakcrossing1(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ %cmp1 = icmp eq i64 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+ %conv = trunc i64 %i.03 to i32
+ %add = add i64 %i.03, %n
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %add1 = add i64 %n, 1
+ %sub = sub i64 %add1, %i.03
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %sub
+ %0 = load i32* %arrayidx2, align 4
+; CHECK: da analyze - flow [<>] splitable!
+; CHECK: da analyze - split level = 1, iteration = 0!
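+; The subscripts n + i and 1 + n - i meet where i1 + i2 == 1, so the
+; accesses cross between iterations 0 and 1; splitting the loop after
+; iteration 0 (the split point checked above) leaves no dependence
+; within either piece.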
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc = add i64 %i.03, 1
+ %cmp = icmp ult i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 3; i++)
+;; A[i] = ...
+;; ... = A[6 - i];
+
+define void @weakcrossing2(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %arrayidx = getelementptr inbounds i32* %A, i64 %i.02
+ store i32 %conv, i32* %arrayidx, align 4
+ %sub = sub i64 6, %i.02
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 %sub
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 3
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 4; i++)
+;; A[i] = ...
+;; ... = A[6 - i];
+
+define void @weakcrossing3(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %arrayidx = getelementptr inbounds i32* %A, i64 %i.02
+ store i32 %conv, i32* %arrayidx, align 4
+ %sub = sub i64 6, %i.02
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 %sub
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - flow [0|<]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 4
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 10; i++)
+;; A[i] = ...
+;; ... = A[-6 - i];
+
+define void @weakcrossing4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %arrayidx = getelementptr inbounds i32* %A, i64 %i.02
+ store i32 %conv, i32* %arrayidx, align 4
+ %sub = sub i64 -6, %i.02
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 %sub
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 10
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < n; i++)
+;; A[3*i] = ...
+;; ... = A[5 - 3*i];
+
+define void @weakcrossing5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ %cmp1 = icmp eq i64 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = mul i64 %i.03, 3
+ %arrayidx = getelementptr inbounds i32* %A, i64 %mul
+ store i32 %conv, i32* %arrayidx, align 4
+ %0 = mul i64 %i.03, -3
+ %sub = add i64 %0, 5
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %sub
+ %1 = load i32* %arrayidx2, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %1, i32* %B.addr.02, align 4
+ %inc = add i64 %i.03, 1
+ %cmp = icmp ult i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 4; i++)
+;; A[i] = ...
+;; ... = A[5 - i];
+
+define void @weakcrossing6(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %arrayidx = getelementptr inbounds i32* %A, i64 %i.02
+ store i32 %conv, i32* %arrayidx, align 4
+ %sub = sub i64 5, %i.02
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 %sub
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - flow [<>] splitable!
+; CHECK: da analyze - split level = 1, iteration = 2!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 4
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll b/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll
new file mode 100644
index 00000000000..a59871602b6
--- /dev/null
+++ b/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll
@@ -0,0 +1,212 @@
+; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
+
+; ModuleID = 'WeakZeroDstSIV.bc'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+
+;; for (long unsigned i = 0; i < 30; i++)
+;; A[2*i + 10] = ...
+;; ... = A[10];
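+;; The load's subscript has a zero coefficient on i (a weak-zero pair);
+;; only the store at i == 0 can touch A[10], so peeling the first
+;; iteration would remove the dependence. The leading "p" in the
+;; direction vector checked below appears to record exactly that.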
+
+define void @weakzerodst0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = shl i64 %i.02, 1
+ %add = add i64 %mul, 10
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 10
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - flow [p<=|<]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 30
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < n; i++)
+;; A[n*i + 10] = ...
+;; ... = A[10];
+
+define void @weakzerodst1(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ %cmp1 = icmp eq i64 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = mul i64 %i.03, %n
+ %add = add i64 %mul, 10
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 %conv, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 10
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - flow [p<=|<]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc = add i64 %i.03, 1
+ %cmp = icmp ult i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 5; i++)
+;; A[2*i] = ...
+;; ... = A[10];
+
+define void @weakzerodst2(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = shl i64 %i.02, 1
+ %arrayidx = getelementptr inbounds i32* %A, i64 %mul
+ store i32 %conv, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 10
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 5
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 6; i++)
+;; A[2*i] = ...
+;; ... = A[10];
+
+define void @weakzerodst3(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = shl i64 %i.02, 1
+ %arrayidx = getelementptr inbounds i32* %A, i64 %mul
+ store i32 %conv, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 10
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - flow [=>p|<]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 6
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 7; i++)
+;; A[2*i] = ...
+;; ... = A[10];
+
+define void @weakzerodst4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = shl i64 %i.02, 1
+ %arrayidx = getelementptr inbounds i32* %A, i64 %mul
+ store i32 %conv, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 10
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - flow [*|<]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 7
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 7; i++)
+;; A[2*i] = ...
+;; ... = A[-10];
+
+define void @weakzerodst5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %mul = shl i64 %i.02, 1
+ %arrayidx = getelementptr inbounds i32* %A, i64 %mul
+ store i32 %conv, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 -10
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 7
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < n; i++)
+;; A[3*i] = ...
+;; ... = A[10];
+
+define void @weakzerodst6(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ %cmp1 = icmp eq i64 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+ %conv = trunc i64 %i.03 to i32
+ %mul = mul i64 %i.03, 3
+ %arrayidx = getelementptr inbounds i32* %A, i64 %mul
+ store i32 %conv, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 10
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc = add i64 %i.03, 1
+ %cmp = icmp ult i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
diff --git a/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll b/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll
new file mode 100644
index 00000000000..fd4f4626954
--- /dev/null
+++ b/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll
@@ -0,0 +1,212 @@
+; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
+
+; ModuleID = 'WeakZeroSrcSIV.bc'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+
+;; for (long unsigned i = 0; i < 30; i++)
+;; A[10] = ...
+;; ... = A[2*i + 10];
+
+define void @weakzerosrc0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %arrayidx = getelementptr inbounds i32* %A, i64 10
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul = shl i64 %i.02, 1
+ %add = add i64 %mul, 10
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 %add
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - flow [p<=|<]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 30
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < n; i++)
+;; A[10] = ...
+;; ... = A[n*i + 10];
+
+define void @weakzerosrc1(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ %cmp1 = icmp eq i64 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+ %conv = trunc i64 %i.03 to i32
+ %arrayidx = getelementptr inbounds i32* %A, i64 10
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul = mul i64 %i.03, %n
+ %add = add i64 %mul, 10
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 %add
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - flow [p<=|<]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc = add i64 %i.03, 1
+ %cmp = icmp ult i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 5; i++)
+;; A[10] = ...
+;; ... = A[2*i];
+
+define void @weakzerosrc2(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %arrayidx = getelementptr inbounds i32* %A, i64 10
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul = shl i64 %i.02, 1
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 %mul
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 5
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 6; i++)
+;; A[10] = ...
+;; ... = A[2*i];
+
+define void @weakzerosrc3(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %arrayidx = getelementptr inbounds i32* %A, i64 10
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul = shl i64 %i.02, 1
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 %mul
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - flow [=>p|<]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 6
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 7; i++)
+;; A[10] = ...
+;; ... = A[2*i];
+
+define void @weakzerosrc4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %arrayidx = getelementptr inbounds i32* %A, i64 10
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul = shl i64 %i.02, 1
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 %mul
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - flow [*|<]!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 7
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < 7; i++)
+;; A[-10] = ...
+;; ... = A[2*i];
+
+define void @weakzerosrc5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
+ %conv = trunc i64 %i.02 to i32
+ %arrayidx = getelementptr inbounds i32* %A, i64 -10
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul = shl i64 %i.02, 1
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 %mul
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
+ store i32 %0, i32* %B.addr.01, align 4
+ %inc = add i64 %i.02, 1
+ %cmp = icmp ult i64 %inc, 7
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+;; for (long unsigned i = 0; i < n; i++)
+;; A[10] = ...
+;; ... = A[3*i];
+
+define void @weakzerosrc6(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ %cmp1 = icmp eq i64 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+ %conv = trunc i64 %i.03 to i32
+ %arrayidx = getelementptr inbounds i32* %A, i64 10
+ store i32 %conv, i32* %arrayidx, align 4
+ %mul = mul i64 %i.03, 3
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 %mul
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - none!
+ %incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
+ store i32 %0, i32* %B.addr.02, align 4
+ %inc = add i64 %i.03, 1
+ %cmp = icmp ult i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
diff --git a/test/Analysis/DependenceAnalysis/ZIV.ll b/test/Analysis/DependenceAnalysis/ZIV.ll
new file mode 100644
index 00000000000..42b2389df26
--- /dev/null
+++ b/test/Analysis/DependenceAnalysis/ZIV.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
+
+; ModuleID = 'ZIV.bc'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+
+;; A[n + 1] = ...
+;; ... = A[1 + n];
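+;; In a ZIV (zero index variable) pair neither subscript involves an
+;; induction variable, so the analysis simply compares the expressions:
+;; provably equal (n + 1 vs. 1 + n) yields a consistent dependence,
+;; provably unequal (@z1) yields none, and incomparable (@z2) is
+;; conservatively reported as a flow dependence.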
+
+define void @z0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ %add = add i64 %n, 1
+ %arrayidx = getelementptr inbounds i32* %A, i64 %add
+ store i32 0, i32* %arrayidx, align 4
+ %add1 = add i64 %n, 1
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %add1
+ %0 = load i32* %arrayidx2, align 4
+; CHECK: da analyze - consistent flow!
+ store i32 %0, i32* %B, align 4
+ ret void
+}
+
+
+;; A[n] = ...
+;; ... = A[n + 1];
+
+define void @z1(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
+entry:
+ %arrayidx = getelementptr inbounds i32* %A, i64 %n
+ store i32 0, i32* %arrayidx, align 4
+ %add = add i64 %n, 1
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 %add
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - none!
+ store i32 %0, i32* %B, align 4
+ ret void
+}
+
+
+;; A[n] = ...
+;; ... = A[m];
+
+define void @z2(i32* %A, i32* %B, i64 %n, i64 %m) nounwind uwtable ssp {
+entry:
+ %arrayidx = getelementptr inbounds i32* %A, i64 %n
+ store i32 0, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32* %A, i64 %m
+ %0 = load i32* %arrayidx1, align 4
+; CHECK: da analyze - flow!
+ store i32 %0, i32* %B, align 4
+ ret void
+}
diff --git a/test/Analysis/DependenceAnalysis/lit.local.cfg b/test/Analysis/DependenceAnalysis/lit.local.cfg
new file mode 100644
index 00000000000..c6106e4746f
--- /dev/null
+++ b/test/Analysis/DependenceAnalysis/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Assembler/invalid-fwdref1.ll b/test/Assembler/invalid-fwdref1.ll
new file mode 100644
index 00000000000..ef8b16cadce
--- /dev/null
+++ b/test/Assembler/invalid-fwdref1.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as %s -disable-output 2>&1 | grep "invalid forward reference to function as global value!"
+
+define i8* @test1() { ret i8* @test1a }
+define void @test1a() { }
diff --git a/test/Bitcode/function-encoding-rel-operands.ll b/test/Bitcode/function-encoding-rel-operands.ll
new file mode 100644
index 00000000000..aedb0c32676
--- /dev/null
+++ b/test/Bitcode/function-encoding-rel-operands.ll
@@ -0,0 +1,49 @@
+; Basic sanity test to check that instruction operands are encoded with
+; relative IDs.
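+; With relative IDs an operand is stored as the distance from the
+; instruction's own value number back to the operand's, so a use of the
+; immediately preceding value encodes as 1, which is why the binops below
+; all show op0=1 op1=1.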
+; RUN: llvm-as < %s | llvm-bcanalyzer -dump | FileCheck %s
+
+; CHECK: FUNCTION_BLOCK
+; CHECK: INST_BINOP {{.*}}op0=1 op1=1
+; CHECK: INST_BINOP {{.*}}op0=1 op1=1
+; CHECK: INST_BINOP {{.*}}op0=1 op1=1
+; CHECK: INST_RET {{.*}}op0=1
+define i32 @test_int_binops(i32 %a) nounwind {
+entry:
+ %0 = add i32 %a, %a
+ %1 = sub i32 %0, %0
+ %2 = mul i32 %1, %1
+ ret i32 %2
+}
+
+
+; CHECK: FUNCTION_BLOCK
+; CHECK: INST_CAST {{.*}}op0=1
+; CHECK: INST_BINOP {{.*}}op0=1 op1=1
+; CHECK: INST_BINOP {{.*}}op0=1 op1=1
+; CHECK: INST_BINOP {{.*}}op0=1 op1=1
+; CHECK: INST_BINOP {{.*}}op0=1 op1=1
+; CHECK: INST_RET {{.*}}op0=1
+define double @test_float_binops(i32 %a) nounwind {
+ %1 = sitofp i32 %a to double
+ %2 = fadd double %1, %1
+ %3 = fsub double %2, %2
+ %4 = fmul double %3, %3
+ %5 = fdiv double %4, %4
+ ret double %5
+}
+
+
+; CHECK: FUNCTION_BLOCK
+; Skip checking the operands of INST_INBOUNDS_GEP, since their encoding
+; depends on the ordering between literals and the formal parameters.
+; CHECK: INST_INBOUNDS_GEP {{.*}}
+; CHECK: INST_LOAD {{.*}}op0=1 {{.*}}
+; CHECK: INST_CMP2 op0=1 {{.*}}
+; CHECK: INST_RET {{.*}}op0=1
+define i1 @test_load(i32 %a, {i32, i32}* %ptr) nounwind {
+entry:
+ %0 = getelementptr inbounds {i32, i32}* %ptr, i32 %a, i32 0
+ %1 = load i32* %0
+ %2 = icmp eq i32 %1, %a
+ ret i1 %2
+}
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index bc226ca2283..4f099a922a8 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -19,7 +19,7 @@ add_lit_testsuite(check-llvm "Running the LLVM regression tests"
DEPENDS UnitTests
BugpointPasses LLVMHello
llc lli llvm-ar llvm-as
- llvm-diff
+ llvm-bcanalyzer llvm-diff
llvm-dis llvm-extract llvm-dwarfdump
llvm-link llvm-mc llvm-nm llvm-objdump llvm-readobj
macho-dump opt
diff --git a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
index 3e78c462385..101a91396eb 100644
--- a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
+++ b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
@@ -1,4 +1,9 @@
; RUN: llc < %s -arm-tail-calls=1 | FileCheck %s
+
+; A tail call inside a function where a byval argument is split between
+; registers and the stack is currently unsupported.
+; XFAIL: *
+
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
target triple = "thumbv7-apple-ios"
diff --git a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
new file mode 100644
index 00000000000..b5f6d311cb9
--- /dev/null
+++ b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi | FileCheck %s
+; Test that we correctly use registers and align elements when using va_arg
+
+%struct_t = type { double, double, double }
+@static_val = constant %struct_t { double 1.0, double 2.0, double 3.0 }
+
+declare void @llvm.va_start(i8*) nounwind
+declare void @llvm.va_end(i8*) nounwind
+
+; CHECK: test_byval_8_bytes_alignment:
+define void @test_byval_8_bytes_alignment(i32 %i, ...) {
+entry:
+; CHECK: stm r0, {r1, r2, r3}
+ %g = alloca i8*
+ %g1 = bitcast i8** %g to i8*
+ call void @llvm.va_start(i8* %g1)
+
+; CHECK: add [[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7
+; CHECK: bfc [[REG]], #0, #3
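+; The add/bfc pair checked above rounds the va_list pointer up to the next
+; 8-byte boundary for the double: add 7, then clear the low three bits
+; (bfc is a bit-field clear of bits 0 through 2).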
+ %0 = va_arg i8** %g, double
+ call void @llvm.va_end(i8* %g1)
+
+ ret void
+}
+
+; CHECK: main:
+; CHECK: ldm r0, {r2, r3}
+define i32 @main() {
+entry:
+ call void (i32, ...)* @test_byval_8_bytes_alignment(i32 555, %struct_t* byval @static_val)
+ ret i32 0
+}
+
+declare void @f(double);
+
+; CHECK: test_byval_8_bytes_alignment_fixed_arg:
+; CHECK-NOT: str r1
+; CHECK: str r3, [sp, #12]
+; CHECK: str r2, [sp, #8]
+; CHECK-NOT: str r1
+define void @test_byval_8_bytes_alignment_fixed_arg(i32 %n1, %struct_t* byval %val) nounwind {
+entry:
+ %a = getelementptr inbounds %struct_t* %val, i32 0, i32 0
+ %0 = load double* %a
+ call void (double)* @f(double %0)
+ ret void
+}
+
+; CHECK: main_fixed_arg:
+; CHECK: ldm r0, {r2, r3}
+define i32 @main_fixed_arg() {
+entry:
+ call void (i32, %struct_t*)* @test_byval_8_bytes_alignment_fixed_arg(i32 555, %struct_t* byval @static_val)
+ ret i32 0
+}
+
diff --git a/test/CodeGen/ARM/2012-10-04-LDRB_POST_IMM-Crash.ll b/test/CodeGen/ARM/2012-10-04-LDRB_POST_IMM-Crash.ll
index 6eb8fcb2db9..f2395107d42 100644
--- a/test/CodeGen/ARM/2012-10-04-LDRB_POST_IMM-Crash.ll
+++ b/test/CodeGen/ARM/2012-10-04-LDRB_POST_IMM-Crash.ll
@@ -1,23 +1,16 @@
-; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi | FileCheck %s
; Check that the LDRB_POST_IMM instruction is emitted properly.
-%my_struct_t = type { double, double, double }
-@main.val = private unnamed_addr constant %my_struct_t { double 1.0, double 2.0, double 3.0 }, align 8
-
-declare void @f(i32 %n1, %my_struct_t* byval %val);
+%my_struct_t = type { i8, i8, i8, i8, i8 }
+@main.val = private unnamed_addr constant %my_struct_t { i8 1, i8 2, i8 3, i8 4, i8 5 }
+declare void @f(i32 %n1, i32 %n2, i32 %n3, %my_struct_t* byval %val);
; CHECK: main:
define i32 @main() nounwind {
entry:
- %val = alloca %my_struct_t, align 8
- %0 = bitcast %my_struct_t* %val to i8*
-
; CHECK: ldrb {{(r[0-9]+)}}, {{(\[r[0-9]+\])}}, #1
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* bitcast (%my_struct_t* @main.val to i8*), i32 24, i32 8, i1 false)
-
- call void @f(i32 555, %my_struct_t* byval %val)
+ call void @f(i32 555, i32 555, i32 555, %my_struct_t* byval @main.val)
ret i32 0
}
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/coalesce-subregs.ll b/test/CodeGen/ARM/coalesce-subregs.ll
index 6e1f17dced1..238ba24a797 100644
--- a/test/CodeGen/ARM/coalesce-subregs.ll
+++ b/test/CodeGen/ARM/coalesce-subregs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=cortex-a9 -verify-coalescing | FileCheck %s
+; RUN: llc < %s -mcpu=cortex-a9 -verify-coalescing -verify-machineinstrs | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
target triple = "thumbv7-apple-ios0.0.0"
@@ -214,3 +214,78 @@ loop.end:
%d.end = phi double [ 0.0, %entry ], [ %add, %after_inner_loop ]
ret void
}
+
+; CHECK: pr14078
+define arm_aapcs_vfpcc i32 @pr14078(i8* nocapture %arg, i8* nocapture %arg1, i32 %arg2) nounwind uwtable readonly {
+bb:
+ br i1 undef, label %bb31, label %bb3
+
+bb3: ; preds = %bb12, %bb
+ %tmp = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+ %tmp4 = bitcast <1 x i64> %tmp to <2 x float>
+ %tmp5 = shufflevector <2 x float> %tmp4, <2 x float> undef, <4 x i32> zeroinitializer
+ %tmp6 = bitcast <4 x float> %tmp5 to <2 x i64>
+ %tmp7 = shufflevector <2 x i64> %tmp6, <2 x i64> undef, <1 x i32> zeroinitializer
+ %tmp8 = bitcast <1 x i64> %tmp7 to <2 x float>
+ %tmp9 = tail call <2 x float> @baz(<2 x float> <float 0xFFFFFFFFE0000000, float 0.000000e+00>, <2 x float> %tmp8, <2 x float> zeroinitializer) nounwind
+ br i1 undef, label %bb10, label %bb12
+
+bb10: ; preds = %bb3
+ %tmp11 = load <4 x float>* undef, align 8
+ br label %bb12
+
+bb12: ; preds = %bb10, %bb3
+ %tmp13 = shufflevector <2 x float> %tmp9, <2 x float> zeroinitializer, <2 x i32> <i32 0, i32 2>
+ %tmp14 = bitcast <2 x float> %tmp13 to <1 x i64>
+ %tmp15 = shufflevector <1 x i64> %tmp14, <1 x i64> zeroinitializer, <2 x i32> <i32 0, i32 1>
+ %tmp16 = bitcast <2 x i64> %tmp15 to <4 x float>
+ %tmp17 = fmul <4 x float> zeroinitializer, %tmp16
+ %tmp18 = bitcast <4 x float> %tmp17 to <2 x i64>
+ %tmp19 = shufflevector <2 x i64> %tmp18, <2 x i64> undef, <1 x i32> zeroinitializer
+ %tmp20 = bitcast <1 x i64> %tmp19 to <2 x float>
+ %tmp21 = tail call <2 x float> @baz67(<2 x float> %tmp20, <2 x float> undef) nounwind
+ %tmp22 = tail call <2 x float> @baz67(<2 x float> %tmp21, <2 x float> %tmp21) nounwind
+ %tmp23 = shufflevector <2 x float> %tmp22, <2 x float> undef, <4 x i32> zeroinitializer
+ %tmp24 = bitcast <4 x float> %tmp23 to <2 x i64>
+ %tmp25 = shufflevector <2 x i64> %tmp24, <2 x i64> undef, <1 x i32> zeroinitializer
+ %tmp26 = bitcast <1 x i64> %tmp25 to <2 x float>
+ %tmp27 = extractelement <2 x float> %tmp26, i32 0
+ %tmp28 = fcmp olt float %tmp27, 0.000000e+00
+ %tmp29 = select i1 %tmp28, i32 0, i32 undef
+ %tmp30 = icmp ult i32 undef, %arg2
+ br i1 %tmp30, label %bb3, label %bb31
+
+bb31: ; preds = %bb12, %bb
+ %tmp32 = phi i32 [ 1, %bb ], [ %tmp29, %bb12 ]
+ ret i32 %tmp32
+}
+
+declare <2 x float> @baz(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
+
+declare <2 x float> @baz67(<2 x float>, <2 x float>) nounwind readnone
+
+%struct.wombat.5 = type { %struct.quux, %struct.quux, %struct.quux, %struct.quux }
+%struct.quux = type { <4 x float> }
+
+; CHECK: pr14079
+define linkonce_odr arm_aapcs_vfpcc %struct.wombat.5 @pr14079(i8* nocapture %arg, i8* nocapture %arg1, i8* nocapture %arg2) nounwind uwtable inlinehint {
+bb:
+ %tmp = shufflevector <2 x i64> zeroinitializer, <2 x i64> undef, <1 x i32> zeroinitializer
+ %tmp3 = bitcast <1 x i64> %tmp to <2 x float>
+ %tmp4 = shufflevector <2 x float> %tmp3, <2 x float> zeroinitializer, <2 x i32> <i32 1, i32 3>
+ %tmp5 = shufflevector <2 x float> %tmp4, <2 x float> undef, <2 x i32> <i32 1, i32 3>
+ %tmp6 = bitcast <2 x float> %tmp5 to <1 x i64>
+ %tmp7 = shufflevector <1 x i64> undef, <1 x i64> %tmp6, <2 x i32> <i32 0, i32 1>
+ %tmp8 = bitcast <2 x i64> %tmp7 to <4 x float>
+ %tmp9 = shufflevector <2 x i64> zeroinitializer, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp10 = bitcast <1 x i64> %tmp9 to <2 x float>
+ %tmp11 = shufflevector <2 x float> %tmp10, <2 x float> undef, <2 x i32> <i32 0, i32 2>
+ %tmp12 = shufflevector <2 x float> %tmp11, <2 x float> undef, <2 x i32> <i32 0, i32 2>
+ %tmp13 = bitcast <2 x float> %tmp12 to <1 x i64>
+ %tmp14 = shufflevector <1 x i64> %tmp13, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
+ %tmp15 = bitcast <2 x i64> %tmp14 to <4 x float>
+ %tmp16 = insertvalue %struct.wombat.5 undef, <4 x float> %tmp8, 1, 0
+ %tmp17 = insertvalue %struct.wombat.5 %tmp16, <4 x float> %tmp15, 2, 0
+ %tmp18 = insertvalue %struct.wombat.5 %tmp17, <4 x float> undef, 3, 0
+ ret %struct.wombat.5 %tmp18
+}
diff --git a/test/CodeGen/ARM/divmod.ll b/test/CodeGen/ARM/divmod.ll
index 7fbf8f40903..577f8aa7d39 100644
--- a/test/CodeGen/ARM/divmod.ll
+++ b/test/CodeGen/ARM/divmod.ll
@@ -1,10 +1,18 @@
-; RUN: llc < %s -mtriple=arm-apple-ios5.0 -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios5.0 -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -mtriple=arm-apple-ios5.0 -mcpu=swift | FileCheck %s -check-prefix=SWIFT
+
+; rdar://12481395
define void @foo(i32 %x, i32 %y, i32* nocapture %P) nounwind ssp {
entry:
-; CHECK: foo:
-; CHECK: bl ___divmodsi4
-; CHECK-NOT: bl ___divmodsi4
+; A8: foo:
+; A8: bl ___divmodsi4
+; A8-NOT: bl ___divmodsi4
+
+; SWIFT: foo:
+; SWIFT: sdiv
+; SWIFT: mls
+; SWIFT-NOT: bl __divmodsi4
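+; Swift has hardware integer divide, so llc can emit sdiv plus mls
+; (multiply-and-subtract, rem = x - (x/y)*y) instead of calling the
+; __divmodsi4 library routine.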
%div = sdiv i32 %x, %y
store i32 %div, i32* %P, align 4
%rem = srem i32 %x, %y
@@ -15,9 +23,14 @@ entry:
define void @bar(i32 %x, i32 %y, i32* nocapture %P) nounwind ssp {
entry:
-; CHECK: bar:
-; CHECK: bl ___udivmodsi4
-; CHECK-NOT: bl ___udivmodsi4
+; A8: bar:
+; A8: bl ___udivmodsi4
+; A8-NOT: bl ___udivmodsi4
+
+; SWIFT: bar:
+; SWIFT: udiv
+; SWIFT: mls
+; SWIFT-NOT: bl __udivmodsi4
%div = udiv i32 %x, %y
store i32 %div, i32* %P, align 4
%rem = urem i32 %x, %y
@@ -32,14 +45,18 @@ entry:
define void @do_indent(i32 %cols) nounwind {
entry:
-; CHECK: do_indent:
+; A8: do_indent:
+; SWIFT: do_indent:
%0 = load i32* @flags, align 4
%1 = and i32 %0, 67108864
%2 = icmp eq i32 %1, 0
br i1 %2, label %bb1, label %bb
bb:
-; CHECK: bl ___divmodsi4
+; A8: bl ___divmodsi4
+; SWIFT: sdiv
+; SWIFT: mls
+; SWIFT-NOT: bl __divmodsi4
%3 = load i32* @tabsize, align 4
%4 = srem i32 %cols, %3
%5 = sdiv i32 %cols, %3
@@ -60,9 +77,14 @@ declare i8* @__memset_chk(i8*, i32, i32, i32) nounwind
; rdar://11714607
define i32 @howmany(i32 %x, i32 %y) nounwind {
entry:
-; CHECK: howmany:
-; CHECK: bl ___udivmodsi4
-; CHECK-NOT: ___udivsi3
+; A8: howmany:
+; A8: bl ___udivmodsi4
+; A8-NOT: ___udivsi3
+
+; SWIFT: howmany:
+; SWIFT: udiv
+; SWIFT: mls
+; SWIFT-NOT: bl __udivmodsi4
%rem = urem i32 %x, %y
%div = udiv i32 %x, %y
%not.cmp = icmp ne i32 %rem, 0
diff --git a/test/CodeGen/ARM/struct_byval.ll b/test/CodeGen/ARM/struct_byval.ll
index 99ba475ad7b..e9541c27880 100644
--- a/test/CodeGen/ARM/struct_byval.ll
+++ b/test/CodeGen/ARM/struct_byval.ll
@@ -44,3 +44,47 @@ entry:
declare i32 @e1(%struct.SmallStruct* nocapture byval %in) nounwind
declare i32 @e2(%struct.LargeStruct* nocapture byval %in) nounwind
declare i32 @e3(%struct.LargeStruct* nocapture byval align 16 %in) nounwind
+
+; rdar://12442472
+; We can't do a tail call since the address of s is passed to the callee
+; and part of s lives in the caller's local frame; a tail call would
+; deallocate that frame while the callee still holds a pointer into it.
+define void @f3(%struct.SmallStruct* nocapture byval %s) nounwind optsize {
+; CHECK: f3
+; CHECK: bl _consumestruct
+entry:
+ %0 = bitcast %struct.SmallStruct* %s to i8*
+ tail call void @consumestruct(i8* %0, i32 80) optsize
+ ret void
+}
+
+define void @f4(%struct.SmallStruct* nocapture byval %s) nounwind optsize {
+; CHECK: f4
+; CHECK: bl _consumestruct
+entry:
+ %addr = getelementptr inbounds %struct.SmallStruct* %s, i32 0, i32 0
+ %0 = bitcast i32* %addr to i8*
+ tail call void @consumestruct(i8* %0, i32 80) optsize
+ ret void
+}
+
+; We can do a tail call here since s lives in the incoming argument area,
+; which stays valid for the duration of the call.
+define void @f5(i32 %a, i32 %b, i32 %c, i32 %d, %struct.SmallStruct* nocapture byval %s) nounwind optsize {
+; CHECK: f5
+; CHECK: b _consumestruct
+entry:
+ %0 = bitcast %struct.SmallStruct* %s to i8*
+ tail call void @consumestruct(i8* %0, i32 80) optsize
+ ret void
+}
+
+define void @f6(i32 %a, i32 %b, i32 %c, i32 %d, %struct.SmallStruct* nocapture byval %s) nounwind optsize {
+; CHECK: f6
+; CHECK: b _consumestruct
+entry:
+ %addr = getelementptr inbounds %struct.SmallStruct* %s, i32 0, i32 0
+ %0 = bitcast i32* %addr to i8*
+ tail call void @consumestruct(i8* %0, i32 80) optsize
+ ret void
+}
+
+declare void @consumestruct(i8* nocapture %structp, i32 %structsize) nounwind
diff --git a/test/CodeGen/ARM/vbsl.ll b/test/CodeGen/ARM/vbsl.ll
index 8ca2fd26b6a..750fb0de538 100644
--- a/test/CodeGen/ARM/vbsl.ll
+++ b/test/CodeGen/ARM/vbsl.ll
@@ -1,5 +1,7 @@
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; rdar://12471808
+
define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK: v_bsli8:
;CHECK: vbsl
@@ -125,6 +127,13 @@ define <2 x i32> @f3(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) nounwind readnone
ret <2 x i32> %vbsl3.i
}
+define <2 x float> @f4(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone optsize ssp {
+; CHECK: f4:
+; CHECK: vbsl
+ %vbsl4.i = tail call <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind
+ ret <2 x float> %vbsl4.i
+}
+
define <16 x i8> @g1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) nounwind readnone optsize ssp {
; CHECK: g1:
; CHECK: vbsl
@@ -146,9 +155,48 @@ define <4 x i32> @g3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind readnone
ret <4 x i32> %vbsl3.i
}
+define <4 x float> @g4(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone optsize ssp {
+; CHECK: g4:
+; CHECK: vbsl
+ %vbsl4.i = tail call <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind
+ ret <4 x float> %vbsl4.i
+}
+
+define <1 x i64> @test_vbsl_s64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind readnone optsize ssp {
+; CHECK: test_vbsl_s64:
+; CHECK: vbsl d
+ %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind
+ ret <1 x i64> %vbsl3.i
+}
+
+define <1 x i64> @test_vbsl_u64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind readnone optsize ssp {
+; CHECK: test_vbsl_u64:
+; CHECK: vbsl d
+ %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind
+ ret <1 x i64> %vbsl3.i
+}
+
+define <2 x i64> @test_vbslq_s64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
+; CHECK: test_vbslq_s64:
+; CHECK: vbsl q
+ %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind
+ ret <2 x i64> %vbsl3.i
+}
+
+define <2 x i64> @test_vbslq_u64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
+; CHECK: test_vbslq_u64:
+; CHECK: vbsl q
+ %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind
+ ret <2 x i64> %vbsl3.i
+}
+
declare <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64>, <1 x i64>, <1 x i64>) nounwind readnone
diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll
index a8c224b4385..2cf94d63ca1 100644
--- a/test/CodeGen/ARM/vdup.ll
+++ b/test/CodeGen/ARM/vdup.ll
@@ -295,3 +295,39 @@ define <4 x i32> @tduplane(<4 x i32> %invec) {
%4 = insertelement <4 x i32> %3, i32 255, i32 3
ret <4 x i32> %4
}
+
+define <2 x float> @check_f32(<4 x float> %v) nounwind {
+;CHECK: check_f32:
+;CHECK: vdup.32 {{.*}}, d{{..}}[1]
+ %x = extractelement <4 x float> %v, i32 3
+ %1 = insertelement <2 x float> undef, float %x, i32 0
+ %2 = insertelement <2 x float> %1, float %x, i32 1
+ ret <2 x float> %2
+}
+
+define <2 x i32> @check_i32(<4 x i32> %v) nounwind {
+;CHECK: check_i32:
+;CHECK: vdup.32 {{.*}}, d{{..}}[1]
+ %x = extractelement <4 x i32> %v, i32 3
+ %1 = insertelement <2 x i32> undef, i32 %x, i32 0
+ %2 = insertelement <2 x i32> %1, i32 %x, i32 1
+ ret <2 x i32> %2
+}
+
+define <4 x i16> @check_i16(<8 x i16> %v) nounwind {
+;CHECK: check_i16:
+;CHECK: vdup.16 {{.*}}, d{{..}}[3]
+ %x = extractelement <8 x i16> %v, i32 3
+ %1 = insertelement <4 x i16> undef, i16 %x, i32 0
+ %2 = insertelement <4 x i16> %1, i16 %x, i32 1
+ ret <4 x i16> %2
+}
+
+define <8 x i8> @check_i8(<16 x i8> %v) nounwind {
+;CHECK: check_i8:
+;CHECK: vdup.8 {{.*}}, d{{..}}[3]
+ %x = extractelement <16 x i8> %v, i32 3
+ %1 = insertelement <8 x i8> undef, i8 %x, i32 0
+ %2 = insertelement <8 x i8> %1, i8 %x, i32 1
+ ret <8 x i8> %2
+}
diff --git a/test/CodeGen/ARM/vselect_imax.ll b/test/CodeGen/ARM/vselect_imax.ll
new file mode 100644
index 00000000000..f5994046de4
--- /dev/null
+++ b/test/CodeGen/ARM/vselect_imax.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; Make sure that the ARM backend with NEON handles vselect.
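+; A vector signed max like this is lowered to a compare plus a bitwise
+; select: vcgt produces an all-ones/all-zeros mask and vbsl computes
+; (mask & a) | (~mask & b).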
+
+define void @vmax_v4i32(<4 x i32>* %m, <4 x i32> %a, <4 x i32> %b) {
+; CHECK: vcgt.s32 [[QR:q[0-9]+]], [[Q1:q[0-9]+]], [[Q2:q[0-9]+]]
+; CHECK: vbsl [[QR]], [[Q1]], [[Q2]]
+ %cmpres = icmp sgt <4 x i32> %a, %b
+ %maxres = select <4 x i1> %cmpres, <4 x i32> %a, <4 x i32> %b
+ store <4 x i32> %maxres, <4 x i32>* %m
+ ret void
+}
+
diff --git a/test/CodeGen/Mips/div.ll b/test/CodeGen/Mips/div.ll
new file mode 100644
index 00000000000..00e2c192745
--- /dev/null
+++ b/test/CodeGen/Mips/div.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@iiii = global i32 100, align 4
+@jjjj = global i32 -4, align 4
+@kkkk = common global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @iiii, align 4
+ %1 = load i32* @jjjj, align 4
+ %div = sdiv i32 %0, %1
+; 16: div $zero, ${{[0-9]+}}, ${{[0-9]+}}
+; 16: mflo ${{[0-9]+}}
+ store i32 %div, i32* @kkkk, align 4
+ ret void
+}
+
+
diff --git a/test/CodeGen/Mips/div_rem.ll b/test/CodeGen/Mips/div_rem.ll
new file mode 100644
index 00000000000..950192eee16
--- /dev/null
+++ b/test/CodeGen/Mips/div_rem.ll
@@ -0,0 +1,21 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@iiii = global i32 103, align 4
+@jjjj = global i32 -4, align 4
+@kkkk = common global i32 0, align 4
+@llll = common global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @iiii, align 4
+ %1 = load i32* @jjjj, align 4
+ %div = sdiv i32 %0, %1
+ store i32 %div, i32* @kkkk, align 4
+ %rem = srem i32 %0, %1
+; 16: div $zero, ${{[0-9]+}}, ${{[0-9]+}}
+; 16: mflo ${{[0-9]+}}
+; 16: mfhi ${{[0-9]+}}
+ store i32 %rem, i32* @llll, align 4
+ ret void
+}
+
diff --git a/test/CodeGen/Mips/divu.ll b/test/CodeGen/Mips/divu.ll
new file mode 100644
index 00000000000..b96a439390c
--- /dev/null
+++ b/test/CodeGen/Mips/divu.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@iiii = global i32 100, align 4
+@jjjj = global i32 4, align 4
+@kkkk = common global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @iiii, align 4
+ %1 = load i32* @jjjj, align 4
+ %div = udiv i32 %0, %1
+; 16: divu $zero, ${{[0-9]+}}, ${{[0-9]+}}
+; 16: mflo ${{[0-9]+}}
+ store i32 %div, i32* @kkkk, align 4
+ ret void
+}
+
+
diff --git a/test/CodeGen/Mips/divu_remu.ll b/test/CodeGen/Mips/divu_remu.ll
new file mode 100644
index 00000000000..a6c1563ac19
--- /dev/null
+++ b/test/CodeGen/Mips/divu_remu.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@iiii = global i32 103, align 4
+@jjjj = global i32 4, align 4
+@kkkk = common global i32 0, align 4
+@llll = common global i32 0, align 4
+
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @iiii, align 4
+ %1 = load i32* @jjjj, align 4
+ %div = udiv i32 %0, %1
+ store i32 %div, i32* @kkkk, align 4
+ %rem = urem i32 %0, %1
+; 16: divu $zero, ${{[0-9]+}}, ${{[0-9]+}}
+; 16: mflo ${{[0-9]+}}
+; 16: mfhi ${{[0-9]+}}
+ store i32 %rem, i32* @llll, align 4
+ ret void
+}
+
+
diff --git a/test/CodeGen/Mips/rem.ll b/test/CodeGen/Mips/rem.ll
new file mode 100644
index 00000000000..b18f85dcbec
--- /dev/null
+++ b/test/CodeGen/Mips/rem.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@iiii = global i32 103, align 4
+@jjjj = global i32 -4, align 4
+@kkkk = common global i32 0, align 4
+
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @iiii, align 4
+ %1 = load i32* @jjjj, align 4
+ %rem = srem i32 %0, %1
+; 16: div $zero, ${{[0-9]+}}, ${{[0-9]+}}
+; 16: mfhi ${{[0-9]+}}
+ store i32 %rem, i32* @kkkk, align 4
+ ret void
+}
+
+
diff --git a/test/CodeGen/Mips/remu.ll b/test/CodeGen/Mips/remu.ll
new file mode 100644
index 00000000000..472503c3840
--- /dev/null
+++ b/test/CodeGen/Mips/remu.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@iiii = global i32 103, align 4
+@jjjj = global i32 4, align 4
+@kkkk = common global i32 0, align 4
+@.str = private unnamed_addr constant [15 x i8] c"%u = %u %% %u\0A\00", align 1
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @iiii, align 4
+ %1 = load i32* @jjjj, align 4
+ %rem = urem i32 %0, %1
+; 16: divu $zero, ${{[0-9]+}}, ${{[0-9]+}}
+; 16: mfhi ${{[0-9]+}}
+ store i32 %rem, i32* @kkkk, align 4
+ ret void
+}
+
diff --git a/test/CodeGen/PowerPC/2012-10-12-bitcast.ll b/test/CodeGen/PowerPC/2012-10-12-bitcast.ll
new file mode 100644
index 00000000000..f841c5fb92e
--- /dev/null
+++ b/test/CodeGen/PowerPC/2012-10-12-bitcast.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mattr=+altivec < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i32 @test(<16 x i8> %v) nounwind {
+entry:
+ %0 = bitcast <16 x i8> %v to i128
+ %1 = lshr i128 %0, 96
+ %2 = trunc i128 %1 to i32
+ ret i32 %2
+}
+
+; Verify that bitcast handles big-endian platforms correctly
+; by checking that we load the result from the correct offset.
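+; (Bits 127:96 of the i128 are its most significant word, which big-endian
+; layout places at the lowest address; the vector is spilled with stvx to
+; -16(1), so the result is read back with an lwz from -16(1).)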
+
+; CHECK: addi [[REGISTER:[0-9]+]], 1, -16
+; CHECK: stvx 2, 0, [[REGISTER]]
+; CHECK: lwz 3, -16(1)
+; CHECK: blr
+
diff --git a/test/CodeGen/PowerPC/floatPSA.ll b/test/CodeGen/PowerPC/floatPSA.ll
new file mode 100644
index 00000000000..b5631a16056
--- /dev/null
+++ b/test/CodeGen/PowerPC/floatPSA.ll
@@ -0,0 +1,97 @@
+; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+
+; This verifies that single-precision floating-point values that can't
+; be passed in registers are stored in the rightmost word of their
+; parameter save area slot. There are 13 floating-point argument
+; registers (f1-f13), so the 14th argument is passed in storage. Its
+; address is 48 (fixed size of the linkage area) + 13 * 8 (first 13
+; args) + 4 (offset to the second word of the slot) = 156.
+
+define float @bar(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i, float %j, float %k, float %l, float %m, float %n) nounwind {
+entry:
+ %a.addr = alloca float, align 4
+ %b.addr = alloca float, align 4
+ %c.addr = alloca float, align 4
+ %d.addr = alloca float, align 4
+ %e.addr = alloca float, align 4
+ %f.addr = alloca float, align 4
+ %g.addr = alloca float, align 4
+ %h.addr = alloca float, align 4
+ %i.addr = alloca float, align 4
+ %j.addr = alloca float, align 4
+ %k.addr = alloca float, align 4
+ %l.addr = alloca float, align 4
+ %m.addr = alloca float, align 4
+ %n.addr = alloca float, align 4
+ store float %a, float* %a.addr, align 4
+ store float %b, float* %b.addr, align 4
+ store float %c, float* %c.addr, align 4
+ store float %d, float* %d.addr, align 4
+ store float %e, float* %e.addr, align 4
+ store float %f, float* %f.addr, align 4
+ store float %g, float* %g.addr, align 4
+ store float %h, float* %h.addr, align 4
+ store float %i, float* %i.addr, align 4
+ store float %j, float* %j.addr, align 4
+ store float %k, float* %k.addr, align 4
+ store float %l, float* %l.addr, align 4
+ store float %m, float* %m.addr, align 4
+ store float %n, float* %n.addr, align 4
+ %0 = load float* %n.addr, align 4
+ ret float %0
+}
+
+; CHECK: lfs {{[0-9]+}}, 156(1)
+
+define float @foo() nounwind {
+entry:
+ %a = alloca float, align 4
+ %b = alloca float, align 4
+ %c = alloca float, align 4
+ %d = alloca float, align 4
+ %e = alloca float, align 4
+ %f = alloca float, align 4
+ %g = alloca float, align 4
+ %h = alloca float, align 4
+ %i = alloca float, align 4
+ %j = alloca float, align 4
+ %k = alloca float, align 4
+ %l = alloca float, align 4
+ %m = alloca float, align 4
+ %n = alloca float, align 4
+ store float 1.000000e+00, float* %a, align 4
+ store float 2.000000e+00, float* %b, align 4
+ store float 3.000000e+00, float* %c, align 4
+ store float 4.000000e+00, float* %d, align 4
+ store float 5.000000e+00, float* %e, align 4
+ store float 6.000000e+00, float* %f, align 4
+ store float 7.000000e+00, float* %g, align 4
+ store float 8.000000e+00, float* %h, align 4
+ store float 9.000000e+00, float* %i, align 4
+ store float 1.000000e+01, float* %j, align 4
+ store float 1.100000e+01, float* %k, align 4
+ store float 1.200000e+01, float* %l, align 4
+ store float 1.300000e+01, float* %m, align 4
+ store float 1.400000e+01, float* %n, align 4
+ %0 = load float* %a, align 4
+ %1 = load float* %b, align 4
+ %2 = load float* %c, align 4
+ %3 = load float* %d, align 4
+ %4 = load float* %e, align 4
+ %5 = load float* %f, align 4
+ %6 = load float* %g, align 4
+ %7 = load float* %h, align 4
+ %8 = load float* %i, align 4
+ %9 = load float* %j, align 4
+ %10 = load float* %k, align 4
+ %11 = load float* %l, align 4
+ %12 = load float* %m, align 4
+ %13 = load float* %n, align 4
+ %call = call float @bar(float %0, float %1, float %2, float %3, float %4, float %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13)
+ ret float %call
+}
+
+; Note that stw is used instead of stfs because the value is a simple
+; constant that can be created with a load-immediate in a GPR.
+; CHECK: stw {{[0-9]+}}, 156(1)
+
diff --git a/test/CodeGen/PowerPC/novrsave.ll b/test/CodeGen/PowerPC/novrsave.ll
new file mode 100644
index 00000000000..a70576a291e
--- /dev/null
+++ b/test/CodeGen/PowerPC/novrsave.ll
@@ -0,0 +1,15 @@
+; RUN: llc -O0 -mtriple=powerpc-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+
+; This verifies that the code to update VRSAVE has been removed for SVR4.
+
+define <4 x float> @bar(<4 x float> %v) nounwind {
+entry:
+ %v.addr = alloca <4 x float>, align 16
+ store <4 x float> %v, <4 x float>* %v.addr, align 16
+ %0 = load <4 x float>* %v.addr, align 16
+ ret <4 x float> %0
+}
+
+; CHECK-NOT: mfspr
+; CHECK-NOT: mtspr
diff --git a/test/CodeGen/PowerPC/structsinmem.ll b/test/CodeGen/PowerPC/structsinmem.ll
new file mode 100644
index 00000000000..884d3a89d15
--- /dev/null
+++ b/test/CodeGen/PowerPC/structsinmem.ll
@@ -0,0 +1,227 @@
+; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s
+
+; FIXME: The code generation for packed structs is very poor because the
+; PowerPC target wrongly rejects all unaligned loads. This test case will
+; need to be revised when that is fixed.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.s1 = type { i8 }
+%struct.s2 = type { i16 }
+%struct.s4 = type { i32 }
+%struct.t1 = type { i8 }
+%struct.t3 = type <{ i16, i8 }>
+%struct.t5 = type <{ i32, i8 }>
+%struct.t6 = type <{ i32, i16 }>
+%struct.t7 = type <{ i32, i16, i8 }>
+%struct.s3 = type { i16, i8 }
+%struct.s5 = type { i32, i8 }
+%struct.s6 = type { i32, i16 }
+%struct.s7 = type { i32, i16, i8 }
+%struct.t2 = type <{ i16 }>
+%struct.t4 = type <{ i32 }>
+
+@caller1.p1 = private unnamed_addr constant %struct.s1 { i8 1 }, align 1
+@caller1.p2 = private unnamed_addr constant %struct.s2 { i16 2 }, align 2
+@caller1.p3 = private unnamed_addr constant { i16, i8, i8 } { i16 4, i8 8, i8 undef }, align 2
+@caller1.p4 = private unnamed_addr constant %struct.s4 { i32 16 }, align 4
+@caller1.p5 = private unnamed_addr constant { i32, i8, [3 x i8] } { i32 32, i8 64, [3 x i8] undef }, align 4
+@caller1.p6 = private unnamed_addr constant { i32, i16, [2 x i8] } { i32 128, i16 256, [2 x i8] undef }, align 4
+@caller1.p7 = private unnamed_addr constant { i32, i16, i8, i8 } { i32 512, i16 1024, i8 -3, i8 undef }, align 4
+@caller2.p1 = private unnamed_addr constant %struct.t1 { i8 1 }, align 1
+@caller2.p2 = private unnamed_addr constant { i16 } { i16 2 }, align 1
+@caller2.p3 = private unnamed_addr constant %struct.t3 <{ i16 4, i8 8 }>, align 1
+@caller2.p4 = private unnamed_addr constant { i32 } { i32 16 }, align 1
+@caller2.p5 = private unnamed_addr constant %struct.t5 <{ i32 32, i8 64 }>, align 1
+@caller2.p6 = private unnamed_addr constant %struct.t6 <{ i32 128, i16 256 }>, align 1
+@caller2.p7 = private unnamed_addr constant %struct.t7 <{ i32 512, i16 1024, i8 -3 }>, align 1
+
+define i32 @caller1() nounwind {
+entry:
+ %p1 = alloca %struct.s1, align 1
+ %p2 = alloca %struct.s2, align 2
+ %p3 = alloca %struct.s3, align 2
+ %p4 = alloca %struct.s4, align 4
+ %p5 = alloca %struct.s5, align 4
+ %p6 = alloca %struct.s6, align 4
+ %p7 = alloca %struct.s7, align 4
+ %0 = bitcast %struct.s1* %p1 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds (%struct.s1* @caller1.p1, i32 0, i32 0), i64 1, i32 1, i1 false)
+ %1 = bitcast %struct.s2* %p2 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s2* @caller1.p2 to i8*), i64 2, i32 2, i1 false)
+ %2 = bitcast %struct.s3* %p3 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* bitcast ({ i16, i8, i8 }* @caller1.p3 to i8*), i64 4, i32 2, i1 false)
+ %3 = bitcast %struct.s4* %p4 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* bitcast (%struct.s4* @caller1.p4 to i8*), i64 4, i32 4, i1 false)
+ %4 = bitcast %struct.s5* %p5 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* bitcast ({ i32, i8, [3 x i8] }* @caller1.p5 to i8*), i64 8, i32 4, i1 false)
+ %5 = bitcast %struct.s6* %p6 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* bitcast ({ i32, i16, [2 x i8] }* @caller1.p6 to i8*), i64 8, i32 4, i1 false)
+ %6 = bitcast %struct.s7* %p7 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* bitcast ({ i32, i16, i8, i8 }* @caller1.p7 to i8*), i64 8, i32 4, i1 false)
+ %call = call i32 @callee1(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, %struct.s1* byval %p1, %struct.s2* byval %p2, %struct.s3* byval %p3, %struct.s4* byval %p4, %struct.s5* byval %p5, %struct.s6* byval %p6, %struct.s7* byval %p7)
+ ret i32 %call
+
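+; Under the 64-bit ELF ABI the parameter save area begins at offset 48
+; from the stack pointer; the eight i32 arguments fill its first eight
+; doublewords (offsets 48..111), and each byval struct then gets its own
+; right-justified doubleword slot from offset 112 on, so e.g. the 1-byte
+; %struct.s1 is stored at 112 + 7 = 119 as checked below.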
+; CHECK: stb {{[0-9]+}}, 119(1)
+; CHECK: sth {{[0-9]+}}, 126(1)
+; CHECK: stw {{[0-9]+}}, 132(1)
+; CHECK: stw {{[0-9]+}}, 140(1)
+; CHECK: std {{[0-9]+}}, 144(1)
+; CHECK: std {{[0-9]+}}, 152(1)
+; CHECK: std {{[0-9]+}}, 160(1)
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+define internal i32 @callee1(i32 %z1, i32 %z2, i32 %z3, i32 %z4, i32 %z5, i32 %z6, i32 %z7, i32 %z8, %struct.s1* byval %v1, %struct.s2* byval %v2, %struct.s3* byval %v3, %struct.s4* byval %v4, %struct.s5* byval %v5, %struct.s6* byval %v6, %struct.s7* byval %v7) nounwind {
+entry:
+ %z1.addr = alloca i32, align 4
+ %z2.addr = alloca i32, align 4
+ %z3.addr = alloca i32, align 4
+ %z4.addr = alloca i32, align 4
+ %z5.addr = alloca i32, align 4
+ %z6.addr = alloca i32, align 4
+ %z7.addr = alloca i32, align 4
+ %z8.addr = alloca i32, align 4
+ store i32 %z1, i32* %z1.addr, align 4
+ store i32 %z2, i32* %z2.addr, align 4
+ store i32 %z3, i32* %z3.addr, align 4
+ store i32 %z4, i32* %z4.addr, align 4
+ store i32 %z5, i32* %z5.addr, align 4
+ store i32 %z6, i32* %z6.addr, align 4
+ store i32 %z7, i32* %z7.addr, align 4
+ store i32 %z8, i32* %z8.addr, align 4
+ %a = getelementptr inbounds %struct.s1* %v1, i32 0, i32 0
+ %0 = load i8* %a, align 1
+ %conv = zext i8 %0 to i32
+ %a1 = getelementptr inbounds %struct.s2* %v2, i32 0, i32 0
+ %1 = load i16* %a1, align 2
+ %conv2 = sext i16 %1 to i32
+ %add = add nsw i32 %conv, %conv2
+ %a3 = getelementptr inbounds %struct.s3* %v3, i32 0, i32 0
+ %2 = load i16* %a3, align 2
+ %conv4 = sext i16 %2 to i32
+ %add5 = add nsw i32 %add, %conv4
+ %a6 = getelementptr inbounds %struct.s4* %v4, i32 0, i32 0
+ %3 = load i32* %a6, align 4
+ %add7 = add nsw i32 %add5, %3
+ %a8 = getelementptr inbounds %struct.s5* %v5, i32 0, i32 0
+ %4 = load i32* %a8, align 4
+ %add9 = add nsw i32 %add7, %4
+ %a10 = getelementptr inbounds %struct.s6* %v6, i32 0, i32 0
+ %5 = load i32* %a10, align 4
+ %add11 = add nsw i32 %add9, %5
+ %a12 = getelementptr inbounds %struct.s7* %v7, i32 0, i32 0
+ %6 = load i32* %a12, align 4
+ %add13 = add nsw i32 %add11, %6
+ ret i32 %add13
+
+; CHECK: lha {{[0-9]+}}, 126(1)
+; CHECK: lbz {{[0-9]+}}, 119(1)
+; CHECK: lha {{[0-9]+}}, 132(1)
+; CHECK: lwz {{[0-9]+}}, 140(1)
+; CHECK: lwz {{[0-9]+}}, 144(1)
+; CHECK: lwz {{[0-9]+}}, 152(1)
+; CHECK: lwz {{[0-9]+}}, 160(1)
+}
+
+define i32 @caller2() nounwind {
+entry:
+ %p1 = alloca %struct.t1, align 1
+ %p2 = alloca %struct.t2, align 1
+ %p3 = alloca %struct.t3, align 1
+ %p4 = alloca %struct.t4, align 1
+ %p5 = alloca %struct.t5, align 1
+ %p6 = alloca %struct.t6, align 1
+ %p7 = alloca %struct.t7, align 1
+ %0 = bitcast %struct.t1* %p1 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds (%struct.t1* @caller2.p1, i32 0, i32 0), i64 1, i32 1, i1 false)
+ %1 = bitcast %struct.t2* %p2 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ({ i16 }* @caller2.p2 to i8*), i64 2, i32 1, i1 false)
+ %2 = bitcast %struct.t3* %p3 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* bitcast (%struct.t3* @caller2.p3 to i8*), i64 3, i32 1, i1 false)
+ %3 = bitcast %struct.t4* %p4 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* bitcast ({ i32 }* @caller2.p4 to i8*), i64 4, i32 1, i1 false)
+ %4 = bitcast %struct.t5* %p5 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* bitcast (%struct.t5* @caller2.p5 to i8*), i64 5, i32 1, i1 false)
+ %5 = bitcast %struct.t6* %p6 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* bitcast (%struct.t6* @caller2.p6 to i8*), i64 6, i32 1, i1 false)
+ %6 = bitcast %struct.t7* %p7 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* bitcast (%struct.t7* @caller2.p7 to i8*), i64 7, i32 1, i1 false)
+ %call = call i32 @callee2(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, %struct.t1* byval %p1, %struct.t2* byval %p2, %struct.t3* byval %p3, %struct.t4* byval %p4, %struct.t5* byval %p5, %struct.t6* byval %p6, %struct.t7* byval %p7)
+ ret i32 %call
+
+; CHECK: stb {{[0-9]+}}, 119(1)
+; CHECK: sth {{[0-9]+}}, 126(1)
+; CHECK: stb {{[0-9]+}}, 135(1)
+; CHECK: sth {{[0-9]+}}, 133(1)
+; CHECK: stw {{[0-9]+}}, 140(1)
+; CHECK: stb {{[0-9]+}}, 151(1)
+; CHECK: stw {{[0-9]+}}, 147(1)
+; CHECK: sth {{[0-9]+}}, 158(1)
+; CHECK: stw {{[0-9]+}}, 154(1)
+; CHECK: stb {{[0-9]+}}, 167(1)
+; CHECK: sth {{[0-9]+}}, 165(1)
+; CHECK: stw {{[0-9]+}}, 161(1)
+}
+
+define internal i32 @callee2(i32 %z1, i32 %z2, i32 %z3, i32 %z4, i32 %z5, i32 %z6, i32 %z7, i32 %z8, %struct.t1* byval %v1, %struct.t2* byval %v2, %struct.t3* byval %v3, %struct.t4* byval %v4, %struct.t5* byval %v5, %struct.t6* byval %v6, %struct.t7* byval %v7) nounwind {
+entry:
+ %z1.addr = alloca i32, align 4
+ %z2.addr = alloca i32, align 4
+ %z3.addr = alloca i32, align 4
+ %z4.addr = alloca i32, align 4
+ %z5.addr = alloca i32, align 4
+ %z6.addr = alloca i32, align 4
+ %z7.addr = alloca i32, align 4
+ %z8.addr = alloca i32, align 4
+ store i32 %z1, i32* %z1.addr, align 4
+ store i32 %z2, i32* %z2.addr, align 4
+ store i32 %z3, i32* %z3.addr, align 4
+ store i32 %z4, i32* %z4.addr, align 4
+ store i32 %z5, i32* %z5.addr, align 4
+ store i32 %z6, i32* %z6.addr, align 4
+ store i32 %z7, i32* %z7.addr, align 4
+ store i32 %z8, i32* %z8.addr, align 4
+ %a = getelementptr inbounds %struct.t1* %v1, i32 0, i32 0
+ %0 = load i8* %a, align 1
+ %conv = zext i8 %0 to i32
+ %a1 = getelementptr inbounds %struct.t2* %v2, i32 0, i32 0
+ %1 = load i16* %a1, align 1
+ %conv2 = sext i16 %1 to i32
+ %add = add nsw i32 %conv, %conv2
+ %a3 = getelementptr inbounds %struct.t3* %v3, i32 0, i32 0
+ %2 = load i16* %a3, align 1
+ %conv4 = sext i16 %2 to i32
+ %add5 = add nsw i32 %add, %conv4
+ %a6 = getelementptr inbounds %struct.t4* %v4, i32 0, i32 0
+ %3 = load i32* %a6, align 1
+ %add7 = add nsw i32 %add5, %3
+ %a8 = getelementptr inbounds %struct.t5* %v5, i32 0, i32 0
+ %4 = load i32* %a8, align 1
+ %add9 = add nsw i32 %add7, %4
+ %a10 = getelementptr inbounds %struct.t6* %v6, i32 0, i32 0
+ %5 = load i32* %a10, align 1
+ %add11 = add nsw i32 %add9, %5
+ %a12 = getelementptr inbounds %struct.t7* %v7, i32 0, i32 0
+ %6 = load i32* %a12, align 1
+ %add13 = add nsw i32 %add11, %6
+ ret i32 %add13
+
+; CHECK: lbz {{[0-9]+}}, 149(1)
+; CHECK: lbz {{[0-9]+}}, 150(1)
+; CHECK: lbz {{[0-9]+}}, 147(1)
+; CHECK: lbz {{[0-9]+}}, 148(1)
+; CHECK: lbz {{[0-9]+}}, 133(1)
+; CHECK: lbz {{[0-9]+}}, 134(1)
+; CHECK: lha {{[0-9]+}}, 126(1)
+; CHECK: lbz {{[0-9]+}}, 119(1)
+; CHECK: lwz {{[0-9]+}}, 140(1)
+; CHECK: lhz {{[0-9]+}}, 154(1)
+; CHECK: lhz {{[0-9]+}}, 156(1)
+; CHECK: lbz {{[0-9]+}}, 163(1)
+; CHECK: lbz {{[0-9]+}}, 164(1)
+; CHECK: lbz {{[0-9]+}}, 161(1)
+; CHECK: lbz {{[0-9]+}}, 162(1)
+}
diff --git a/test/CodeGen/PowerPC/structsinregs.ll b/test/CodeGen/PowerPC/structsinregs.ll
index ffd228bc817..43ba13b426e 100644
--- a/test/CodeGen/PowerPC/structsinregs.ll
+++ b/test/CodeGen/PowerPC/structsinregs.ll
@@ -1,5 +1,9 @@
; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s
+; FIXME: The code generation for packed structs is very poor because the
+; PowerPC target wrongly rejects all unaligned loads. This test case will
+; need to be revised when that is fixed.
+
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@@ -100,14 +104,14 @@ entry:
; CHECK: std 9, 96(1)
; CHECK: std 8, 88(1)
; CHECK: std 7, 80(1)
-; CHECK: stw 6, 72(1)
-; CHECK: stw 5, 64(1)
-; CHECK: sth 4, 58(1)
-; CHECK: stb 3, 51(1)
-; CHECK: lha {{[0-9]+}}, 58(1)
-; CHECK: lbz {{[0-9]+}}, 51(1)
-; CHECK: lha {{[0-9]+}}, 64(1)
-; CHECK: lwz {{[0-9]+}}, 72(1)
+; CHECK: stw 6, 76(1)
+; CHECK: stw 5, 68(1)
+; CHECK: sth 4, 62(1)
+; CHECK: stb 3, 55(1)
+; CHECK: lha {{[0-9]+}}, 62(1)
+; CHECK: lbz {{[0-9]+}}, 55(1)
+; CHECK: lha {{[0-9]+}}, 68(1)
+; CHECK: lwz {{[0-9]+}}, 76(1)
; CHECK: lwz {{[0-9]+}}, 80(1)
; CHECK: lwz {{[0-9]+}}, 88(1)
; CHECK: lwz {{[0-9]+}}, 96(1)
@@ -188,18 +192,26 @@ entry:
; CHECK: sldi 8, 8, 16
; CHECK: sldi 7, 7, 24
; CHECK: sldi 5, 5, 40
-; CHECK: stw 6, 72(1)
-; CHECK: sth 4, 58(1)
-; CHECK: stb 3, 51(1)
+; CHECK: stw 6, 76(1)
+; CHECK: sth 4, 62(1)
+; CHECK: stb 3, 55(1)
; CHECK: std 9, 96(1)
; CHECK: std 8, 88(1)
; CHECK: std 7, 80(1)
; CHECK: std 5, 64(1)
-; CHECK: lha {{[0-9]+}}, 58(1)
-; CHECK: lbz {{[0-9]+}}, 51(1)
-; CHECK: lha {{[0-9]+}}, 64(1)
-; CHECK: lwz {{[0-9]+}}, 72(1)
-; CHECK: lwz {{[0-9]+}}, 80(1)
-; CHECK: lwz {{[0-9]+}}, 88(1)
-; CHECK: lwz {{[0-9]+}}, 96(1)
+; CHECK: lbz {{[0-9]+}}, 85(1)
+; CHECK: lbz {{[0-9]+}}, 86(1)
+; CHECK: lbz {{[0-9]+}}, 83(1)
+; CHECK: lbz {{[0-9]+}}, 84(1)
+; CHECK: lbz {{[0-9]+}}, 69(1)
+; CHECK: lbz {{[0-9]+}}, 70(1)
+; CHECK: lha {{[0-9]+}}, 62(1)
+; CHECK: lbz {{[0-9]+}}, 55(1)
+; CHECK: lwz {{[0-9]+}}, 76(1)
+; CHECK: lhz {{[0-9]+}}, 90(1)
+; CHECK: lhz {{[0-9]+}}, 92(1)
+; CHECK: lbz {{[0-9]+}}, 99(1)
+; CHECK: lbz {{[0-9]+}}, 100(1)
+; CHECK: lbz {{[0-9]+}}, 97(1)
+; CHECK: lbz {{[0-9]+}}, 98(1)
}
diff --git a/test/CodeGen/PowerPC/vrspill.ll b/test/CodeGen/PowerPC/vrspill.ll
new file mode 100644
index 00000000000..7641017c434
--- /dev/null
+++ b/test/CodeGen/PowerPC/vrspill.ll
@@ -0,0 +1,19 @@
+; RUN: llc -O0 -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec -verify-machineinstrs < %s | FileCheck %s
+
+; This verifies that we generate correct spill/reload code for vector regs.
+
+define void @addrtaken(i32 %i, <4 x float> %w) nounwind {
+entry:
+ %i.addr = alloca i32, align 4
+ %w.addr = alloca <4 x float>, align 16
+ store i32 %i, i32* %i.addr, align 4
+ store <4 x float> %w, <4 x float>* %w.addr, align 16
+ call void @foo(i32* %i.addr)
+ ret void
+}
+
+; CHECK: stvx 2, 0, 0
+; CHECK: lvx 2, 0, 0
+
+declare void @foo(i32*)
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
index ae5804195c7..3eb7b37ee67 100644
--- a/test/CodeGen/X86/crash.ll
+++ b/test/CodeGen/X86/crash.ll
@@ -477,3 +477,106 @@ for.inc: ; preds = %for.cond
}
declare void @fn3(...)
+
+; Check coalescing of IMPLICIT_DEF instructions:
+;
+; %vreg1 = IMPLICIT_DEF
+; %vreg2 = MOV32r0
+;
+; When coalescing %vreg1 and %vreg2, the IMPLICIT_DEF instruction should be
+; erased along with its value number.
+;
+define void @rdar12474033() nounwind ssp {
+bb:
+ br i1 undef, label %bb21, label %bb1
+
+bb1: ; preds = %bb
+ switch i32 undef, label %bb10 [
+ i32 4, label %bb2
+ i32 1, label %bb9
+ i32 5, label %bb3
+ i32 6, label %bb3
+ i32 2, label %bb9
+ ]
+
+bb2: ; preds = %bb1
+ unreachable
+
+bb3: ; preds = %bb1, %bb1
+ br i1 undef, label %bb4, label %bb5
+
+bb4: ; preds = %bb3
+ unreachable
+
+bb5: ; preds = %bb3
+ %tmp = load <4 x float>* undef, align 1
+ %tmp6 = bitcast <4 x float> %tmp to i128
+ %tmp7 = load <4 x float>* undef, align 1
+ %tmp8 = bitcast <4 x float> %tmp7 to i128
+ br label %bb10
+
+bb9: ; preds = %bb1, %bb1
+ unreachable
+
+bb10: ; preds = %bb5, %bb1
+ %tmp11 = phi i128 [ undef, %bb1 ], [ %tmp6, %bb5 ]
+ %tmp12 = phi i128 [ 0, %bb1 ], [ %tmp8, %bb5 ]
+ switch i32 undef, label %bb21 [
+ i32 2, label %bb18
+ i32 3, label %bb13
+ i32 5, label %bb16
+ i32 6, label %bb17
+ i32 1, label %bb18
+ ]
+
+bb13: ; preds = %bb10
+ br i1 undef, label %bb15, label %bb14
+
+bb14: ; preds = %bb13
+ br label %bb21
+
+bb15: ; preds = %bb13
+ unreachable
+
+bb16: ; preds = %bb10
+ unreachable
+
+bb17: ; preds = %bb10
+ unreachable
+
+bb18: ; preds = %bb10, %bb10
+ %tmp19 = bitcast i128 %tmp11 to <4 x float>
+ %tmp20 = bitcast i128 %tmp12 to <4 x float>
+ br label %bb21
+
+bb21: ; preds = %bb18, %bb14, %bb10, %bb
+ %tmp22 = phi <4 x float> [ undef, %bb ], [ undef, %bb10 ], [ undef, %bb14 ], [ %tmp20, %bb18 ]
+ %tmp23 = phi <4 x float> [ undef, %bb ], [ undef, %bb10 ], [ undef, %bb14 ], [ %tmp19, %bb18 ]
+ store <4 x float> %tmp23, <4 x float>* undef, align 16
+ store <4 x float> %tmp22, <4 x float>* undef, align 16
+ switch i32 undef, label %bb29 [
+ i32 5, label %bb27
+ i32 1, label %bb24
+ i32 2, label %bb25
+ i32 14, label %bb28
+ i32 4, label %bb26
+ ]
+
+bb24: ; preds = %bb21
+ unreachable
+
+bb25: ; preds = %bb21
+ br label %bb29
+
+bb26: ; preds = %bb21
+ br label %bb29
+
+bb27: ; preds = %bb21
+ unreachable
+
+bb28: ; preds = %bb21
+ br label %bb29
+
+bb29: ; preds = %bb28, %bb26, %bb25, %bb21
+ unreachable
+}
diff --git a/test/CodeGen/X86/early-ifcvt-crash.ll b/test/CodeGen/X86/early-ifcvt-crash.ll
new file mode 100644
index 00000000000..c8280269689
--- /dev/null
+++ b/test/CodeGen/X86/early-ifcvt-crash.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -x86-early-ifcvt -verify-machineinstrs
+; RUN: llc < %s -x86-early-ifcvt -stress-early-ifcvt -verify-machineinstrs
+;
+; Run these tests with and without -stress-early-ifcvt to exercise heuristics.
+;
+target triple = "x86_64-apple-macosx10.8.0"
+
+; MachineTraceMetrics::Ensemble::addLiveIns crashes because the first operand
+; on an inline asm instruction is not a vreg def.
+; <rdar://problem/12472811>
+define void @f1() nounwind {
+entry:
+ br i1 undef, label %if.then6.i, label %if.end.i
+
+if.then6.i:
+ br label %if.end.i
+
+if.end.i:
+ br i1 undef, label %if.end25.i, label %if.else17.i
+
+if.else17.i:
+ %shl24.i = shl i32 undef, undef
+ br label %if.end25.i
+
+if.end25.i:
+ %storemerge31.i = phi i32 [ %shl24.i, %if.else17.i ], [ 0, %if.end.i ]
+ store i32 %storemerge31.i, i32* undef, align 4
+ %0 = tail call i32 asm sideeffect "", "=r,r,i,i"(i32 undef, i32 15, i32 1) nounwind
+ %conv = trunc i32 %0 to i8
+ store i8 %conv, i8* undef, align 1
+ unreachable
+}
diff --git a/test/CodeGen/X86/fp-load-trunc.ll b/test/CodeGen/X86/fp-load-trunc.ll
new file mode 100644
index 00000000000..2ae65c97d97
--- /dev/null
+++ b/test/CodeGen/X86/fp-load-trunc.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=core-avx-i | FileCheck %s --check-prefix=AVX
+
+define <1 x float> @test1(<1 x double>* %p) nounwind {
+; CHECK: test1
+; CHECK: cvtsd2ss
+; CHECK: ret
+; AVX: test1
+; AVX: vcvtsd2ss
+; AVX: ret
+ %x = load <1 x double>* %p
+ %y = fptrunc <1 x double> %x to <1 x float>
+ ret <1 x float> %y
+}
+
+define <2 x float> @test2(<2 x double>* %p) nounwind {
+; CHECK: test2
+; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
+; CHECK: ret
+; AVX: test2
+; AVX: vcvtpd2psx {{[0-9]*}}(%{{.*}})
+; AVX: ret
+ %x = load <2 x double>* %p
+ %y = fptrunc <2 x double> %x to <2 x float>
+ ret <2 x float> %y
+}
+
+define <4 x float> @test3(<4 x double>* %p) nounwind {
+; CHECK: test3
+; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
+; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
+; CHECK: movlhps
+; CHECK: ret
+; AVX: test3
+; AVX: vcvtpd2psy {{[0-9]*}}(%{{.*}})
+; AVX: ret
+ %x = load <4 x double>* %p
+ %y = fptrunc <4 x double> %x to <4 x float>
+ ret <4 x float> %y
+}
+
+define <8 x float> @test4(<8 x double>* %p) nounwind {
+; CHECK: test4
+; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
+; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
+; CHECK: movlhps
+; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
+; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
+; CHECK: movlhps
+; CHECK: ret
+; AVX: test4
+; AVX: vcvtpd2psy {{[0-9]*}}(%{{.*}})
+; AVX: vcvtpd2psy {{[0-9]*}}(%{{.*}})
+; AVX: vinsertf128
+; AVX: ret
+ %x = load <8 x double>* %p
+ %y = fptrunc <8 x double> %x to <8 x float>
+ ret <8 x float> %y
+}
+
+
diff --git a/test/CodeGen/X86/fp-trunc.ll b/test/CodeGen/X86/fp-trunc.ll
index 170637a40ee..25442fcadd2 100644
--- a/test/CodeGen/X86/fp-trunc.ll
+++ b/test/CodeGen/X86/fp-trunc.ll
@@ -1,33 +1,56 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2,-avx | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=corei7 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=core-avx-i | FileCheck %s --check-prefix=AVX
define <1 x float> @test1(<1 x double> %x) nounwind {
+; CHECK: test1
; CHECK: cvtsd2ss
; CHECK: ret
+; AVX: test1
+; AVX: vcvtsd2ss
+; AVX: ret
%y = fptrunc <1 x double> %x to <1 x float>
ret <1 x float> %y
}
-
define <2 x float> @test2(<2 x double> %x) nounwind {
-; FIXME: It would be nice if this compiled down to a cvtpd2ps
-; CHECK: cvtsd2ss
-; CHECK: cvtsd2ss
+; CHECK: test2
+; CHECK: cvtpd2ps
; CHECK: ret
+; AVX: test2
+; AVX-NOT: vcvtpd2psy
+; AVX: vcvtpd2ps
+; AVX: ret
%y = fptrunc <2 x double> %x to <2 x float>
ret <2 x float> %y
}
-define <8 x float> @test3(<8 x double> %x) nounwind {
-; FIXME: It would be nice if this compiled down to a series of cvtpd2ps
-; CHECK: cvtsd2ss
-; CHECK: cvtsd2ss
-; CHECK: cvtsd2ss
-; CHECK: cvtsd2ss
-; CHECK: cvtsd2ss
-; CHECK: cvtsd2ss
-; CHECK: cvtsd2ss
-; CHECK: cvtsd2ss
+define <4 x float> @test3(<4 x double> %x) nounwind {
+; CHECK: test3
+; CHECK: cvtpd2ps
+; CHECK: cvtpd2ps
+; CHECK: movlhps
+; CHECK: ret
+; AVX: test3
+; AVX: vcvtpd2psy
+; AVX: ret
+ %y = fptrunc <4 x double> %x to <4 x float>
+ ret <4 x float> %y
+}
+
+define <8 x float> @test4(<8 x double> %x) nounwind {
+; CHECK: test4
+; CHECK: cvtpd2ps
+; CHECK: cvtpd2ps
+; CHECK: movlhps
+; CHECK: cvtpd2ps
+; CHECK: cvtpd2ps
+; CHECK: movlhps
; CHECK: ret
+; AVX: test4
+; AVX: vcvtpd2psy
+; AVX: vcvtpd2psy
+; AVX: vinsertf128
+; AVX: ret
%y = fptrunc <8 x double> %x to <8 x float>
ret <8 x float> %y
}
diff --git a/test/CodeGen/X86/handle-move.ll b/test/CodeGen/X86/handle-move.ll
new file mode 100644
index 00000000000..e9f7a962e20
--- /dev/null
+++ b/test/CodeGen/X86/handle-move.ll
@@ -0,0 +1,74 @@
+; RUN: llc -march=x86-64 -mcpu=core2 -fast-isel -enable-misched -misched=shuffle -misched-bottomup -verify-machineinstrs < %s
+; RUN: llc -march=x86-64 -mcpu=core2 -fast-isel -enable-misched -misched=shuffle -misched-topdown -verify-machineinstrs < %s
+; REQUIRES: asserts
+;
+; Test the LiveIntervals::handleMove() function.
+;
+; Moving the DIV32r instruction exercises the regunit update code because
+; %EDX has a live range into the function and is used by the DIV32r.
+;
+; Here sinking a kill + dead def:
+; 144B -> 180B: DIV32r %vreg4, %EAX<imp-def>, %EDX<imp-def,dead>, %EFLAGS<imp-def,dead>, %EAX<imp-use,kill>, %EDX<imp-use>
+; %vreg4: [48r,144r:0) 0@48r
+; --> [48r,180r:0) 0@48r
+; DH: [0B,16r:0)[128r,144r:2)[144r,144d:1) 0@0B-phi 1@144r 2@128r
+; --> [0B,16r:0)[128r,180r:2)[180r,180d:1) 0@0B-phi 1@180r 2@128r
+; DL: [0B,16r:0)[128r,144r:2)[144r,144d:1) 0@0B-phi 1@144r 2@128r
+; --> [0B,16r:0)[128r,180r:2)[180r,180d:1) 0@0B-phi 1@180r 2@128r
+;
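+; (Notation: a live range is a list of [start,end:valno) segments; slot
+; indexes are suffixed B, r, or d here for block boundary, register slot,
+; and dead def, and "0@48r" means value number 0 is defined at index 48r.)
+;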
+define i32 @f1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind uwtable readnone ssp {
+entry:
+ %y = add i32 %c, 1
+ %x = udiv i32 %b, %a
+ %add = add nsw i32 %y, %x
+ ret i32 %add
+}
+
+; Same as above, but moving a kill + live def:
+; 144B -> 180B: DIV32r %vreg4, %EAX<imp-def,dead>, %EDX<imp-def>, %EFLAGS<imp-def,dead>, %EAX<imp-use,kill>, %EDX<imp-use>
+; %vreg4: [48r,144r:0) 0@48r
+; --> [48r,180r:0) 0@48r
+; DH: [0B,16r:0)[128r,144r:2)[144r,184r:1) 0@0B-phi 1@144r 2@128r
+; --> [0B,16r:0)[128r,180r:2)[180r,184r:1) 0@0B-phi 1@180r 2@128r
+; DL: [0B,16r:0)[128r,144r:2)[144r,184r:1) 0@0B-phi 1@144r 2@128r
+; --> [0B,16r:0)[128r,180r:2)[180r,184r:1) 0@0B-phi 1@180r 2@128r
+;
+define i32 @f2(i32 %a, i32 %b, i32 %c, i32 %d) nounwind uwtable readnone ssp {
+entry:
+ %y = sub i32 %c, %d
+ %x = urem i32 %b, %a
+ %add = add nsw i32 %x, %y
+ ret i32 %add
+}
+
+; Moving a use below the existing kill (%vreg5):
+; Moving a tied virtual register def (%vreg11):
+;
+; 96B -> 120B: %vreg11<def,tied1> = SUB32rr %vreg11<tied0>, %vreg5
+; %vreg11: [80r,96r:1)[96r,144r:0) 0@96r 1@80r
+; --> [80r,120r:1)[120r,144r:0) 0@120r 1@80r
+; %vreg5: [16r,112r:0) 0@16r
+; --> [16r,120r:0) 0@16r
+;
+define i32 @f3(i32 %a, i32 %b, i32 %c, i32 %d) nounwind uwtable readnone ssp {
+entry:
+ %y = sub i32 %a, %b
+ %x = add i32 %a, %b
+ %r = mul i32 %x, %y
+ ret i32 %r
+}
+
+; Move EFLAGS dead def across another def:
+; handleMove 208B -> 36B: %EDX<def> = MOV32r0 %EFLAGS<imp-def,dead>
+; EFLAGS: [20r,20d:4)[160r,160d:3)[208r,208d:0)[224r,224d:1)[272r,272d:2)[304r,304d:5) 0@208r 1@224r 2@272r 3@160r 4@20r 5@304r
+; --> [20r,20d:4)[36r,36d:0)[160r,160d:3)[224r,224d:1)[272r,272d:2)[304r,304d:5) 0@36r 1@224r 2@272r 3@160r 4@20r 5@304r
+;
+define i32 @f4(i32 %a, i32 %b, i32 %c, i32 %d) nounwind uwtable readnone ssp {
+entry:
+ %x = sub i32 %a, %b
+ %y = sub i32 %b, %c
+ %z = sub i32 %c, %d
+ %r1 = udiv i32 %x, %y
+ %r2 = mul i32 %z, %r1
+ ret i32 %r2
+}
diff --git a/test/CodeGen/X86/misched-ilp.ll b/test/CodeGen/X86/misched-ilp.ll
new file mode 100644
index 00000000000..c6cedb7be87
--- /dev/null
+++ b/test/CodeGen/X86/misched-ilp.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -mcpu=core2 -enable-misched -misched=ilpmax | FileCheck -check-prefix=MAX %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -mcpu=core2 -enable-misched -misched=ilpmin | FileCheck -check-prefix=MIN %s
+;
+; Basic verification of the ScheduleDAGILP metric.
+;
+; MAX: addss
+; MAX: addss
+; MAX: addss
+; MAX: subss
+; MAX: addss
+;
+; MIN: addss
+; MIN: addss
+; MIN: subss
+; MIN: addss
+; MIN: addss
+define float @ilpsched(float %a, float %b, float %c, float %d, float %e, float %f) nounwind uwtable readnone ssp {
+entry:
+ %add = fadd float %a, %b
+ %add1 = fadd float %c, %d
+ %add2 = fadd float %e, %f
+ %add3 = fsub float %add1, %add2
+ %add4 = fadd float %add, %add3
+ ret float %add4
+}
diff --git a/test/CodeGen/X86/misched-new.ll b/test/CodeGen/X86/misched-new.ll
index 8f2f6f7697d..cec04b534fb 100644
--- a/test/CodeGen/X86/misched-new.ll
+++ b/test/CodeGen/X86/misched-new.ll
@@ -1,4 +1,6 @@
-; RUN: llc -march=x86-64 -mcpu=core2 -enable-misched -misched=shuffle -misched-bottomup < %s
+; RUN: llc < %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \
+; RUN: -misched=shuffle -misched-bottomup -verify-machineinstrs \
+; RUN: | FileCheck %s
; REQUIRES: asserts
;
; Interesting MachineScheduler cases.
@@ -25,3 +27,27 @@ for.cond.preheader: ; preds = %entry
if.end: ; preds = %entry
ret void
}
+
+; The machine verifier checks that EFLAGS kill flags are updated when
+; the scheduler reorders cmovel instructions.
+;
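+; Both cmovel instructions read EFLAGS, so when the scheduler swaps them
+; the kill flag has to move to whichever one now reads EFLAGS last.
+;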
+; CHECK: test
+; CHECK: cmovel
+; CHECK: cmovel
+; CHECK: call
+define void @foo(i32 %b) nounwind uwtable ssp {
+entry:
+ %tobool = icmp ne i32 %b, 0
+ br i1 %tobool, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ %v1 = phi i32 [1, %entry], [2, %if.then]
+ %v2 = phi i32 [3, %entry], [4, %if.then]
+ call void @bar(i32 %v1, i32 %v2)
+ ret void
+}
+
+declare void @bar(i32,i32)
diff --git a/test/CodeGen/X86/pr14088.ll b/test/CodeGen/X86/pr14088.ll
new file mode 100644
index 00000000000..505e3b5cf26
--- /dev/null
+++ b/test/CodeGen/X86/pr14088.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mtriple x86_64-linux -mcpu core2 -verify-machineinstrs %s -o - | FileCheck %s
+define i32 @f(i1 %foo, i16* %tm_year2, i8* %bar, i16 %zed, i32 %zed2) {
+entry:
+ br i1 %foo, label %return, label %if.end
+
+if.end:
+ %rem = srem i32 %zed2, 100
+ %conv3 = trunc i32 %rem to i16
+ store i16 %conv3, i16* %tm_year2
+ %sext = shl i32 %rem, 16
+ %conv5 = ashr exact i32 %sext, 16
+ %div = sdiv i32 %conv5, 10
+ %conv6 = trunc i32 %div to i8
+ store i8 %conv6, i8* %bar
+ br label %return
+
+return:
+ %retval.0 = phi i32 [ 0, %if.end ], [ -1, %entry ]
+ ret i32 %retval.0
+}
+
+; We were miscompiling this and using %ax instead of %cx in the movw.
+; CHECK: movswl %cx, %ecx
+; CHECK: movw %cx, (%rsi)
+; CHECK: movslq %ecx, %rcx
diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll
index 2e39473057b..3bec3acdbf7 100644
--- a/test/CodeGen/X86/select.ll
+++ b/test/CodeGen/X86/select.ll
@@ -344,3 +344,16 @@ entry:
; ATOM: negw
; ATOM: sbbw
}
+
+define i8 @test18(i32 %x, i8 zeroext %a, i8 zeroext %b) nounwind {
+ %cmp = icmp slt i32 %x, 15
+ %sel = select i1 %cmp, i8 %a, i8 %b
+ ret i8 %sel
+; CHECK: test18:
+; CHECK: cmpl $15, %edi
+; CHECK: cmovgel %edx
+
+; ATOM: test18:
+; ATOM: cmpl $15, %edi
+; ATOM: cmovgel %edx
+}
diff --git a/test/CodeGen/X86/select_const.ll b/test/CodeGen/X86/select_const.ll
new file mode 100644
index 00000000000..5b2409d2396
--- /dev/null
+++ b/test/CodeGen/X86/select_const.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mcpu=corei7 | FileCheck %s
+
+define i64 @test1(i64 %x) nounwind {
+entry:
+ %cmp = icmp eq i64 %x, 2
+ %add = add i64 %x, 1
+ %retval.0 = select i1 %cmp, i64 2, i64 %add
+ ret i64 %retval.0
+
+; CHECK: test1:
+; CHECK: leaq 1(%rdi), %rax
+; CHECK: cmpq $2, %rdi
+; CHECK: cmoveq %rdi, %rax
+; CHECK: ret
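+;
+; The lea computes %x + 1 unconditionally; when %x == 2 the selected
+; constant happens to equal %x itself, so a single cmoveq %rdi, %rax
+; completes the select without a branch.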
+
+}
diff --git a/test/CodeGen/X86/sjlj.ll b/test/CodeGen/X86/sjlj.ll
new file mode 100644
index 00000000000..d594e982994
--- /dev/null
+++ b/test/CodeGen/X86/sjlj.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=i386-pc-linux -mcpu=corei7 -relocation-model=static | FileCheck --check-prefix=X86 %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7 | FileCheck --check-prefix=X64 %s
+
+@buf = internal global [5 x i8*] zeroinitializer
+
+declare i8* @llvm.frameaddress(i32) nounwind readnone
+
+declare i8* @llvm.stacksave() nounwind
+
+declare i32 @llvm.eh.sjlj.setjmp(i8*) nounwind
+
+declare void @llvm.eh.sjlj.longjmp(i8*) nounwind
+
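+; The SjLj intrinsics use the leading slots of @buf as follows: slot 0
+; holds the frame pointer, slot 1 the resume address that setjmp fills in,
+; and slot 2 the stack pointer; the checks below follow that layout.
+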
+define i32 @sj0() nounwind {
+ %fp = tail call i8* @llvm.frameaddress(i32 0)
+ store i8* %fp, i8** getelementptr inbounds ([5 x i8*]* @buf, i64 0, i64 0), align 16
+ %sp = tail call i8* @llvm.stacksave()
+ store i8* %sp, i8** getelementptr inbounds ([5 x i8*]* @buf, i64 0, i64 2), align 16
+ %r = tail call i32 @llvm.eh.sjlj.setjmp(i8* bitcast ([5 x i8*]* @buf to i8*))
+ ret i32 %r
+; X86: sj0
+; x86: movl %ebp, buf
+; x86: movl ${{.*LBB.*}}, buf+4
+; X86: movl %esp, buf+8
+; X86: ret
+; X64: sj0
+; x64: movq %rbp, buf(%rip)
+; x64: movq ${{.*LBB.*}}, buf+8(%rip)
+; X64: movq %rsp, buf+16(%rip)
+; X64: ret
+}
+
+define void @lj0() nounwind {
+ tail call void @llvm.eh.sjlj.longjmp(i8* bitcast ([5 x i8*]* @buf to i8*))
+ unreachable
+; X86: lj0
+; X86: movl buf, %ebp
+; X86: movl buf+4, %[[REG32:.*]]
+; X86: movl buf+8, %esp
+; X86: jmpl *%[[REG32]]
+; X64: lj0
+; X64: movq buf(%rip), %rbp
+; X64: movq buf+8(%rip), %[[REG64:.*]]
+; X64: movq buf+16(%rip), %rsp
+; X64: jmpq *%[[REG64]]
+}
diff --git a/test/Instrumentation/AddressSanitizer/instrument_global.ll b/test/Instrumentation/AddressSanitizer/instrument_global.ll
index ba8d65a4fa4..3d92946087e 100644
--- a/test/Instrumentation/AddressSanitizer/instrument_global.ll
+++ b/test/Instrumentation/AddressSanitizer/instrument_global.ll
@@ -6,8 +6,8 @@ target triple = "x86_64-unknown-linux-gnu"
; If a global is present, __asan_[un]register_globals should be called from
; module ctor/dtor
-; CHECK: llvm.global_dtors
; CHECK: llvm.global_ctors
+; CHECK: llvm.global_dtors
; CHECK: define internal void @asan.module_ctor
; CHECK-NOT: ret
diff --git a/test/MC/Mips/mips-coprocessor-encodings.s b/test/MC/Mips/mips-coprocessor-encodings.s
new file mode 100644
index 00000000000..bad9163ba9f
--- /dev/null
+++ b/test/MC/Mips/mips-coprocessor-encodings.s
@@ -0,0 +1,37 @@
+# RUN: llvm-mc %s -triple=mips64-unknown-freebsd -show-encoding | FileCheck --check-prefix=MIPS64 %s
+
+# MIPS64: dmtc0 $12, $16, 2 # encoding: [0x40,0xac,0x80,0x02]
+# MIPS64: dmtc0 $12, $16, 0 # encoding: [0x40,0xac,0x80,0x00]
+# MIPS64: mtc0 $12, $16, 2 # encoding: [0x40,0x8c,0x80,0x02]
+# MIPS64: mtc0 $12, $16, 0 # encoding: [0x40,0x8c,0x80,0x00]
+# MIPS64: dmfc0 $12, $16, 2 # encoding: [0x40,0x2c,0x80,0x02]
+# MIPS64: dmfc0 $12, $16, 0 # encoding: [0x40,0x2c,0x80,0x00]
+# MIPS64: mfc0 $12, $16, 2 # encoding: [0x40,0x0c,0x80,0x02]
+# MIPS64: mfc0 $12, $16, 0 # encoding: [0x40,0x0c,0x80,0x00]
+
+ dmtc0 $12, $16, 2
+ dmtc0 $12, $16
+ mtc0 $12, $16, 2
+ mtc0 $12, $16
+ dmfc0 $12, $16, 2
+ dmfc0 $12, $16
+ mfc0 $12, $16, 2
+ mfc0 $12, $16
+
+# MIPS64: dmtc2 $12, $16, 2 # encoding: [0x48,0xac,0x80,0x02]
+# MIPS64: dmtc2 $12, $16, 0 # encoding: [0x48,0xac,0x80,0x00]
+# MIPS64: mtc2 $12, $16, 2 # encoding: [0x48,0x8c,0x80,0x02]
+# MIPS64: mtc2 $12, $16, 0 # encoding: [0x48,0x8c,0x80,0x00]
+# MIPS64: dmfc2 $12, $16, 2 # encoding: [0x48,0x2c,0x80,0x02]
+# MIPS64: dmfc2 $12, $16, 0 # encoding: [0x48,0x2c,0x80,0x00]
+# MIPS64: mfc2 $12, $16, 2 # encoding: [0x48,0x0c,0x80,0x02]
+# MIPS64: mfc2 $12, $16, 0 # encoding: [0x48,0x0c,0x80,0x00]
+
+ dmtc2 $12, $16, 2
+ dmtc2 $12, $16
+ mtc2 $12, $16, 2
+ mtc2 $12, $16
+ dmfc2 $12, $16, 2
+ dmfc2 $12, $16
+ mfc2 $12, $16, 2
+ mfc2 $12, $16
diff --git a/test/MC/Mips/mips-register-names.s b/test/MC/Mips/mips-register-names.s
new file mode 100644
index 00000000000..26187ce5887
--- /dev/null
+++ b/test/MC/Mips/mips-register-names.s
@@ -0,0 +1,71 @@
+# RUN: llvm-mc %s -triple=mips-unknown-freebsd -show-encoding | FileCheck %s
+
+# Check that the register names are mapped to their correct numbers for o32.
+# Each instruction is an addiu with $zero at rs, so the second byte of the
+# encoding holds the number of the destination register (rt) being named.
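+#
+# For example, addiu $a0, $zero, 0 encodes as opcode 001001, rs 00000,
+# rt 00100, imm 0, i.e. [0x24,0x04,0x00,0x00].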
+
+# CHECK: encoding: [0x24,0x00,0x00,0x00]
+# CHECK: encoding: [0x24,0x01,0x00,0x00]
+# CHECK: encoding: [0x24,0x02,0x00,0x00]
+# CHECK: encoding: [0x24,0x03,0x00,0x00]
+# CHECK: encoding: [0x24,0x04,0x00,0x00]
+# CHECK: encoding: [0x24,0x05,0x00,0x00]
+# CHECK: encoding: [0x24,0x06,0x00,0x00]
+# CHECK: encoding: [0x24,0x07,0x00,0x00]
+# CHECK: encoding: [0x24,0x08,0x00,0x00]
+# CHECK: encoding: [0x24,0x09,0x00,0x00]
+# CHECK: encoding: [0x24,0x0a,0x00,0x00]
+# CHECK: encoding: [0x24,0x0b,0x00,0x00]
+# CHECK: encoding: [0x24,0x0c,0x00,0x00]
+# CHECK: encoding: [0x24,0x0d,0x00,0x00]
+# CHECK: encoding: [0x24,0x0e,0x00,0x00]
+# CHECK: encoding: [0x24,0x0f,0x00,0x00]
+# CHECK: encoding: [0x24,0x10,0x00,0x00]
+# CHECK: encoding: [0x24,0x11,0x00,0x00]
+# CHECK: encoding: [0x24,0x12,0x00,0x00]
+# CHECK: encoding: [0x24,0x13,0x00,0x00]
+# CHECK: encoding: [0x24,0x14,0x00,0x00]
+# CHECK: encoding: [0x24,0x15,0x00,0x00]
+# CHECK: encoding: [0x24,0x16,0x00,0x00]
+# CHECK: encoding: [0x24,0x17,0x00,0x00]
+# CHECK: encoding: [0x24,0x18,0x00,0x00]
+# CHECK: encoding: [0x24,0x19,0x00,0x00]
+# CHECK: encoding: [0x24,0x1a,0x00,0x00]
+# CHECK: encoding: [0x24,0x1b,0x00,0x00]
+# CHECK: encoding: [0x24,0x1c,0x00,0x00]
+# CHECK: encoding: [0x24,0x1d,0x00,0x00]
+# CHECK: encoding: [0x24,0x1e,0x00,0x00]
+# CHECK: encoding: [0x24,0x1f,0x00,0x00]
+addiu $zero, $zero, 0
+addiu $at, $zero, 0
+addiu $v0, $zero, 0
+addiu $v1, $zero, 0
+addiu $a0, $zero, 0
+addiu $a1, $zero, 0
+addiu $a2, $zero, 0
+addiu $a3, $zero, 0
+addiu $t0, $zero, 0
+addiu $t1, $zero, 0
+addiu $t2, $zero, 0
+addiu $t3, $zero, 0
+addiu $t4, $zero, 0
+addiu $t5, $zero, 0
+addiu $t6, $zero, 0
+addiu $t7, $zero, 0
+addiu $s0, $zero, 0
+addiu $s1, $zero, 0
+addiu $s2, $zero, 0
+addiu $s3, $zero, 0
+addiu $s4, $zero, 0
+addiu $s5, $zero, 0
+addiu $s6, $zero, 0
+addiu $s7, $zero, 0
+addiu $t8, $zero, 0
+addiu $t9, $zero, 0
+addiu $k0, $zero, 0
+addiu $k1, $zero, 0
+addiu $gp, $zero, 0
+addiu $sp, $zero, 0
+addiu $fp, $zero, 0
+addiu $s8, $zero, 0
+addiu $ra, $zero, 0
diff --git a/test/MC/Mips/mips64-register-names.s b/test/MC/Mips/mips64-register-names.s
new file mode 100644
index 00000000000..16783ee1a68
--- /dev/null
+++ b/test/MC/Mips/mips64-register-names.s
@@ -0,0 +1,70 @@
+# RUN: llvm-mc %s -triple=mips64-unknown-freebsd -show-encoding | FileCheck %s
+
+# Check that the register names are mapped to their correct numbers for n64.
+# Each instruction is a daddiu with $zero at rs, so the second byte of the
+# encoding holds the number of the destination register (rt) being named.
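+#
+# For example, daddiu $a0, $zero, 0 encodes as opcode 011001, rs 00000,
+# rt 00100, imm 0, i.e. [0x64,0x04,0x00,0x00].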
+
+# CHECK: encoding: [0x64,0x00,0x00,0x00]
+# CHECK: encoding: [0x64,0x01,0x00,0x00]
+# CHECK: encoding: [0x64,0x02,0x00,0x00]
+# CHECK: encoding: [0x64,0x03,0x00,0x00]
+# CHECK: encoding: [0x64,0x04,0x00,0x00]
+# CHECK: encoding: [0x64,0x05,0x00,0x00]
+# CHECK: encoding: [0x64,0x06,0x00,0x00]
+# CHECK: encoding: [0x64,0x07,0x00,0x00]
+# CHECK: encoding: [0x64,0x08,0x00,0x00]
+# CHECK: encoding: [0x64,0x09,0x00,0x00]
+# CHECK: encoding: [0x64,0x0a,0x00,0x00]
+# CHECK: encoding: [0x64,0x0b,0x00,0x00]
+# CHECK: encoding: [0x64,0x0c,0x00,0x00]
+# CHECK: encoding: [0x64,0x0d,0x00,0x00]
+# CHECK: encoding: [0x64,0x0e,0x00,0x00]
+# CHECK: encoding: [0x64,0x0f,0x00,0x00]
+# CHECK: encoding: [0x64,0x10,0x00,0x00]
+# CHECK: encoding: [0x64,0x11,0x00,0x00]
+# CHECK: encoding: [0x64,0x12,0x00,0x00]
+# CHECK: encoding: [0x64,0x13,0x00,0x00]
+# CHECK: encoding: [0x64,0x14,0x00,0x00]
+# CHECK: encoding: [0x64,0x15,0x00,0x00]
+# CHECK: encoding: [0x64,0x16,0x00,0x00]
+# CHECK: encoding: [0x64,0x17,0x00,0x00]
+# CHECK: encoding: [0x64,0x18,0x00,0x00]
+# CHECK: encoding: [0x64,0x19,0x00,0x00]
+# CHECK: encoding: [0x64,0x1a,0x00,0x00]
+# CHECK: encoding: [0x64,0x1b,0x00,0x00]
+# CHECK: encoding: [0x64,0x1c,0x00,0x00]
+# CHECK: encoding: [0x64,0x1d,0x00,0x00]
+# CHECK: encoding: [0x64,0x1e,0x00,0x00]
+# CHECK: encoding: [0x64,0x1f,0x00,0x00]
+daddiu $zero, $zero, 0
+daddiu $at, $zero, 0
+daddiu $v0, $zero, 0
+daddiu $v1, $zero, 0
+daddiu $a0, $zero, 0
+daddiu $a1, $zero, 0
+daddiu $a2, $zero, 0
+daddiu $a3, $zero, 0
+daddiu $a4, $zero, 0
+daddiu $a5, $zero, 0
+daddiu $a6, $zero, 0
+daddiu $a7, $zero, 0
+daddiu $t4, $zero, 0
+daddiu $t5, $zero, 0
+daddiu $t6, $zero, 0
+daddiu $t7, $zero, 0
+daddiu $s0, $zero, 0
+daddiu $s1, $zero, 0
+daddiu $s2, $zero, 0
+daddiu $s3, $zero, 0
+daddiu $s4, $zero, 0
+daddiu $s5, $zero, 0
+daddiu $s6, $zero, 0
+daddiu $s7, $zero, 0
+daddiu $t8, $zero, 0
+daddiu $t9, $zero, 0
+daddiu $kt0, $zero, 0
+daddiu $kt1, $zero, 0
+daddiu $gp, $zero, 0
+daddiu $sp, $zero, 0
+daddiu $s8, $zero, 0
+daddiu $ra, $zero, 0
diff --git a/test/MC/X86/x86_nop.s b/test/MC/X86/x86_nop.s
index de0fc088344..396e3022ebe 100644
--- a/test/MC/X86/x86_nop.s
+++ b/test/MC/X86/x86_nop.s
@@ -1,7 +1,13 @@
-# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=geode %s -o %t
-# RUN: llvm-objdump -disassemble %t | FileCheck %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=generic %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=i386 %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=i486 %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=i586 %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=pentium %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=pentium-mmx %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=geode %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=i686 %s | llvm-objdump -d - | not FileCheck %s
-# CHECK-NOT: nopw
+# CHECK-NOT: nop{{[lw]}}
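+# CPUs without long-NOP support must pad the .align with single-byte 0x90
+# nops; i686 and later are expected to emit a multi-byte nop instead,
+# hence the inverted (not FileCheck) RUN line above.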
inc %eax
.align 8
inc %eax
diff --git a/test/Transforms/InstCombine/strcat-1.ll b/test/Transforms/InstCombine/strcat-1.ll
new file mode 100644
index 00000000000..3c05d6b06fa
--- /dev/null
+++ b/test/Transforms/InstCombine/strcat-1.ll
@@ -0,0 +1,38 @@
+; Test that the strcat libcall simplifier works correctly per the
+; bug found in PR3661.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@null = constant [1 x i8] zeroinitializer
+@null_hello = constant [7 x i8] c"\00hello\00"
+
+declare i8* @strcat(i8*, i8*)
+declare i32 @puts(i8*)
+
+define i32 @main() {
+; CHECK: @main
+; CHECK-NOT: call i8* @strcat
+; CHECK: call i32 @puts
+
+ %target = alloca [1024 x i8]
+ %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0
+ store i8 0, i8* %arg1
+
+ ; rslt1 = strcat(target, "hello\00")
+ %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0
+ %rslt1 = call i8* @strcat(i8* %arg1, i8* %arg2)
+
+ ; rslt2 = strcat(rslt1, "\00")
+ %arg3 = getelementptr [1 x i8]* @null, i32 0, i32 0
+ %rslt2 = call i8* @strcat(i8* %rslt1, i8* %arg3)
+
+ ; rslt3 = strcat(rslt2, "\00hello\00")
+ %arg4 = getelementptr [7 x i8]* @null_hello, i32 0, i32 0
+ %rslt3 = call i8* @strcat(i8* %rslt2, i8* %arg4)
+
+ call i32 @puts( i8* %rslt3 )
+ ret i32 0
+}
diff --git a/test/Transforms/InstCombine/strcat-2.ll b/test/Transforms/InstCombine/strcat-2.ll
new file mode 100644
index 00000000000..379ee749531
--- /dev/null
+++ b/test/Transforms/InstCombine/strcat-2.ll
@@ -0,0 +1,32 @@
+; Test that the strcat libcall simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@empty = constant [1 x i8] c"\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i8* @strcat(i8*, i8*)
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+; CHECK-NOT: call i8* @strcat
+; CHECK: ret void
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+ call i8* @strcat(i8* %dst, i8* %src)
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+; CHECK-NEXT: ret void
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [1 x i8]* @empty, i32 0, i32 0
+ call i8* @strcat(i8* %dst, i8* %src)
+ ret void
+}
diff --git a/test/Transforms/InstCombine/strcat-3.ll b/test/Transforms/InstCombine/strcat-3.ll
new file mode 100644
index 00000000000..15aff2f1aa2
--- /dev/null
+++ b/test/Transforms/InstCombine/strcat-3.ll
@@ -0,0 +1,22 @@
+; Test that the strcat libcall simplifier skips a strcat declared with a
+; mismatched prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@empty = constant [1 x i8] c"\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i16* @strcat(i8*, i8*)
+
+define void @test_nosimplify1() {
+; CHECK: @test_nosimplify1
+; CHECK: call i16* @strcat
+; CHECK: ret void
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+ call i16* @strcat(i8* %dst, i8* %src)
+ ret void
+}
diff --git a/test/Transforms/InstCombine/strchr-1.ll b/test/Transforms/InstCombine/strchr-1.ll
new file mode 100644
index 00000000000..5efab9ec4be
--- /dev/null
+++ b/test/Transforms/InstCombine/strchr-1.ll
@@ -0,0 +1,54 @@
+; Test that the strchr library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [14 x i8] c"hello world\5Cn\00"
+@null = constant [1 x i8] zeroinitializer
+@chp = global i8* zeroinitializer
+
+declare i8* @strchr(i8*, i32)
+
+define void @test_simplify1() {
+; CHECK: store i8* getelementptr inbounds ([14 x i8]* @hello, i32 0, i32 6)
+; CHECK-NOT: call i8* @strchr
+; CHECK: ret void
+
+ %str = getelementptr [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @strchr(i8* %str, i32 119)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: store i8* null, i8** @chp, align 4
+; CHECK-NOT: call i8* @strchr
+; CHECK: ret void
+
+ %str = getelementptr [1 x i8]* @null, i32 0, i32 0
+ %dst = call i8* @strchr(i8* %str, i32 119)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test_simplify3() {
+; CHECK: store i8* getelementptr inbounds ([14 x i8]* @hello, i32 0, i32 13)
+; CHECK-NOT: call i8* @strchr
+; CHECK: ret void
+
+ %src = getelementptr [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @strchr(i8* %src, i32 0)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test_simplify4(i32 %chr) {
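+; With a non-constant character, the call is lowered to a memchr bounded by
+; the known string length.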
+; CHECK: call i8* @memchr
+; CHECK-NOT: call i8* @strchr
+; CHECK: ret void
+
+ %src = getelementptr [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @strchr(i8* %src, i32 %chr)
+ store i8* %dst, i8** @chp
+ ret void
+}
diff --git a/test/Transforms/InstCombine/strchr-2.ll b/test/Transforms/InstCombine/strchr-2.ll
new file mode 100644
index 00000000000..35bbd23e6d4
--- /dev/null
+++ b/test/Transforms/InstCombine/strchr-2.ll
@@ -0,0 +1,21 @@
+; Test that the strchr libcall simplifier leaves a strchr declared with a
+; mismatched prototype alone.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [14 x i8] c"hello world\5Cn\00"
+@chr = global i8 zeroinitializer
+
+declare i8 @strchr(i8*, i32)
+
+define void @test_nosimplify1() {
+; CHECK: @test_nosimplify1
+; CHECK: call i8 @strchr
+; CHECK: ret void
+
+ %str = getelementptr [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8 @strchr(i8* %str, i32 119)
+ store i8 %dst, i8* @chr
+ ret void
+}
diff --git a/test/Transforms/InstCombine/strcmp-1.ll b/test/Transforms/InstCombine/strcmp-1.ll
new file mode 100644
index 00000000000..0679246e091
--- /dev/null
+++ b/test/Transforms/InstCombine/strcmp-1.ll
@@ -0,0 +1,82 @@
+; Test that the strcmp library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@hell = constant [5 x i8] c"hell\00"
+@bell = constant [5 x i8] c"bell\00"
+@null = constant [1 x i8] zeroinitializer
+
+declare i32 @strcmp(i8*, i8*)
+
+; strcmp("", x) -> -*x
+define i32 @test1(i8* %str2) {
+; CHECK: @test1
+; CHECK: %strcmpload = load i8* %str
+; CHECK: %1 = zext i8 %strcmpload to i32
+; CHECK: %2 = sub i32 0, %1
+; CHECK: ret i32 %2
+
+ %str1 = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
+ %temp1 = call i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp1
+
+}
+
+; strcmp(x, "") -> *x
+define i32 @test2(i8* %str1) {
+; CHECK: @test2
+; CHECK: %strcmpload = load i8* %str
+; CHECK: %1 = zext i8 %strcmpload to i32
+; CHECK: ret i32 %1
+
+ %str2 = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
+ %temp1 = call i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp1
+}
+
+; strcmp(x, y) -> cnst
+define i32 @test3() {
+; CHECK: @test3
+; CHECK: ret i32 -1
+
+ %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [6 x i8]* @hello, i32 0, i32 0
+ %temp1 = call i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp1
+}
+
+define i32 @test4() {
+; CHECK: @test4
+; CHECK: ret i32 1
+
+ %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
+ %temp1 = call i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp1
+}
+
+; strcmp(x, y) -> memcmp(x, y, <known length>)
+; (This transform is rather difficult to trigger in a useful manner)
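+; (Selecting between two same-length constant strings hands strcmp a known
+; length bound while keeping the operand itself non-constant.)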
+define i32 @test5(i1 %b) {
+; CHECK: @test5
+; CHECK: %memcmp = call i32 @memcmp(i8* getelementptr inbounds ([6 x i8]* @hello, i32 0, i32 0), i8* %str2, i32 5)
+; CHECK: ret i32 %memcmp
+
+ %str1 = getelementptr inbounds [6 x i8]* @hello, i32 0, i32 0
+ %temp1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
+ %temp2 = getelementptr inbounds [5 x i8]* @bell, i32 0, i32 0
+ %str2 = select i1 %b, i8* %temp1, i8* %temp2
+ %temp3 = call i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp3
+}
+
+; strcmp(x,x) -> 0
+define i32 @test6(i8* %str) {
+; CHECK: @test6
+; CHECK: ret i32 0
+
+ %temp1 = call i32 @strcmp(i8* %str, i8* %str)
+ ret i32 %temp1
+}
diff --git a/test/Transforms/InstCombine/strcmp-2.ll b/test/Transforms/InstCombine/strcmp-2.ll
new file mode 100644
index 00000000000..20518960f30
--- /dev/null
+++ b/test/Transforms/InstCombine/strcmp-2.ll
@@ -0,0 +1,20 @@
+; Test that the strcmp library call simplifier does not fire on a strcmp
+; declared with the wrong return type.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@hell = constant [5 x i8] c"hell\00"
+
+declare i16 @strcmp(i8*, i8*)
+
+define i16 @test_nosimplify() {
+; CHECK: @test_nosimplify
+; CHECK: call i16 @strcmp
+; CHECK: ret i16 %temp1
+
+ %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [6 x i8]* @hello, i32 0, i32 0
+ %temp1 = call i16 @strcmp(i8* %str1, i8* %str2)
+ ret i16 %temp1
+}
diff --git a/test/Transforms/InstCombine/strncat-1.ll b/test/Transforms/InstCombine/strncat-1.ll
new file mode 100644
index 00000000000..ad2a18b1465
--- /dev/null
+++ b/test/Transforms/InstCombine/strncat-1.ll
@@ -0,0 +1,37 @@
+; Test that the strncat libcall simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@null = constant [1 x i8] zeroinitializer
+@null_hello = constant [7 x i8] c"\00hello\00"
+
+declare i8* @strncat(i8*, i8*, i32)
+declare i32 @puts(i8*)
+
+define i32 @main() {
+; CHECK: @main
+; CHECK-NOT: call i8* @strncat
+; CHECK: call i32 @puts
+
+ %target = alloca [1024 x i8]
+ %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0
+ store i8 0, i8* %arg1
+
+  ; rslt1 = strncat(target, "hello\00", 6)
+ %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0
+ %rslt1 = call i8* @strncat(i8* %arg1, i8* %arg2, i32 6)
+
+  ; rslt2 = strncat(rslt1, "\00", 42)
+ %arg3 = getelementptr [1 x i8]* @null, i32 0, i32 0
+ %rslt2 = call i8* @strncat(i8* %rslt1, i8* %arg3, i32 42)
+
+  ; rslt3 = strncat(rslt2, "\00hello\00", 42)
+ %arg4 = getelementptr [7 x i8]* @null_hello, i32 0, i32 0
+ %rslt3 = call i8* @strncat(i8* %rslt2, i8* %arg4, i32 42)
+
+ call i32 @puts(i8* %rslt3)
+ ret i32 0
+}
diff --git a/test/Transforms/InstCombine/strncat-2.ll b/test/Transforms/InstCombine/strncat-2.ll
new file mode 100644
index 00000000000..c56deacd39b
--- /dev/null
+++ b/test/Transforms/InstCombine/strncat-2.ll
@@ -0,0 +1,53 @@
+; Test that the strncat libcall simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@empty = constant [1 x i8] c"\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i8* @strncat(i8*, i8*, i32)
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+; CHECK-NOT: call i8* @strncat
+; CHECK: ret void
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+ call i8* @strncat(i8* %dst, i8* %src, i32 13)
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+; CHECK-NEXT: ret void
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [1 x i8]* @empty, i32 0, i32 0
+ call i8* @strncat(i8* %dst, i8* %src, i32 13)
+ ret void
+}
+
+define void @test_simplify3() {
+; CHECK: @test_simplify3
+; CHECK-NEXT: ret void
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+ call i8* @strncat(i8* %dst, i8* %src, i32 0)
+ ret void
+}
+
+define void @test_nosimplify1() {
+; CHECK: @test_nosimplify1
+; CHECK: call i8* @strncat
+; CHECK: ret void
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+ call i8* @strncat(i8* %dst, i8* %src, i32 1)
+ ret void
+}
diff --git a/test/Transforms/InstCombine/strncat-3.ll b/test/Transforms/InstCombine/strncat-3.ll
new file mode 100644
index 00000000000..3cd79716870
--- /dev/null
+++ b/test/Transforms/InstCombine/strncat-3.ll
@@ -0,0 +1,22 @@
+; Test that the strncat libcall simplifier skips a strncat declared with a
+; mismatched prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@empty = constant [1 x i8] c"\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i16* @strncat(i8*, i8*, i32)
+
+define void @test_nosimplify1() {
+; CHECK: @test_nosimplify1
+; CHECK: call i16* @strncat
+; CHECK: ret void
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+ call i16* @strncat(i8* %dst, i8* %src, i32 13)
+ ret void
+}
diff --git a/test/Transforms/InstCombine/strncmp-1.ll b/test/Transforms/InstCombine/strncmp-1.ll
new file mode 100644
index 00000000000..48b26d1a5f3
--- /dev/null
+++ b/test/Transforms/InstCombine/strncmp-1.ll
@@ -0,0 +1,97 @@
+; Test that the strncmp library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@hell = constant [5 x i8] c"hell\00"
+@bell = constant [5 x i8] c"bell\00"
+@null = constant [1 x i8] zeroinitializer
+
+declare i32 @strncmp(i8*, i8*, i32)
+
+; strncmp("", x, n) -> -*x
+define i32 @test1(i8* %str2) {
+; CHECK: @test1
+; CHECK: %strcmpload = load i8* %str
+; CHECK: %1 = zext i8 %strcmpload to i32
+; CHECK: %2 = sub i32 0, %1
+; CHECK: ret i32 %2
+
+ %str1 = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
+ %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 10)
+ ret i32 %temp1
+}
+
+; strncmp(x, "", n) -> *x
+define i32 @test2(i8* %str1) {
+; CHECK: @test2
+; CHECK: %strcmpload = load i8* %str1
+; CHECK: %1 = zext i8 %strcmpload to i32
+; CHECK: ret i32 %1
+
+ %str2 = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
+ %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 10)
+ ret i32 %temp1
+}
+
+; strncmp(x, y, n) -> cnst
+define i32 @test3() {
+; CHECK: @test3
+; CHECK: ret i32 -1
+
+ %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [6 x i8]* @hello, i32 0, i32 0
+ %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 10)
+ ret i32 %temp1
+}
+
+define i32 @test4() {
+; CHECK: @test4
+; CHECK: ret i32 1
+
+ %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
+ %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 10)
+ ret i32 %temp1
+}
+
+define i32 @test5() {
+; CHECK: @test5
+; CHECK: ret i32 0
+
+ %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [6 x i8]* @hello, i32 0, i32 0
+ %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 4)
+ ret i32 %temp1
+}
+
+; strncmp(x,y,1) -> memcmp(x,y,1)
+; TODO: Once the memcmp simplifier is moved into the instcombine pass,
+; the following memcmp will be folded into two loads and a subtract.
+define i32 @test6(i8* %str1, i8* %str2) {
+; CHECK: @test6
+; CHECK: call i32 @memcmp
+; CHECK: ret i32 %memcmp
+
+ %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 1)
+ ret i32 %temp1
+}
+
+; strncmp(x,y,0) -> 0
+define i32 @test7(i8* %str1, i8* %str2) {
+; CHECK: @test7
+; CHECK: ret i32 0
+
+ %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 0)
+ ret i32 %temp1
+}
+
+; strncmp(x,x,n) -> 0
+define i32 @test8(i8* %str, i32 %n) {
+; CHECK: @test8
+; CHECK: ret i32 0
+
+ %temp1 = call i32 @strncmp(i8* %str, i8* %str, i32 %n)
+ ret i32 %temp1
+}
diff --git a/test/Transforms/InstCombine/strncmp-2.ll b/test/Transforms/InstCombine/strncmp-2.ll
new file mode 100644
index 00000000000..3fc43a6fd4f
--- /dev/null
+++ b/test/Transforms/InstCombine/strncmp-2.ll
@@ -0,0 +1,20 @@
+; Test that the strncmp library call simplifier does not fire on a strncmp
+; declared with the wrong return type.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@hell = constant [5 x i8] c"hell\00"
+
+declare i16 @strncmp(i8*, i8*, i32)
+
+define i16 @test_nosimplify() {
+; CHECK: @test_nosimplify
+; CHECK: call i16 @strncmp
+; CHECK: ret i16 %temp1
+
+ %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [6 x i8]* @hello, i32 0, i32 0
+ %temp1 = call i16 @strncmp(i8* %str1, i8* %str2, i32 10)
+ ret i16 %temp1
+}
diff --git a/test/Transforms/InstCombine/strrchr-1.ll b/test/Transforms/InstCombine/strrchr-1.ll
new file mode 100644
index 00000000000..854ce45bffb
--- /dev/null
+++ b/test/Transforms/InstCombine/strrchr-1.ll
@@ -0,0 +1,54 @@
+; Test that the strrchr library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [14 x i8] c"hello world\5Cn\00"
+@null = constant [1 x i8] zeroinitializer
+@chp = global i8* zeroinitializer
+
+declare i8* @strrchr(i8*, i32)
+
+define void @test_simplify1() {
+; CHECK: store i8* getelementptr inbounds ([14 x i8]* @hello, i32 0, i32 6)
+; CHECK-NOT: call i8* @strrchr
+; CHECK: ret void
+
+ %str = getelementptr [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @strrchr(i8* %str, i32 119)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: store i8* null, i8** @chp, align 4
+; CHECK-NOT: call i8* @strrchr
+; CHECK: ret void
+
+ %str = getelementptr [1 x i8]* @null, i32 0, i32 0
+ %dst = call i8* @strrchr(i8* %str, i32 119)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test_simplify3() {
+; CHECK: store i8* getelementptr inbounds ([14 x i8]* @hello, i32 0, i32 13)
+; CHECK-NOT: call i8* @strrchr
+; CHECK: ret void
+
+ %src = getelementptr [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @strrchr(i8* %src, i32 0)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test_nosimplify1(i32 %chr) {
+; CHECK: @test_nosimplify1
+; CHECK: call i8* @strrchr
+; CHECK: ret void
+
+ %src = getelementptr [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @strrchr(i8* %src, i32 %chr)
+ store i8* %dst, i8** @chp
+ ret void
+}
diff --git a/test/Transforms/InstCombine/strrchr-2.ll b/test/Transforms/InstCombine/strrchr-2.ll
new file mode 100644
index 00000000000..1974f6ca603
--- /dev/null
+++ b/test/Transforms/InstCombine/strrchr-2.ll
@@ -0,0 +1,21 @@
+; Test that the strrchr libcall simplifier leaves a strrchr declared with a
+; mismatched prototype alone.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [14 x i8] c"hello world\5Cn\00"
+@chr = global i8 zeroinitializer
+
+declare i8 @strrchr(i8*, i32)
+
+define void @test_nosimplify1() {
+; CHECK: @test_nosimplify1
+; CHECK: call i8 @strrchr
+; CHECK: ret void
+
+ %str = getelementptr [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8 @strrchr(i8* %str, i32 119)
+ store i8 %dst, i8* @chr
+ ret void
+}
diff --git a/test/Transforms/InstCombine/struct-assign-tbaa.ll b/test/Transforms/InstCombine/struct-assign-tbaa.ll
index 4fbdb0ab67c..33a771e6d8b 100644
--- a/test/Transforms/InstCombine/struct-assign-tbaa.ll
+++ b/test/Transforms/InstCombine/struct-assign-tbaa.ll
@@ -2,25 +2,43 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-%struct.foo = type { float }
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
; Verify that instcombine preserves TBAA tags when converting a memcpy into
; a scalar load and store.
+%struct.test1 = type { float }
+
+; CHECK: @test
; CHECK: %2 = load float* %0, align 4, !tbaa !0
; CHECK: store float %2, float* %1, align 4, !tbaa !0
-; CHECK: !0 = metadata !{metadata !"float", metadata !1}
-define void @test(%struct.foo* nocapture %a, %struct.foo* nocapture %b) {
+; CHECK: ret
+define void @test1(%struct.test1* nocapture %a, %struct.test1* nocapture %b) {
entry:
- %0 = bitcast %struct.foo* %a to i8*
- %1 = bitcast %struct.foo* %b to i8*
+ %0 = bitcast %struct.test1* %a to i8*
+ %1 = bitcast %struct.test1* %b to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 4, i32 4, i1 false), !tbaa.struct !3
ret void
}
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+%struct.test2 = type { i32 (i8*, i32*, double*)** }
+
+define i32 (i8*, i32*, double*)*** @test2() {
+; CHECK: @test2
+; CHECK-NOT: memcpy
+; CHECK: ret
+ %tmp = alloca %struct.test2, align 8
+ %tmp1 = bitcast %struct.test2* %tmp to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* undef, i64 8, i32 8, i1 false), !tbaa.struct !4
+ %tmp2 = getelementptr %struct.test2* %tmp, i32 0, i32 0
+ %tmp3 = load i32 (i8*, i32*, double*)*** %tmp2
+ ret i32 (i8*, i32*, double*)*** %tmp2
+}
+
+; CHECK: !0 = metadata !{metadata !"float", metadata !1}
!0 = metadata !{metadata !"Simple C/C++ TBAA"}
!1 = metadata !{metadata !"omnipotent char", metadata !0}
!2 = metadata !{metadata !"float", metadata !0}
!3 = metadata !{i64 0, i64 4, metadata !2}
+!4 = metadata !{i64 0, i64 8, null}
diff --git a/test/Transforms/InstCombine/weak-symbols.ll b/test/Transforms/InstCombine/weak-symbols.ll
new file mode 100644
index 00000000000..0039b5962f7
--- /dev/null
+++ b/test/Transforms/InstCombine/weak-symbols.ll
@@ -0,0 +1,33 @@
+; PR4738 - Test that the library call simplifier doesn't assume anything about
+; weak symbols.
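+; A plain 'weak' definition may be overridden by a different definition at
+; link time, so its initializer must not be folded; 'weak_odr' promises any
+; override is identical, so folding remains safe.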
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+@real_init = weak_odr constant [2 x i8] c"y\00"
+@fake_init = weak constant [2 x i8] c"y\00"
+@.str = private constant [2 x i8] c"y\00"
+
+define i32 @foo() nounwind {
+; CHECK: define i32 @foo
+; CHECK: call i32 @strcmp
+; CHECK: ret i32 %temp1
+
+entry:
+ %str1 = getelementptr inbounds [2 x i8]* @fake_init, i64 0, i64 0
+ %str2 = getelementptr inbounds [2 x i8]* @.str, i64 0, i64 0
+ %temp1 = call i32 @strcmp(i8* %str1, i8* %str2) nounwind readonly
+ ret i32 %temp1
+}
+
+define i32 @bar() nounwind {
+; CHECK: define i32 @bar
+; CHECK: ret i32 0
+
+entry:
+ %str1 = getelementptr inbounds [2 x i8]* @real_init, i64 0, i64 0
+ %str2 = getelementptr inbounds [2 x i8]* @.str, i64 0, i64 0
+ %temp1 = call i32 @strcmp(i8* %str1, i8* %str2) nounwind readonly
+ ret i32 %temp1
+}
+
+declare i32 @strcmp(i8*, i8*) nounwind readonly
diff --git a/test/Transforms/SROA/alignment.ll b/test/Transforms/SROA/alignment.ll
index 945ad910021..ad5fb6c4a5d 100644
--- a/test/Transforms/SROA/alignment.ll
+++ b/test/Transforms/SROA/alignment.ll
@@ -84,37 +84,6 @@ entry:
ret void
}
-%struct.S = type { i8, { i64 } }
-
-define void @test4() {
-; This test case triggered very strange alignment behavior with memcpy due to
-; strange splitting. Reported by Duncan.
-; CHECK: @test4
-
-entry:
- %D.2113 = alloca %struct.S
- %Op = alloca %struct.S
- %D.2114 = alloca %struct.S
- %gep1 = getelementptr inbounds %struct.S* %Op, i32 0, i32 0
- store i8 0, i8* %gep1, align 8
- %gep2 = getelementptr inbounds %struct.S* %Op, i32 0, i32 1, i32 0
- %cast = bitcast i64* %gep2 to double*
- store double 0.000000e+00, double* %cast, align 8
- store i64 0, i64* %gep2, align 8
- %dst1 = bitcast %struct.S* %D.2114 to i8*
- %src1 = bitcast %struct.S* %Op to i8*
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst1, i8* %src1, i32 16, i32 8, i1 false)
- %dst2 = bitcast %struct.S* %D.2113 to i8*
- %src2 = bitcast %struct.S* %D.2114 to i8*
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst2, i8* %src2, i32 16, i32 8, i1 false)
-; We get 3 memcpy calls with various reasons to shrink their alignment to 1.
-; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 3, i32 1, i1 false)
-; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 8, i32 1, i1 false)
-; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 11, i32 1, i1 false)
-
- ret void
-}
-
define void @test5() {
; Test that we preserve underaligned loads and stores when splitting.
; CHECK: @test5
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index e7767ef5e96..644fda167d4 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -409,8 +409,11 @@ declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
define i16 @test5() {
; CHECK: @test5
-; CHECK: alloca float
-; CHECK: ret i16 %
+; CHECK-NOT: alloca float
+; CHECK: %[[cast:.*]] = bitcast float 0.0{{.*}} to i32
+; CHECK-NEXT: %[[shr:.*]] = lshr i32 %[[cast]], 16
+; CHECK-NEXT: %[[trunc:.*]] = trunc i32 %[[shr]] to i16
+; CHECK-NEXT: ret i16 %[[trunc]]
entry:
%a = alloca [4 x i8]
@@ -968,3 +971,95 @@ entry:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast0, i8* %cast1, i32 12, i32 0, i1 false)
ret void
}
+
+define i32 @test22(i32 %x) {
+; Test that SROA and promotion are not confused by a grab bag mixture of
+; pointer types involving wrapper aggregates and zero-length aggregate members.
+; CHECK: @test22
+
+entry:
+ %a1 = alloca { { [1 x { i32 }] } }
+ %a2 = alloca { {}, { float }, [0 x i8] }
+ %a3 = alloca { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }
+; CHECK-NOT: alloca
+
+ %wrap1 = insertvalue [1 x { i32 }] undef, i32 %x, 0, 0
+ %gep1 = getelementptr { { [1 x { i32 }] } }* %a1, i32 0, i32 0, i32 0
+ store [1 x { i32 }] %wrap1, [1 x { i32 }]* %gep1
+
+ %gep2 = getelementptr { { [1 x { i32 }] } }* %a1, i32 0, i32 0
+ %ptrcast1 = bitcast { [1 x { i32 }] }* %gep2 to { [1 x { float }] }*
+ %load1 = load { [1 x { float }] }* %ptrcast1
+ %unwrap1 = extractvalue { [1 x { float }] } %load1, 0, 0
+
+ %wrap2 = insertvalue { {}, { float }, [0 x i8] } undef, { float } %unwrap1, 1
+ store { {}, { float }, [0 x i8] } %wrap2, { {}, { float }, [0 x i8] }* %a2
+
+ %gep3 = getelementptr { {}, { float }, [0 x i8] }* %a2, i32 0, i32 1, i32 0
+ %ptrcast2 = bitcast float* %gep3 to <4 x i8>*
+ %load3 = load <4 x i8>* %ptrcast2
+ %valcast1 = bitcast <4 x i8> %load3 to i32
+
+ %wrap3 = insertvalue [1 x [1 x i32]] undef, i32 %valcast1, 0, 0
+ %wrap4 = insertvalue { [1 x [1 x i32]], {} } undef, [1 x [1 x i32]] %wrap3, 0
+ %gep4 = getelementptr { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }* %a3, i32 0, i32 1
+ %ptrcast3 = bitcast { [0 x double], [1 x [1 x <4 x i8>]], {} }* %gep4 to { [1 x [1 x i32]], {} }*
+ store { [1 x [1 x i32]], {} } %wrap4, { [1 x [1 x i32]], {} }* %ptrcast3
+
+ %gep5 = getelementptr { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }* %a3, i32 0, i32 1, i32 1, i32 0
+ %ptrcast4 = bitcast [1 x <4 x i8>]* %gep5 to { {}, float, {} }*
+ %load4 = load { {}, float, {} }* %ptrcast4
+ %unwrap2 = extractvalue { {}, float, {} } %load4, 1
+ %valcast2 = bitcast float %unwrap2 to i32
+
+ ret i32 %valcast2
+; CHECK: ret i32
+}
+
+define void @PR14059.1(double* %d) {
+; In PR14059 a peculiar construct was identified as something that is used
+; pervasively in ARM's ABI-calling-convention lowering: the passing of a struct
+; of doubles via an array of i32 in order to place the data into integer
+; registers. This in turn was missed as an optimization by SROA due to the
+; partial loads and stores of integers to the double alloca we were trying to
+; form and promote. The solution is to widen the integer operations to be
+; whole-alloca operations, and perform the appropriate bitcasting on the
+; *values* rather than the pointers. When this works, partial reads and writes
+; via integers can be promoted away.
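+;
+; Illustrative C analogue (hypothetical, not taken from the original report):
+;   union { double d; unsigned w[2]; } u;
+;   u.w[0] = 0; u.w[1] = 0x3ff00000;   /* assembles the double 1.0 piecewise */
+;   sum += u.d;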
+; CHECK: @PR14059.1
+; CHECK-NOT: alloca
+; CHECK: ret void
+
+entry:
+ %X.sroa.0.i = alloca double, align 8
+ %0 = bitcast double* %X.sroa.0.i to i8*
+ call void @llvm.lifetime.start(i64 -1, i8* %0)
+
+ ; Store to the low 32-bits...
+ %X.sroa.0.0.cast2.i = bitcast double* %X.sroa.0.i to i32*
+ store i32 0, i32* %X.sroa.0.0.cast2.i, align 8
+
+ ; Also use a memset to the middle 32-bits for fun.
+ %X.sroa.0.2.raw_idx2.i = getelementptr inbounds i8* %0, i32 2
+ call void @llvm.memset.p0i8.i64(i8* %X.sroa.0.2.raw_idx2.i, i8 0, i64 4, i32 1, i1 false)
+
+ ; Or a memset of the whole thing.
+ call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 8, i32 1, i1 false)
+
+ ; Write to the high 32-bits with a memcpy.
+ %X.sroa.0.4.raw_idx4.i = getelementptr inbounds i8* %0, i32 4
+ %d.raw = bitcast double* %d to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %X.sroa.0.4.raw_idx4.i, i8* %d.raw, i32 4, i32 1, i1 false)
+
+ ; Store to the high 32-bits...
+ %X.sroa.0.4.cast5.i = bitcast i8* %X.sroa.0.4.raw_idx4.i to i32*
+ store i32 1072693248, i32* %X.sroa.0.4.cast5.i, align 4
+
+ ; Do the actual math...
+ %X.sroa.0.0.load1.i = load double* %X.sroa.0.i, align 8
+ %accum.real.i = load double* %d, align 8
+ %add.r.i = fadd double %accum.real.i, %X.sroa.0.0.load1.i
+ store double %add.r.i, double* %d, align 8
+ call void @llvm.lifetime.end(i64 -1, i8* %0)
+ ret void
+}
diff --git a/test/Transforms/SROA/phi-and-select.ll b/test/Transforms/SROA/phi-and-select.ll
index 2b0724c7fd4..d95e48f3035 100644
--- a/test/Transforms/SROA/phi-and-select.ll
+++ b/test/Transforms/SROA/phi-and-select.ll
@@ -256,17 +256,17 @@ entry:
ret i32 %loaded
}
-define i32 @test10(i32 %b, i32* %ptr) {
+define float @test10(i32 %b, float* %ptr) {
; Don't try to promote allocas which are not eligible for it even after
; rewriting due to the necessity of inserting bitcasts when speculating a PHI
; node.
; CHECK: @test10
; CHECK: %[[alloca:.*]] = alloca
-; CHECK: %[[argvalue:.*]] = load i32* %ptr
-; CHECK: %[[cast:.*]] = bitcast double* %[[alloca]] to i32*
-; CHECK: %[[allocavalue:.*]] = load i32* %[[cast]]
-; CHECK: %[[result:.*]] = phi i32 [ %[[allocavalue]], %else ], [ %[[argvalue]], %then ]
-; CHECK-NEXT: ret i32 %[[result]]
+; CHECK: %[[argvalue:.*]] = load float* %ptr
+; CHECK: %[[cast:.*]] = bitcast double* %[[alloca]] to float*
+; CHECK: %[[allocavalue:.*]] = load float* %[[cast]]
+; CHECK: %[[result:.*]] = phi float [ %[[allocavalue]], %else ], [ %[[argvalue]], %then ]
+; CHECK-NEXT: ret float %[[result]]
entry:
%f = alloca double
@@ -278,34 +278,34 @@ then:
br label %exit
else:
- %bitcast = bitcast double* %f to i32*
+ %bitcast = bitcast double* %f to float*
br label %exit
exit:
- %phi = phi i32* [ %bitcast, %else ], [ %ptr, %then ]
- %loaded = load i32* %phi, align 4
- ret i32 %loaded
+ %phi = phi float* [ %bitcast, %else ], [ %ptr, %then ]
+ %loaded = load float* %phi, align 4
+ ret float %loaded
}
-define i32 @test11(i32 %b, i32* %ptr) {
+define float @test11(i32 %b, float* %ptr) {
; Same as @test10 but for a select rather than a PHI node.
; CHECK: @test11
; CHECK: %[[alloca:.*]] = alloca
-; CHECK: %[[cast:.*]] = bitcast double* %[[alloca]] to i32*
-; CHECK: %[[allocavalue:.*]] = load i32* %[[cast]]
-; CHECK: %[[argvalue:.*]] = load i32* %ptr
-; CHECK: %[[result:.*]] = select i1 %{{.*}}, i32 %[[allocavalue]], i32 %[[argvalue]]
-; CHECK-NEXT: ret i32 %[[result]]
+; CHECK: %[[cast:.*]] = bitcast double* %[[alloca]] to float*
+; CHECK: %[[allocavalue:.*]] = load float* %[[cast]]
+; CHECK: %[[argvalue:.*]] = load float* %ptr
+; CHECK: %[[result:.*]] = select i1 %{{.*}}, float %[[allocavalue]], float %[[argvalue]]
+; CHECK-NEXT: ret float %[[result]]
entry:
%f = alloca double
store double 0.0, double* %f
- store i32 0, i32* %ptr
+ store float 0.0, float* %ptr
%test = icmp ne i32 %b, 0
- %bitcast = bitcast double* %f to i32*
- %select = select i1 %test, i32* %bitcast, i32* %ptr
- %loaded = load i32* %select, align 4
- ret i32 %loaded
+ %bitcast = bitcast double* %f to float*
+ %select = select i1 %test, float* %bitcast, float* %ptr
+ %loaded = load float* %select, align 4
+ ret float %loaded
}
define i32 @test12(i32 %x, i32* %p) {
diff --git a/test/Transforms/SROA/vector-promotion.ll b/test/Transforms/SROA/vector-promotion.ll
index 80757475a5d..92051c62a7c 100644
--- a/test/Transforms/SROA/vector-promotion.ll
+++ b/test/Transforms/SROA/vector-promotion.ll
@@ -189,3 +189,19 @@ entry:
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+define i64 @test6(<4 x i64> %x, <4 x i64> %y, i64 %n) {
+; CHECK: @test6
+; The old scalarrepl pass would wrongly drop the store to the second alloca.
+; PR13254
+ %tmp = alloca { <4 x i64>, <4 x i64> }
+ %p0 = getelementptr inbounds { <4 x i64>, <4 x i64> }* %tmp, i32 0, i32 0
+ store <4 x i64> %x, <4 x i64>* %p0
+; CHECK: store <4 x i64> %x,
+ %p1 = getelementptr inbounds { <4 x i64>, <4 x i64> }* %tmp, i32 0, i32 1
+ store <4 x i64> %y, <4 x i64>* %p1
+; CHECK: store <4 x i64> %y,
+ %addr = getelementptr inbounds { <4 x i64>, <4 x i64> }* %tmp, i32 0, i32 0, i64 %n
+ %res = load i64* %addr, align 4
+ ret i64 %res
+}
diff --git a/test/Transforms/SimplifyLibCalls/StrCat.ll b/test/Transforms/SimplifyLibCalls/StrCat.ll
deleted file mode 100644
index 3ea691a3cfb..00000000000
--- a/test/Transforms/SimplifyLibCalls/StrCat.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; Test that the StrCatOptimizer works correctly
-; PR3661
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN: not grep "call.*strcat"
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN: grep "puts.*%arg1"
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "-p:64:64:64"
-
-@hello = constant [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=1]
-@null = constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
-@null_hello = constant [7 x i8] c"\00hello\00" ; <[7 x i8]*> [#uses=1]
-
-declare i8* @strcat(i8*, i8*)
-
-declare i32 @puts(i8*)
-
-define i32 @main() {
- %target = alloca [1024 x i8] ; <[1024 x i8]*> [#uses=1]
- %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0 ; <i8*> [#uses=2]
- store i8 0, i8* %arg1
- %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0 ; <i8*> [#uses=1]
- %rslt1 = call i8* @strcat( i8* %arg1, i8* %arg2 ) ; <i8*> [#uses=1]
- %arg3 = getelementptr [1 x i8]* @null, i32 0, i32 0 ; <i8*> [#uses=1]
- %rslt2 = call i8* @strcat( i8* %rslt1, i8* %arg3 ) ; <i8*> [#uses=1]
- %arg4 = getelementptr [7 x i8]* @null_hello, i32 0, i32 0 ; <i8*> [#uses=1]
- %rslt3 = call i8* @strcat( i8* %rslt2, i8* %arg4 ) ; <i8*> [#uses=1]
- call i32 @puts( i8* %rslt3 ) ; <i32>:1 [#uses=0]
- ret i32 0
-}
-
diff --git a/test/Transforms/SimplifyLibCalls/StrChr.ll b/test/Transforms/SimplifyLibCalls/StrChr.ll
deleted file mode 100644
index eaabeb2feb8..00000000000
--- a/test/Transforms/SimplifyLibCalls/StrChr.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; Test that the StrChrOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "-p:64:64:64"
-
-@hello = constant [14 x i8] c"hello world\5Cn\00"
-@null = constant [1 x i8] zeroinitializer
-
-declare i8* @strchr(i8*, i32)
-
-define i32 @foo(i32 %index) {
- %hello_p = getelementptr [14 x i8]* @hello, i32 0, i32 0
- %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0
- %world = call i8* @strchr(i8* %hello_p, i32 119)
-; CHECK: getelementptr i8* %hello_p, i64 6
- %ignore = call i8* @strchr(i8* %null_p, i32 119)
-; CHECK-NOT: call i8* strchr
- %null = call i8* @strchr(i8* %hello_p, i32 0)
-; CHECK: getelementptr i8* %hello_p, i64 13
- %result = call i8* @strchr(i8* %hello_p, i32 %index)
-; CHECK: call i8* @memchr(i8* %hello_p, i32 %index, i64 14)
- ret i32 %index
-}
-
diff --git a/test/Transforms/SimplifyLibCalls/StrCmp.ll b/test/Transforms/SimplifyLibCalls/StrCmp.ll
deleted file mode 100644
index 60854d76c97..00000000000
--- a/test/Transforms/SimplifyLibCalls/StrCmp.ll
+++ /dev/null
@@ -1,65 +0,0 @@
-; Test that the StrCmpOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-
-@hello = constant [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=1]
-@hell = constant [5 x i8] c"hell\00" ; <[5 x i8]*> [#uses=1]
-@bell = constant [5 x i8] c"bell\00" ; <[5 x i8]*> [#uses=1]
-@null = constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
-
-declare i32 @strcmp(i8*, i8*)
-
-; strcmp("", x) -> -*x
-define i32 @test1(i8* %str) {
- %temp1 = call i32 @strcmp(i8* getelementptr inbounds ([1 x i8]* @null, i32 0, i32 0), i8* %str)
- ret i32 %temp1
- ; CHECK: @test1
- ; CHECK: %strcmpload = load i8* %str
- ; CHECK: %1 = zext i8 %strcmpload to i32
- ; CHECK: %temp1 = sub i32 0, %1
- ; CHECK: ret i32 %temp1
-}
-
-; strcmp(x, "") -> *x
-define i32 @test2(i8* %str) {
- %temp1 = call i32 @strcmp(i8* %str, i8* getelementptr inbounds ([1 x i8]* @null, i32 0, i32 0))
- ret i32 %temp1
- ; CHECK: @test2
- ; CHECK: %strcmpload = load i8* %str
- ; CHECK: %temp1 = zext i8 %strcmpload to i32
- ; CHECK: ret i32 %temp1
-}
-
-; strcmp(x, y) -> cnst
-define i32 @test3() {
- %temp1 = call i32 @strcmp(i8* getelementptr inbounds ([5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8]* @hello, i32 0, i32 0))
- ret i32 %temp1
- ; CHECK: @test3
- ; CHECK: ret i32 -1
-}
-define i32 @test4() {
- %temp1 = call i32 @strcmp(i8* getelementptr inbounds ([5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8]* @null, i32 0, i32 0))
- ret i32 %temp1
- ; CHECK: @test4
- ; CHECK: ret i32 1
-}
-
-; strcmp(x, y) -> memcmp(x, y, <known length>)
-; (This transform is rather difficult to trigger in a useful manner)
-define i32 @test5(i1 %b) {
- %sel = select i1 %b, i8* getelementptr inbounds ([5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8]* @bell, i32 0, i32 0)
- %temp1 = call i32 @strcmp(i8* getelementptr inbounds ([6 x i8]* @hello, i32 0, i32 0), i8* %sel)
- ret i32 %temp1
- ; CHECK: @test5
- ; CHECK: %memcmp = call i32 @memcmp(i8* getelementptr inbounds ([6 x i8]* @hello, i32 0, i32 0), i8* %sel, i32 5)
- ; CHECK: ret i32 %memcmp
-}
-
-; strcmp(x,x) -> 0
-define i32 @test6(i8* %str) {
- %temp1 = call i32 @strcmp(i8* %str, i8* %str)
- ret i32 %temp1
- ; CHECK: @test6
- ; CHECK: ret i32 0
-}
diff --git a/test/Transforms/SimplifyLibCalls/StrNCat.ll b/test/Transforms/SimplifyLibCalls/StrNCat.ll
deleted file mode 100644
index 073792b96a1..00000000000
--- a/test/Transforms/SimplifyLibCalls/StrNCat.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; Test that the StrNCatOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN: not grep "call.*strncat"
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN: grep "puts.*%arg1"
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "-p:64:64:64"
-
-@hello = constant [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=1]
-@null = constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
-@null_hello = constant [7 x i8] c"\00hello\00" ; <[7 x i8]*> [#uses=1]
-
-declare i8* @strncat(i8*, i8*, i32)
-
-declare i32 @puts(i8*)
-
-define i32 @main() {
- %target = alloca [1024 x i8] ; <[1024 x i8]*> [#uses=1]
- %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0 ; <i8*> [#uses=2]
- store i8 0, i8* %arg1
- %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0 ; <i8*> [#uses=1]
- %rslt1 = call i8* @strncat( i8* %arg1, i8* %arg2, i32 6 ) ; <i8*> [#uses=1]
- %arg3 = getelementptr [1 x i8]* @null, i32 0, i32 0 ; <i8*> [#uses=1]
- %rslt2 = call i8* @strncat( i8* %rslt1, i8* %arg3, i32 42 ) ; <i8*> [#uses=1]
- %arg4 = getelementptr [7 x i8]* @null_hello, i32 0, i32 0 ; <i8*> [#uses=1]
- %rslt3 = call i8* @strncat( i8* %rslt2, i8* %arg4, i32 42 ) ; <i8*> [#uses=1]
- call i32 @puts( i8* %rslt3 ) ; <i32>:1 [#uses=0]
- ret i32 0
-}
diff --git a/test/Transforms/SimplifyLibCalls/StrNCmp.ll b/test/Transforms/SimplifyLibCalls/StrNCmp.ll
deleted file mode 100644
index 0b2a501a3c8..00000000000
--- a/test/Transforms/SimplifyLibCalls/StrNCmp.ll
+++ /dev/null
@@ -1,78 +0,0 @@
-; Test that the StrCmpOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-
-@hello = constant [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=1]
-@hell = constant [5 x i8] c"hell\00" ; <[5 x i8]*> [#uses=1]
-@bell = constant [5 x i8] c"bell\00" ; <[5 x i8]*> [#uses=1]
-@null = constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
-
-declare i32 @strncmp(i8*, i8*, i32)
-
-; strcmp("", x) -> -*x
-define i32 @test1(i8* %str) {
- %temp1 = call i32 @strncmp(i8* getelementptr inbounds ([1 x i8]* @null, i32 0, i32 0), i8* %str, i32 10)
- ret i32 %temp1
- ; CHECK: @test1
- ; CHECK: %strcmpload = load i8* %str
- ; CHECK: %1 = zext i8 %strcmpload to i32
- ; CHECK: %temp1 = sub i32 0, %1
- ; CHECK: ret i32 %temp1
-}
-
-; strcmp(x, "") -> *x
-define i32 @test2(i8* %str) {
- %temp1 = call i32 @strncmp(i8* %str, i8* getelementptr inbounds ([1 x i8]* @null, i32 0, i32 0), i32 10)
- ret i32 %temp1
- ; CHECK: @test2
- ; CHECK: %strcmpload = load i8* %str
- ; CHECK: %temp1 = zext i8 %strcmpload to i32
- ; CHECK: ret i32 %temp1
-}
-
-; strncmp(x, y, n) -> cnst
-define i32 @test3() {
- %temp1 = call i32 @strncmp(i8* getelementptr inbounds ([5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8]* @hello, i32 0, i32 0), i32 10)
- ret i32 %temp1
- ; CHECK: @test3
- ; CHECK: ret i32 -1
-}
-define i32 @test4() {
- %temp1 = call i32 @strncmp(i8* getelementptr inbounds ([5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8]* @null, i32 0, i32 0), i32 10)
- ret i32 %temp1
- ; CHECK: @test4
- ; CHECK: ret i32 1
-}
-define i32 @test5() {
- %temp1 = call i32 @strncmp(i8* getelementptr inbounds ([5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8]* @hello, i32 0, i32 0), i32 4)
- ret i32 %temp1
- ; CHECK: @test5
- ; CHECK: ret i32 0
-}
-
-; strncmp(x,y,1) -> memcmp(x,y,1)
-define i32 @test6(i8* %str1, i8* %str2) {
- %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 1)
- ret i32 %temp1
- ; CHECK: @test6
- ; CHECK: load i8*
- ; CHECK: load i8*
- ; CHECK: sub i32
-}
-
-; strncmp(x,y,0) -> 0
-define i32 @test7(i8* %str1, i8* %str2) {
- %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 0)
- ret i32 %temp1
- ; CHECK: @test7
- ; CHECK: ret i32 0
-}
-
-; strncmp(x,x,n) -> 0
-define i32 @test8(i8* %str, i32 %n) {
- %temp1 = call i32 @strncmp(i8* %str, i8* %str, i32 %n)
- ret i32 %temp1
- ; CHECK: @test8
- ; CHECK: ret i32 0
-}
diff --git a/test/Transforms/SimplifyLibCalls/StrRChr.ll b/test/Transforms/SimplifyLibCalls/StrRChr.ll
deleted file mode 100644
index 2259fc0289f..00000000000
--- a/test/Transforms/SimplifyLibCalls/StrRChr.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; Test that the StrRChrOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-target datalayout = "-p:64:64:64"
-
-@hello = constant [14 x i8] c"hello world\5Cn\00"
-@null = constant [1 x i8] zeroinitializer
-
-declare i8* @strrchr(i8*, i32)
-
-define void @foo(i8* %bar) {
- %hello_p = getelementptr [14 x i8]* @hello, i32 0, i32 0
- %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0
- %world = call i8* @strrchr(i8* %hello_p, i32 119)
-; CHECK: getelementptr i8* %hello_p, i64 6
- %ignore = call i8* @strrchr(i8* %null_p, i32 119)
-; CHECK-NOT: call i8* strrchr
- %null = call i8* @strrchr(i8* %hello_p, i32 0)
-; CHECK: getelementptr i8* %hello_p, i64 13
- %strchr = call i8* @strrchr(i8* %bar, i32 0)
-; CHECK: call i8* @strchr(i8* %bar, i32 0)
- ret void
-}
diff --git a/test/Transforms/SimplifyLibCalls/weak-symbols.ll b/test/Transforms/SimplifyLibCalls/weak-symbols.ll
deleted file mode 100644
index 5875b211f77..00000000000
--- a/test/Transforms/SimplifyLibCalls/weak-symbols.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-; PR4738
-
-; SimplifyLibcalls shouldn't assume anything about weak symbols.
-
-@real_init = weak_odr constant [2 x i8] c"y\00"
-@fake_init = weak constant [2 x i8] c"y\00"
-@.str = private constant [2 x i8] c"y\00"
-
-; CHECK: define i32 @foo
-; CHECK: call i32 @strcmp
-define i32 @foo() nounwind {
-entry:
- %t0 = call i32 @strcmp(i8* getelementptr inbounds ([2 x i8]* @fake_init, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8]* @.str, i64 0, i64 0)) nounwind readonly
- ret i32 %t0
-}
-
-; CHECK: define i32 @bar
-; CHECK: ret i32 0
-define i32 @bar() nounwind {
-entry:
- %t0 = call i32 @strcmp(i8* getelementptr inbounds ([2 x i8]* @real_init, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8]* @.str, i64 0, i64 0)) nounwind readonly
- ret i32 %t0
-}
-
-declare i32 @strcmp(i8*, i8*) nounwind readonly
diff --git a/tools/bugpoint-passes/bugpoint.exports b/tools/bugpoint-passes/bugpoint.exports
index e69de29bb2d..d8fdd6a5767 100644
--- a/tools/bugpoint-passes/bugpoint.exports
+++ b/tools/bugpoint-passes/bugpoint.exports
@@ -0,0 +1 @@
+_ZN4llvm14BasicBlockPass14doFinalizationERNS_6ModuleE
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index 04e5bca3855..4d4a74c009e 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/Support/IRReader.h"
+#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/CodeGen/LinkAllAsmWriterComponents.h"
#include "llvm/CodeGen/LinkAllCodegenComponents.h"
#include "llvm/MC/SubtargetFeature.h"
@@ -62,216 +63,13 @@ OptLevel("O",
static cl::opt<std::string>
TargetTriple("mtriple", cl::desc("Override target triple for module"));
-static cl::opt<std::string>
-MArch("march", cl::desc("Architecture to generate code for (see --version)"));
-
-static cl::opt<std::string>
-MCPU("mcpu",
- cl::desc("Target a specific cpu type (-mcpu=help for details)"),
- cl::value_desc("cpu-name"),
- cl::init(""));
-
-static cl::list<std::string>
-MAttrs("mattr",
- cl::CommaSeparated,
- cl::desc("Target specific attributes (-mattr=help for details)"),
- cl::value_desc("a1,+a2,-a3,..."));
-
-static cl::opt<Reloc::Model>
-RelocModel("relocation-model",
- cl::desc("Choose relocation model"),
- cl::init(Reloc::Default),
- cl::values(
- clEnumValN(Reloc::Default, "default",
- "Target default relocation model"),
- clEnumValN(Reloc::Static, "static",
- "Non-relocatable code"),
- clEnumValN(Reloc::PIC_, "pic",
- "Fully relocatable, position independent code"),
- clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic",
- "Relocatable external references, non-relocatable code"),
- clEnumValEnd));
-
-static cl::opt<llvm::CodeModel::Model>
-CMModel("code-model",
- cl::desc("Choose code model"),
- cl::init(CodeModel::Default),
- cl::values(clEnumValN(CodeModel::Default, "default",
- "Target default code model"),
- clEnumValN(CodeModel::Small, "small",
- "Small code model"),
- clEnumValN(CodeModel::Kernel, "kernel",
- "Kernel code model"),
- clEnumValN(CodeModel::Medium, "medium",
- "Medium code model"),
- clEnumValN(CodeModel::Large, "large",
- "Large code model"),
- clEnumValEnd));
-
-static cl::opt<bool>
-RelaxAll("mc-relax-all",
- cl::desc("When used with filetype=obj, "
- "relax all fixups in the emitted object file"));
-
-cl::opt<TargetMachine::CodeGenFileType>
-FileType("filetype", cl::init(TargetMachine::CGFT_AssemblyFile),
- cl::desc("Choose a file type (not all types are supported by all targets):"),
- cl::values(
- clEnumValN(TargetMachine::CGFT_AssemblyFile, "asm",
- "Emit an assembly ('.s') file"),
- clEnumValN(TargetMachine::CGFT_ObjectFile, "obj",
- "Emit a native object ('.o') file"),
- clEnumValN(TargetMachine::CGFT_Null, "null",
- "Emit nothing, for performance testing"),
- clEnumValEnd));
-
cl::opt<bool> NoVerify("disable-verify", cl::Hidden,
cl::desc("Do not verify input module"));
-cl::opt<bool> DisableDotLoc("disable-dot-loc", cl::Hidden,
- cl::desc("Do not use .loc entries"));
-
-cl::opt<bool> DisableCFI("disable-cfi", cl::Hidden,
- cl::desc("Do not use .cfi_* directives"));
-
-cl::opt<bool> EnableDwarfDirectory("enable-dwarf-directory", cl::Hidden,
- cl::desc("Use .file directives with an explicit directory."));
-
-static cl::opt<bool>
-DisableRedZone("disable-red-zone",
- cl::desc("Do not emit code that uses the red zone."),
- cl::init(false));
-
-static cl::opt<bool>
-EnableFPMAD("enable-fp-mad",
- cl::desc("Enable less precise MAD instructions to be generated"),
- cl::init(false));
-
-static cl::opt<bool>
-DisableFPElim("disable-fp-elim",
- cl::desc("Disable frame pointer elimination optimization"),
- cl::init(false));
-
-static cl::opt<bool>
-DisableFPElimNonLeaf("disable-non-leaf-fp-elim",
- cl::desc("Disable frame pointer elimination optimization for non-leaf funcs"),
- cl::init(false));
-
-static cl::opt<bool>
-EnableUnsafeFPMath("enable-unsafe-fp-math",
- cl::desc("Enable optimizations that may decrease FP precision"),
- cl::init(false));
-
-static cl::opt<bool>
-EnableNoInfsFPMath("enable-no-infs-fp-math",
- cl::desc("Enable FP math optimizations that assume no +-Infs"),
- cl::init(false));
-
-static cl::opt<bool>
-EnableNoNaNsFPMath("enable-no-nans-fp-math",
- cl::desc("Enable FP math optimizations that assume no NaNs"),
- cl::init(false));
-
-static cl::opt<bool>
-EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math",
- cl::Hidden,
- cl::desc("Force codegen to assume rounding mode can change dynamically"),
- cl::init(false));
-
-static cl::opt<bool>
-GenerateSoftFloatCalls("soft-float",
- cl::desc("Generate software floating point library calls"),
- cl::init(false));
-
-static cl::opt<llvm::FloatABI::ABIType>
-FloatABIForCalls("float-abi",
- cl::desc("Choose float ABI type"),
- cl::init(FloatABI::Default),
- cl::values(
- clEnumValN(FloatABI::Default, "default",
- "Target default float ABI type"),
- clEnumValN(FloatABI::Soft, "soft",
- "Soft float ABI (implied by -soft-float)"),
- clEnumValN(FloatABI::Hard, "hard",
- "Hard float ABI (uses FP registers)"),
- clEnumValEnd));
-
-static cl::opt<llvm::FPOpFusion::FPOpFusionMode>
-FuseFPOps("fp-contract",
- cl::desc("Enable aggresive formation of fused FP ops"),
- cl::init(FPOpFusion::Standard),
- cl::values(
- clEnumValN(FPOpFusion::Fast, "fast",
- "Fuse FP ops whenever profitable"),
- clEnumValN(FPOpFusion::Standard, "on",
- "Only fuse 'blessed' FP ops."),
- clEnumValN(FPOpFusion::Strict, "off",
- "Only fuse FP ops when the result won't be effected."),
- clEnumValEnd));
-
-static cl::opt<bool>
-DontPlaceZerosInBSS("nozero-initialized-in-bss",
- cl::desc("Don't place zero-initialized symbols into bss section"),
- cl::init(false));
-
-static cl::opt<bool>
+cl::opt<bool>
DisableSimplifyLibCalls("disable-simplify-libcalls",
- cl::desc("Disable simplify-libcalls"),
- cl::init(false));
-
-static cl::opt<bool>
-EnableGuaranteedTailCallOpt("tailcallopt",
- cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."),
- cl::init(false));
-
-static cl::opt<bool>
-DisableTailCalls("disable-tail-calls",
- cl::desc("Never emit tail calls"),
- cl::init(false));
-
-static cl::opt<unsigned>
-OverrideStackAlignment("stack-alignment",
- cl::desc("Override default stack alignment"),
- cl::init(0));
-
-static cl::opt<bool>
-EnableRealignStack("realign-stack",
- cl::desc("Realign stack if needed"),
- cl::init(true));
-
-static cl::opt<std::string>
-TrapFuncName("trap-func", cl::Hidden,
- cl::desc("Emit a call to trap function rather than a trap instruction"),
- cl::init(""));
-
-static cl::opt<bool>
-EnablePIE("enable-pie",
- cl::desc("Assume the creation of a position independent executable."),
- cl::init(false));
-
-static cl::opt<bool>
-SegmentedStacks("segmented-stacks",
- cl::desc("Use segmented stacks if possible."),
- cl::init(false));
-
-static cl::opt<bool>
-UseInitArray("use-init-array",
- cl::desc("Use .init_array instead of .ctors."),
- cl::init(false));
-
-static cl::opt<std::string> StopAfter("stop-after",
- cl::desc("Stop compilation after a specific pass"),
- cl::value_desc("pass-name"),
- cl::init(""));
-static cl::opt<std::string> StartAfter("start-after",
- cl::desc("Resume compilation after a specific pass"),
- cl::value_desc("pass-name"),
- cl::init(""));
-
-static cl::opt<unsigned>
-SSPBufferSize("stack-protector-buffer-size", cl::init(8),
- cl::desc("Lower bound for a buffer to be considered for "
- "stack protection"));
+ cl::desc("Disable simplify-libcalls"),
+ cl::init(false));
// GetFileNameRoot - Helper function to get the basename of a filename.
static inline std::string
@@ -505,6 +303,11 @@ int main(int argc, char **argv) {
TLI->disableAllFunctions();
PM.add(TLI);
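+  // Make the target's scalar and vector cost interfaces available to the
+  // IR-level passes.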
+ if (target.get()) {
+ PM.add(new TargetTransformInfo(target->getScalarTargetTransformInfo(),
+ target->getVectorTargetTransformInfo()));
+ }
+
// Add the target data from the target machine, if it exists, or the module.
if (const DataLayout *TD = Target.getDataLayout())
PM.add(new DataLayout(*TD));
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index 57a31f21b89..0ee72387b81 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -171,6 +171,23 @@ namespace {
cl::init(false));
cl::opt<bool>
+ GenerateSoftFloatCalls("soft-float",
+ cl::desc("Generate software floating point library calls"),
+ cl::init(false));
+
+ cl::opt<llvm::FloatABI::ABIType>
+ FloatABIForCalls("float-abi",
+ cl::desc("Choose float ABI type"),
+ cl::init(FloatABI::Default),
+ cl::values(
+ clEnumValN(FloatABI::Default, "default",
+ "Target default float ABI type"),
+ clEnumValN(FloatABI::Soft, "soft",
+ "Soft float ABI (implied by -soft-float)"),
+ clEnumValN(FloatABI::Hard, "hard",
+ "Hard float ABI (uses FP registers)"),
+ clEnumValEnd));
+ cl::opt<bool>
// In debug builds, make this default to true.
#ifdef NDEBUG
#define EMIT_DEBUG false
@@ -555,15 +572,22 @@ int main(int argc, char **argv, char * const *envp) {
}
builder.setOptLevel(OLvl);
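+  // Feed the floating point command line flags into the JIT's TargetOptions.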
+ TargetOptions Options;
+ Options.UseSoftFloat = GenerateSoftFloatCalls;
+ if (FloatABIForCalls != FloatABI::Default)
+ Options.FloatABIType = FloatABIForCalls;
+ if (GenerateSoftFloatCalls)
+ FloatABIForCalls = FloatABI::Soft;
+
// Remote target execution doesn't handle EH or debug registration.
if (!RemoteMCJIT) {
- TargetOptions Options;
Options.JITExceptionHandling = EnableJITExceptionHandling;
Options.JITEmitDebugInfo = EmitJitDebugInfo;
Options.JITEmitDebugInfoToDisk = EmitJitDebugInfoToDisk;
- builder.setTargetOptions(Options);
}
+ builder.setTargetOptions(Options);
+
EE = builder.create();
if (!EE) {
if (!ErrorMsg.empty())
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index f1814ab9aab..b1c4f437ffb 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -218,12 +218,13 @@ bool LTOCodeGenerator::determineTarget(std::string& errMsg) {
if (_target != NULL)
return false;
- std::string Triple = _linker.getModule()->getTargetTriple();
- if (Triple.empty())
- Triple = sys::getDefaultTargetTriple();
+ std::string TripleStr = _linker.getModule()->getTargetTriple();
+ if (TripleStr.empty())
+ TripleStr = sys::getDefaultTargetTriple();
+ llvm::Triple Triple(TripleStr);
// create target machine from info for merged modules
- const Target *march = TargetRegistry::lookupTarget(Triple, errMsg);
+ const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg);
if (march == NULL)
return true;
@@ -244,11 +245,18 @@ bool LTOCodeGenerator::determineTarget(std::string& errMsg) {
// construct LTOModule, hand over ownership of module and target
SubtargetFeatures Features;
- Features.getDefaultSubtargetFeatures(llvm::Triple(Triple));
+ Features.getDefaultSubtargetFeatures(Triple);
std::string FeatureStr = Features.getString();
+ // Set a default CPU for Darwin triples.
+ if (_mCpu.empty() && Triple.isOSDarwin()) {
+ if (Triple.getArch() == llvm::Triple::x86_64)
+ _mCpu = "core2";
+ else if (Triple.getArch() == llvm::Triple::x86)
+ _mCpu = "yonah";
+ }
TargetOptions Options;
LTOModule::getTargetOptions(Options);
- _target = march->createTargetMachine(Triple, _mCpu, FeatureStr, Options,
+ _target = march->createTargetMachine(TripleStr, _mCpu, FeatureStr, Options,
RelocModel, CodeModel::Default,
CodeGenOpt::Aggressive);
return false;
@@ -363,6 +371,8 @@ bool LTOCodeGenerator::generateObjectFile(raw_ostream &out,
// Add an appropriate DataLayout instance for this module...
passes.add(new DataLayout(*_target->getDataLayout()));
+ passes.add(new TargetTransformInfo(_target->getScalarTargetTransformInfo(),
+ _target->getVectorTargetTransformInfo()));
// Enabling internalize here would use its AllButMain variant. It
// keeps only main if it exists and does nothing for libraries. Instead
diff --git a/tools/lto/LTOModule.cpp b/tools/lto/LTOModule.cpp
index 3c3701ae93e..ffdcbe644c8 100644
--- a/tools/lto/LTOModule.cpp
+++ b/tools/lto/LTOModule.cpp
@@ -278,23 +278,31 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer,
return NULL;
}
- std::string Triple = m->getTargetTriple();
- if (Triple.empty())
- Triple = sys::getDefaultTargetTriple();
+ std::string TripleStr = m->getTargetTriple();
+ if (TripleStr.empty())
+ TripleStr = sys::getDefaultTargetTriple();
+ llvm::Triple Triple(TripleStr);
// find machine architecture for this module
- const Target *march = TargetRegistry::lookupTarget(Triple, errMsg);
+ const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg);
if (!march)
return NULL;
// construct LTOModule, hand over ownership of module and target
SubtargetFeatures Features;
- Features.getDefaultSubtargetFeatures(llvm::Triple(Triple));
+ Features.getDefaultSubtargetFeatures(Triple);
std::string FeatureStr = Features.getString();
+ // Set a default CPU for Darwin triples.
std::string CPU;
+ if (Triple.isOSDarwin()) {
+ if (Triple.getArch() == llvm::Triple::x86_64)
+ CPU = "core2";
+ else if (Triple.getArch() == llvm::Triple::x86)
+ CPU = "yonah";
+ }
TargetOptions Options;
getTargetOptions(Options);
- TargetMachine *target = march->createTargetMachine(Triple, CPU, FeatureStr,
+ TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr,
Options);
LTOModule *Ret = new LTOModule(m.take(), target);
if (Ret->parseSymbols(errMsg)) {
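Both LTO paths now pick the same Darwin defaults when no -mcpu is given. A hypothetical helper capturing that duplicated logic might look like:

    // Hypothetical factoring of the default-CPU selection added above.
    static std::string chooseDefaultCPU(const llvm::Triple &T) {
      if (!T.isOSDarwin())
        return "";
      if (T.getArch() == llvm::Triple::x86_64)
        return "core2";  // 64-bit Darwin baseline
      if (T.getArch() == llvm::Triple::x86)
        return "yonah";  // 32-bit Darwin baseline
      return "";
    }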
diff --git a/tools/opt/CMakeLists.txt b/tools/opt/CMakeLists.txt
index 7daf22aa9e3..32de6d40608 100644
--- a/tools/opt/CMakeLists.txt
+++ b/tools/opt/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(LLVM_LINK_COMPONENTS bitreader asmparser bitwriter instrumentation scalaropts ipo vectorize)
+set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} bitreader asmparser bitwriter instrumentation scalaropts ipo vectorize)
add_llvm_tool(opt
AnalysisWrappers.cpp
diff --git a/tools/opt/LLVMBuild.txt b/tools/opt/LLVMBuild.txt
index 4de99f51c88..b174431e042 100644
--- a/tools/opt/LLVMBuild.txt
+++ b/tools/opt/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Tool
name = opt
parent = Tools
-required_libraries = AsmParser BitReader BitWriter IPO Instrumentation Scalar
+required_libraries = AsmParser BitReader BitWriter IPO Instrumentation Scalar all-targets
diff --git a/tools/opt/Makefile b/tools/opt/Makefile
index 16d116da5db..ee7e1cf796a 100644
--- a/tools/opt/Makefile
+++ b/tools/opt/Makefile
@@ -9,6 +9,6 @@
LEVEL := ../..
TOOLNAME := opt
-LINK_COMPONENTS := bitreader bitwriter asmparser instrumentation scalaropts ipo vectorize
+LINK_COMPONENTS := bitreader bitwriter asmparser instrumentation scalaropts ipo vectorize all-targets
include $(LEVEL)/Makefile.common
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index 706a7d51380..8d8d73179e5 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -18,6 +18,7 @@
#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/CallGraphSCCPass.h"
+#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/Analysis/Verifier.h"
@@ -36,7 +37,9 @@
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/SystemUtils.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/MC/SubtargetFeature.h"
#include "llvm/LinkAllPasses.h"
#include "llvm/LinkAllVMCore.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
@@ -478,6 +481,75 @@ static void AddStandardLinkPasses(PassManagerBase &PM) {
/*RunInliner=*/ !DisableInline);
}
+//===----------------------------------------------------------------------===//
+// CodeGen-related helper functions.
+//
+static TargetOptions GetTargetOptions() {
+ TargetOptions Options;
+ Options.LessPreciseFPMADOption = EnableFPMAD;
+ Options.NoFramePointerElim = DisableFPElim;
+ Options.NoFramePointerElimNonLeaf = DisableFPElimNonLeaf;
+ Options.AllowFPOpFusion = FuseFPOps;
+ Options.UnsafeFPMath = EnableUnsafeFPMath;
+ Options.NoInfsFPMath = EnableNoInfsFPMath;
+ Options.NoNaNsFPMath = EnableNoNaNsFPMath;
+ Options.HonorSignDependentRoundingFPMathOption =
+ EnableHonorSignDependentRoundingFPMath;
+ Options.UseSoftFloat = GenerateSoftFloatCalls;
+ if (FloatABIForCalls != FloatABI::Default)
+ Options.FloatABIType = FloatABIForCalls;
+ Options.NoZerosInBSS = DontPlaceZerosInBSS;
+ Options.GuaranteedTailCallOpt = EnableGuaranteedTailCallOpt;
+ Options.DisableTailCalls = DisableTailCalls;
+ Options.StackAlignmentOverride = OverrideStackAlignment;
+ Options.RealignStack = EnableRealignStack;
+ Options.TrapFuncName = TrapFuncName;
+ Options.PositionIndependentExecutable = EnablePIE;
+ Options.EnableSegmentedStacks = SegmentedStacks;
+ Options.UseInitArray = UseInitArray;
+ Options.SSPBufferSize = SSPBufferSize;
+ return Options;
+}
+
+CodeGenOpt::Level GetCodeGenOptLevel() {
+ if (OptLevelO1)
+ return CodeGenOpt::Less;
+ if (OptLevelO2)
+ return CodeGenOpt::Default;
+ if (OptLevelO3)
+ return CodeGenOpt::Aggressive;
+ return CodeGenOpt::None;
+}
+
+// Returns the TargetMachine instance or zero if no triple is provided.
+static TargetMachine* GetTargetMachine(std::string TripleStr) {
+ if (TripleStr.empty())
+ return 0;
+
+ // Get the target specific parser.
+ std::string Error;
+ Triple TheTriple(Triple::normalize(TargetTriple));
+
+ const Target *TheTarget = TargetRegistry::lookupTarget(MArch, TheTriple,
+ Error);
+ if (!TheTarget) {
+ return 0;
+ }
+
+ // Package up features to be passed to target/subtarget
+ std::string FeaturesStr;
+ if (MAttrs.size()) {
+ SubtargetFeatures Features;
+ for (unsigned i = 0; i != MAttrs.size(); ++i)
+ Features.AddFeature(MAttrs[i]);
+ FeaturesStr = Features.getString();
+ }
+
+ return TheTarget->createTargetMachine(TheTriple.getTriple(),
+ MCPU, FeaturesStr, GetTargetOptions(),
+ RelocModel, CMModel,
+ GetCodeGenOptLevel());
+}
//===----------------------------------------------------------------------===//
// main for opt
@@ -579,6 +651,12 @@ int main(int argc, char **argv) {
if (TD)
Passes.add(TD);
+ std::auto_ptr<TargetMachine> TM(GetTargetMachine(TargetTriple));
+ if (TM.get()) {
+ Passes.add(new TargetTransformInfo(TM->getScalarTargetTransformInfo(),
+ TM->getVectorTargetTransformInfo()));
+ }
+
OwningPtr<FunctionPassManager> FPasses;
if (OptLevelO1 || OptLevelO2 || OptLevelOs || OptLevelOz || OptLevelO3) {
FPasses.reset(new FunctionPassManager(M.get()));
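One detail worth noting in GetTargetMachine(): it normalizes the global TargetTriple rather than its TripleStr parameter, so the parameter only serves the empty-triple check and callers are expected to pass that same global, as main() does here. With the machine registered, downstream passes can pick up the cost interfaces from the analysis; a rough sketch under the 3.2-era ImmutablePass idiom, with MyPass as a hypothetical consumer:

    // Hypothetical pass body: consuming the TargetTransformInfo that opt
    // now registers ahead of the optimization passes.
    bool MyPass::runOnFunction(Function &F) {
      const TargetTransformInfo *TTI =
          getAnalysisIfAvailable<TargetTransformInfo>();
      if (TTI) {
        // Query target costs via TTI->getScalarTargetTransformInfo() and
        // TTI->getVectorTargetTransformInfo(); with no -mtriple given, the
        // analysis is simply absent and TTI stays null.
      }
      return false;
    }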
diff --git a/unittests/ADT/BitVectorTest.cpp b/unittests/ADT/BitVectorTest.cpp
index d836036aeae..dc298a83d57 100644
--- a/unittests/ADT/BitVectorTest.cpp
+++ b/unittests/ADT/BitVectorTest.cpp
@@ -281,5 +281,57 @@ TYPED_TEST(BitVectorTest, BinOps) {
EXPECT_FALSE(A.anyCommon(B));
EXPECT_FALSE(B.anyCommon(A));
}
+
+TYPED_TEST(BitVectorTest, RangeOps) {
+ TypeParam A;
+ A.resize(256);
+ A.reset();
+ A.set(1, 255);
+
+ EXPECT_FALSE(A.test(0));
+ EXPECT_TRUE( A.test(1));
+ EXPECT_TRUE( A.test(23));
+ EXPECT_TRUE( A.test(254));
+ EXPECT_FALSE(A.test(255));
+
+ TypeParam B;
+ B.resize(256);
+ B.set();
+ B.reset(1, 255);
+
+ EXPECT_TRUE( B.test(0));
+ EXPECT_FALSE(B.test(1));
+ EXPECT_FALSE(B.test(23));
+ EXPECT_FALSE(B.test(254));
+ EXPECT_TRUE( B.test(255));
+
+ TypeParam C;
+ C.resize(3);
+ C.reset();
+ C.set(0, 1);
+
+ EXPECT_TRUE(C.test(0));
+ EXPECT_FALSE( C.test(1));
+ EXPECT_FALSE( C.test(2));
+
+ TypeParam D;
+ D.resize(3);
+ D.set();
+ D.reset(0, 1);
+
+ EXPECT_FALSE(D.test(0));
+ EXPECT_TRUE( D.test(1));
+ EXPECT_TRUE( D.test(2));
+
+ TypeParam E;
+ E.resize(128);
+ E.reset();
+ E.set(1, 33);
+
+ EXPECT_FALSE(E.test(0));
+ EXPECT_TRUE( E.test(1));
+ EXPECT_TRUE( E.test(32));
+ EXPECT_FALSE(E.test(33));
+}
}
#endif
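The new tests pin down the range forms as half-open intervals: set(I, E) sets bits [I, E) and reset(I, E) clears them, for both BitVector and SmallBitVector (TypeParam covers both). In short:

    BitVector BV(256);
    BV.set(1, 255);    // bits 1..254 set; bit 0 and bit 255 untouched
    assert(BV.test(1) && BV.test(254) && !BV.test(255));
    BV.reset(1, 255);  // clears the same half-open range
    assert(BV.none());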
diff --git a/unittests/ADT/CMakeLists.txt b/unittests/ADT/CMakeLists.txt
index cb9a9092b56..94f7fda2a9e 100644
--- a/unittests/ADT/CMakeLists.txt
+++ b/unittests/ADT/CMakeLists.txt
@@ -13,6 +13,7 @@ set(ADTSources
FoldingSet.cpp
HashingTest.cpp
ilistTest.cpp
+ ImmutableMapTest.cpp
ImmutableSetTest.cpp
IntEqClassesTest.cpp
IntervalMapTest.cpp
diff --git a/unittests/ADT/ImmutableMapTest.cpp b/unittests/ADT/ImmutableMapTest.cpp
new file mode 100644
index 00000000000..774581ca4ee
--- /dev/null
+++ b/unittests/ADT/ImmutableMapTest.cpp
@@ -0,0 +1,50 @@
+//===----------- ImmutableMapTest.cpp - ImmutableMap unit tests ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/ADT/ImmutableMap.h"
+
+using namespace llvm;
+
+namespace {
+
+TEST(ImmutableMapTest, EmptyIntMapTest) {
+ ImmutableMap<int, int>::Factory f;
+
+ EXPECT_TRUE(f.getEmptyMap() == f.getEmptyMap());
+ EXPECT_FALSE(f.getEmptyMap() != f.getEmptyMap());
+ EXPECT_TRUE(f.getEmptyMap().isEmpty());
+
+ ImmutableMap<int, int> S = f.getEmptyMap();
+ EXPECT_EQ(0u, S.getHeight());
+ EXPECT_TRUE(S.begin() == S.end());
+ EXPECT_FALSE(S.begin() != S.end());
+}
+
+TEST(ImmutableMapTest, MultiElemIntMapTest) {
+ ImmutableMap<int, int>::Factory f;
+ ImmutableMap<int, int> S = f.getEmptyMap();
+
+ ImmutableMap<int, int> S2 = f.add(f.add(f.add(S, 3, 10), 4, 11), 5, 12);
+
+ EXPECT_TRUE(S.isEmpty());
+ EXPECT_FALSE(S2.isEmpty());
+
+ EXPECT_EQ(0, S.lookup(3));
+ EXPECT_EQ(0, S.lookup(9));
+
+ EXPECT_EQ(10, *S2.lookup(3));
+ EXPECT_EQ(11, *S2.lookup(4));
+ EXPECT_EQ(12, *S2.lookup(5));
+
+ EXPECT_EQ(5, S2.getMaxElement()->first);
+ EXPECT_EQ(3U, S2.getHeight());
+}
+
+}
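These tests lean on ImmutableMap's persistence: Factory::add() returns a new map that shares structure with its input, leaving earlier snapshots untouched, and lookup() yields a pointer to the value, null on a miss (hence the dereference for hits above). Reduced to essentials:

    ImmutableMap<int, int>::Factory F;
    ImmutableMap<int, int> Empty = F.getEmptyMap();
    ImmutableMap<int, int> One = F.add(Empty, 1, 100);
    assert(Empty.isEmpty());        // old snapshot unchanged
    assert(Empty.lookup(1) == 0);   // miss -> null
    assert(*One.lookup(1) == 100);  // hit -> pointer to value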
diff --git a/unittests/ADT/TripleTest.cpp b/unittests/ADT/TripleTest.cpp
index 967437ca05a..7c3ab973894 100644
--- a/unittests/ADT/TripleTest.cpp
+++ b/unittests/ADT/TripleTest.cpp
@@ -105,6 +105,18 @@ TEST(TripleTest, ParsedIDs) {
EXPECT_EQ(Triple::Linux, T.getOS());
EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment());
+ T = Triple("powerpc-ibm-aix");
+ EXPECT_EQ(Triple::ppc, T.getArch());
+ EXPECT_EQ(Triple::IBM, T.getVendor());
+ EXPECT_EQ(Triple::AIX, T.getOS());
+ EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment());
+
+ T = Triple("powerpc64-ibm-aix");
+ EXPECT_EQ(Triple::ppc64, T.getArch());
+ EXPECT_EQ(Triple::IBM, T.getVendor());
+ EXPECT_EQ(Triple::AIX, T.getOS());
+ EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment());
+
T = Triple("powerpc-dunno-notsure");
EXPECT_EQ(Triple::ppc, T.getArch());
EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
diff --git a/unittests/ExecutionEngine/JIT/JITTest.cpp b/unittests/ExecutionEngine/JIT/JITTest.cpp
index 5e2af030f20..ae6855e68bf 100644
--- a/unittests/ExecutionEngine/JIT/JITTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITTest.cpp
@@ -633,6 +633,7 @@ TEST_F(JITTest, AvailableExternallyGlobalIsntEmitted) {
// This function is intentionally defined differently in the statically-compiled
// program from the IR input to the JIT to assert that the JIT doesn't use its
// definition.
+extern "C" int32_t JITTest_AvailableExternallyFunction() LLVM_ATTRIBUTE_USED;
extern "C" int32_t JITTest_AvailableExternallyFunction() {
return 42;
}
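The added declaration matters because the statically compiled definition is referenced from nowhere in the test binary itself: LLVM_ATTRIBUTE_USED pins the symbol so it survives for the JIT's resolver to find. Simplified from llvm/Support/Compiler.h:

    // Pin a symbol the compiler and linker would otherwise treat as unused.
    #if defined(__GNUC__)
    #define LLVM_ATTRIBUTE_USED __attribute__((__used__))
    #else
    #define LLVM_ATTRIBUTE_USED
    #endif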
diff --git a/unittests/Support/Casting.cpp b/unittests/Support/Casting.cpp
index dc0205f325f..ad564aa366d 100644
--- a/unittests/Support/Casting.cpp
+++ b/unittests/Support/Casting.cpp
@@ -153,3 +153,54 @@ const bar *B2 = &B;
} // anonymous namespace
bar *llvm::fub() { return 0; }
+
+namespace {
+namespace inferred_upcasting {
+// This test case verifies correct behavior of inferred upcasts when the
+// types are statically known to be OK to upcast. This is the case when,
+// for example, Derived inherits from Base, and we do `isa<Base>(Derived)`.
+
+// Note: This test will actually fail to compile without inferred
+// upcasting.
+
+class Base {
+public:
+ // No classof. We are testing that the upcast is inferred.
+ Base() {}
+};
+
+class Derived : public Base {
+public:
+ Derived() {}
+};
+
+// Even with no explicit classof() in Base, we should still be able to cast
+// Derived to its base class.
+TEST(CastingTest, UpcastIsInferred) {
+ Derived D;
+ EXPECT_TRUE(isa<Base>(D));
+ Base *BP = dyn_cast<Base>(&D);
+ EXPECT_TRUE(BP != NULL);
+}
+
+
+// This test verifies that the inferred upcast takes precedence over an
+// explicitly written one. This is important because it verifies that the
+// dynamic check gets optimized away.
+class UseInferredUpcast {
+public:
+ int Dummy;
+ static bool classof(const UseInferredUpcast *) {
+ return false;
+ }
+};
+
+TEST(CastingTest, InferredUpcastTakesPrecedence) {
+ UseInferredUpcast UIU;
+ // Since the explicit classof() returns false, this will fail if the
+ // explicit one is used.
+ EXPECT_TRUE(isa<UseInferredUpcast>(&UIU));
+}
+
+} // end namespace inferred_upcasting
+} // end anonymous namespace
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index 435baeef0c1..e76fa570669 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -993,7 +993,7 @@ AsmMatcherInfo::getOperandClass(const CGIOperandList::OperandInfo &OI,
int SubOpIdx) {
Record *Rec = OI.Rec;
if (SubOpIdx != -1)
- Rec = dynamic_cast<DefInit*>(OI.MIOperandInfo->getArg(SubOpIdx))->getDef();
+ Rec = cast<DefInit>(OI.MIOperandInfo->getArg(SubOpIdx))->getDef();
return getOperandClass(Rec, SubOpIdx);
}
@@ -1007,7 +1007,7 @@ AsmMatcherInfo::getOperandClass(Record *Rec, int SubOpIdx) {
throw "Record `" + Rec->getName() +
"' does not have a ParserMatchClass!\n";
- if (DefInit *DI= dynamic_cast<DefInit*>(R->getValue())) {
+ if (DefInit *DI= dyn_cast<DefInit>(R->getValue())) {
Record *MatchClass = DI->getDef();
if (ClassInfo *CI = AsmOperandClasses[MatchClass])
return CI;
@@ -1185,7 +1185,7 @@ void AsmMatcherInfo::buildOperandClasses() {
ListInit *Supers = (*it)->getValueAsListInit("SuperClasses");
for (unsigned i = 0, e = Supers->getSize(); i != e; ++i) {
- DefInit *DI = dynamic_cast<DefInit*>(Supers->getElement(i));
+ DefInit *DI = dyn_cast<DefInit>(Supers->getElement(i));
if (!DI) {
PrintError((*it)->getLoc(), "Invalid super class reference!");
continue;
@@ -1203,33 +1203,31 @@ void AsmMatcherInfo::buildOperandClasses() {
// Get or construct the predicate method name.
Init *PMName = (*it)->getValueInit("PredicateMethod");
- if (StringInit *SI = dynamic_cast<StringInit*>(PMName)) {
+ if (StringInit *SI = dyn_cast<StringInit>(PMName)) {
CI->PredicateMethod = SI->getValue();
} else {
- assert(dynamic_cast<UnsetInit*>(PMName) &&
- "Unexpected PredicateMethod field!");
+ assert(isa<UnsetInit>(PMName) && "Unexpected PredicateMethod field!");
CI->PredicateMethod = "is" + CI->ClassName;
}
// Get or construct the render method name.
Init *RMName = (*it)->getValueInit("RenderMethod");
- if (StringInit *SI = dynamic_cast<StringInit*>(RMName)) {
+ if (StringInit *SI = dyn_cast<StringInit>(RMName)) {
CI->RenderMethod = SI->getValue();
} else {
- assert(dynamic_cast<UnsetInit*>(RMName) &&
- "Unexpected RenderMethod field!");
+ assert(isa<UnsetInit>(RMName) && "Unexpected RenderMethod field!");
CI->RenderMethod = "add" + CI->ClassName + "Operands";
}
// Get the parse method name or leave it as empty.
Init *PRMName = (*it)->getValueInit("ParserMethod");
- if (StringInit *SI = dynamic_cast<StringInit*>(PRMName))
+ if (StringInit *SI = dyn_cast<StringInit>(PRMName))
CI->ParserMethod = SI->getValue();
// Get the diagnostic type or leave it as empty.
Init *DiagnosticType = (*it)->getValueInit("DiagnosticType");
- if (StringInit *SI = dynamic_cast<StringInit*>(DiagnosticType))
+ if (StringInit *SI = dyn_cast<StringInit>(DiagnosticType))
CI->DiagnosticType = SI->getValue();
AsmOperandClasses[*it] = CI;
@@ -1716,9 +1714,7 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
OpOS << "void " << Target.getName() << ClassName << "::\n"
<< "convertToMapAndConstraints(unsigned Kind,\n";
OpOS.indent(27);
- OpOS << "const SmallVectorImpl<MCParsedAsmOperand*> &Operands,\n";
- OpOS.indent(27);
- OpOS << "MatchInstMapAndConstraintsImpl &MapAndConstraints) {\n"
+ OpOS << "const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {\n"
<< " assert(Kind < CVT_NUM_SIGNATURES && \"Invalid signature!\");\n"
<< " unsigned NumMCOperands = 0;\n"
<< " const uint8_t *Converter = ConversionTable[Kind];\n"
@@ -1726,9 +1722,11 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
<< " switch (*p) {\n"
<< " default: llvm_unreachable(\"invalid conversion entry!\");\n"
<< " case CVT_Reg:\n"
+ << " Operands[*(p + 1)]->setMCOperandNum(NumMCOperands);\n"
+ << " Operands[*(p + 1)]->setConstraint(\"m\");\n"
+ << " ++NumMCOperands;\n"
+ << " break;\n"
<< " case CVT_Tied:\n"
- << " MapAndConstraints.push_back(std::make_pair(NumMCOperands,"
- << "\"m\"));\n"
<< " ++NumMCOperands;\n"
<< " break;\n";
@@ -1825,8 +1823,8 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
// Add a handler for the operand number lookup.
OpOS << " case " << Name << ":\n"
- << " MapAndConstraints.push_back(std::make_pair(NumMCOperands"
- << ",\"m\"));\n"
+ << " Operands[*(p + 1)]->setMCOperandNum(NumMCOperands);\n"
+ << " Operands[*(p + 1)]->setConstraint(\"m\");\n"
<< " NumMCOperands += " << OpInfo.MINumOperands << ";\n"
<< " break;\n";
break;
@@ -1864,8 +1862,8 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
<< " break;\n";
OpOS << " case " << Name << ":\n"
- << " MapAndConstraints.push_back(std::make_pair(NumMCOperands"
- << ",\"\"));\n"
+ << " Operands[*(p + 1)]->setMCOperandNum(NumMCOperands);\n"
+ << " Operands[*(p + 1)]->setConstraint(\"\");\n"
<< " ++NumMCOperands;\n"
<< " break;\n";
break;
@@ -1895,8 +1893,8 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
<< " break;\n";
OpOS << " case " << Name << ":\n"
- << " MapAndConstraints.push_back(std::make_pair(NumMCOperands"
- << ",\"m\"));\n"
+ << " Operands[*(p + 1)]->setMCOperandNum(NumMCOperands);\n"
+ << " Operands[*(p + 1)]->setConstraint(\"m\");\n"
<< " ++NumMCOperands;\n"
<< " break;\n";
}
@@ -2606,16 +2604,12 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
<< " const SmallVectorImpl<MCParsedAsmOperand*> "
<< "&Operands);\n";
OS << " void convertToMapAndConstraints(unsigned Kind,\n ";
- OS << " const SmallVectorImpl<MCParsedAsmOperand*> &Operands,\n";
- OS.indent(29);
- OS << "MatchInstMapAndConstraintsImpl &MapAndConstraints);\n";
+ OS << " const SmallVectorImpl<MCParsedAsmOperand*> &Operands);\n";
OS << " bool mnemonicIsValid(StringRef Mnemonic);\n";
OS << " unsigned MatchInstructionImpl(\n";
OS.indent(27);
OS << "const SmallVectorImpl<MCParsedAsmOperand*> &Operands,\n"
- << " unsigned &Kind, MCInst &Inst,\n";
- OS.indent(30);
- OS << "MatchInstMapAndConstraintsImpl &MapAndConstraints,\n"
+ << " MCInst &Inst,\n"
<< " unsigned &ErrorInfo,"
<< " bool matchingInlineAsm,\n"
<< " unsigned VariantID = 0);\n";
@@ -2808,8 +2802,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
<< Target.getName() << ClassName << "::\n"
<< "MatchInstructionImpl(const SmallVectorImpl<MCParsedAsmOperand*>"
<< " &Operands,\n";
- OS << " unsigned &Kind, MCInst &Inst,\n"
- << "SmallVectorImpl<std::pair< unsigned, std::string > > &MapAndConstraints,\n"
+ OS << " MCInst &Inst,\n"
<< "unsigned &ErrorInfo, bool matchingInlineAsm, unsigned VariantID) {\n";
OS << " // Eliminate obvious mismatches.\n";
@@ -2905,10 +2898,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << " }\n";
OS << "\n";
OS << " if (matchingInlineAsm) {\n";
- OS << " Kind = it->ConvertFn;\n";
OS << " Inst.setOpcode(it->Opcode);\n";
- OS << " convertToMapAndConstraints(it->ConvertFn, Operands, "
- << "MapAndConstraints);\n";
+ OS << " convertToMapAndConstraints(it->ConvertFn, Operands);\n";
OS << " return Match_Success;\n";
OS << " }\n\n";
OS << " // We have selected a definite instruction, convert the parsed\n"
diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp
index d0cd057cd34..9e453e0f6d2 100644
--- a/utils/TableGen/AsmWriterEmitter.cpp
+++ b/utils/TableGen/AsmWriterEmitter.cpp
@@ -792,7 +792,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
if (!R->getValueAsBit("EmitAlias"))
continue; // We were told not to emit the alias, but to emit the aliasee.
const DagInit *DI = R->getValueAsDag("ResultInst");
- const DefInit *Op = dynamic_cast<const DefInit*>(DI->getOperator());
+ const DefInit *Op = cast<DefInit>(DI->getOperator());
AliasMap[getQualifiedName(Op->getDef())].push_back(Alias);
}
diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt
index 0e14cbae38a..b8a6daf9e03 100644
--- a/utils/TableGen/CMakeLists.txt
+++ b/utils/TableGen/CMakeLists.txt
@@ -1,5 +1,4 @@
set(LLVM_REQUIRES_EH 1)
-set(LLVM_REQUIRES_RTTI 1)
set(LLVM_LINK_COMPONENTS Support)
add_tablegen(llvm-tblgen LLVM
diff --git a/utils/TableGen/CodeEmitterGen.cpp b/utils/TableGen/CodeEmitterGen.cpp
index 9c8ad67b423..e60aec9568f 100644
--- a/utils/TableGen/CodeEmitterGen.cpp
+++ b/utils/TableGen/CodeEmitterGen.cpp
@@ -91,11 +91,11 @@ void CodeEmitterGen::reverseBits(std::vector<Record*> &Insts) {
// return the variable bit position. Otherwise return -1.
int CodeEmitterGen::getVariableBit(const std::string &VarName,
BitsInit *BI, int bit) {
- if (VarBitInit *VBI = dynamic_cast<VarBitInit*>(BI->getBit(bit))) {
- if (VarInit *VI = dynamic_cast<VarInit*>(VBI->getBitVar()))
+ if (VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(bit))) {
+ if (VarInit *VI = dyn_cast<VarInit>(VBI->getBitVar()))
if (VI->getName() == VarName)
return VBI->getBitNum();
- } else if (VarInit *VI = dynamic_cast<VarInit*>(BI->getBit(bit))) {
+ } else if (VarInit *VI = dyn_cast<VarInit>(BI->getBit(bit))) {
if (VI->getName() == VarName)
return 0;
}
@@ -269,7 +269,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
// Start by filling in fixed values.
uint64_t Value = 0;
for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) {
- if (BitInit *B = dynamic_cast<BitInit*>(BI->getBit(e-i-1)))
+ if (BitInit *B = dyn_cast<BitInit>(BI->getBit(e-i-1)))
Value |= (uint64_t)B->getValue() << (e-i-1);
}
o << " UINT64_C(" << Value << ")," << '\t' << "// " << R->getName() << "\n";
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index c91ec95e2e5..3b5511c0563 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -582,7 +582,7 @@ typedef DepVarMap::const_iterator DepVarMap_citer;
static void FindDepVarsOf(TreePatternNode *N, DepVarMap &DepMap) {
if (N->isLeaf()) {
- if (dynamic_cast<DefInit*>(N->getLeafValue()) != NULL)
+ if (isa<DefInit>(N->getLeafValue()))
DepMap[N->getName()]++;
} else {
for (size_t i = 0, e = N->getNumChildren(); i != e; ++i)
@@ -691,7 +691,7 @@ static unsigned getPatternSize(const TreePatternNode *P,
unsigned Size = 3; // The node itself.
// If the root node is a ConstantSDNode, increases its size.
// e.g. (set R32:$dst, 0).
- if (P->isLeaf() && dynamic_cast<IntInit*>(P->getLeafValue()))
+ if (P->isLeaf() && isa<IntInit>(P->getLeafValue()))
Size += 2;
// FIXME: This is a hack to statically increase the priority of patterns
@@ -715,7 +715,7 @@ static unsigned getPatternSize(const TreePatternNode *P,
Child->getType(0) != MVT::Other)
Size += getPatternSize(Child, CGP);
else if (Child->isLeaf()) {
- if (dynamic_cast<IntInit*>(Child->getLeafValue()))
+ if (isa<IntInit>(Child->getLeafValue()))
Size += 5; // Matches a ConstantSDNode (+3) and a specific value (+2).
else if (Child->getComplexPatternInfo(CGP))
Size += getPatternSize(Child, CGP);
@@ -741,7 +741,7 @@ getPatternComplexity(const CodeGenDAGPatterns &CGP) const {
std::string PatternToMatch::getPredicateCheck() const {
std::string PredicateCheck;
for (unsigned i = 0, e = Predicates->getSize(); i != e; ++i) {
- if (DefInit *Pred = dynamic_cast<DefInit*>(Predicates->getElement(i))) {
+ if (DefInit *Pred = dyn_cast<DefInit>(Predicates->getElement(i))) {
Record *Def = Pred->getDef();
if (!Def->isSubClassOf("Predicate")) {
#ifndef NDEBUG
@@ -864,7 +864,7 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N,
// The NodeToApply must be a leaf node that is a VT. OtherOperandNum must
// have an integer type that is smaller than the VT.
if (!NodeToApply->isLeaf() ||
- !dynamic_cast<DefInit*>(NodeToApply->getLeafValue()) ||
+ !isa<DefInit>(NodeToApply->getLeafValue()) ||
!static_cast<DefInit*>(NodeToApply->getLeafValue())->getDef()
->isSubClassOf("ValueType"))
TP.error(N->getOperator()->getName() + " expects a VT operand!");
@@ -1021,8 +1021,9 @@ static unsigned GetNumNodeResults(Record *Operator, CodeGenDAGPatterns &CDP) {
// Get the result tree.
DagInit *Tree = Operator->getValueAsDag("Fragment");
Record *Op = 0;
- if (Tree && dynamic_cast<DefInit*>(Tree->getOperator()))
- Op = dynamic_cast<DefInit*>(Tree->getOperator())->getDef();
+ if (Tree)
+ if (DefInit *DI = dyn_cast<DefInit>(Tree->getOperator()))
+ Op = DI->getDef();
assert(Op && "Invalid Fragment");
return GetNumNodeResults(Op, CDP);
}
@@ -1096,8 +1097,8 @@ bool TreePatternNode::isIsomorphicTo(const TreePatternNode *N,
return false;
if (isLeaf()) {
- if (DefInit *DI = dynamic_cast<DefInit*>(getLeafValue())) {
- if (DefInit *NDI = dynamic_cast<DefInit*>(N->getLeafValue())) {
+ if (DefInit *DI = dyn_cast<DefInit>(getLeafValue())) {
+ if (DefInit *NDI = dyn_cast<DefInit>(N->getLeafValue())) {
return ((DI->getDef() == NDI->getDef())
&& (DepVars.find(getName()) == DepVars.end()
|| getName() == N->getName()));
@@ -1154,8 +1155,8 @@ SubstituteFormalArguments(std::map<std::string, TreePatternNode*> &ArgMap) {
TreePatternNode *Child = getChild(i);
if (Child->isLeaf()) {
Init *Val = Child->getLeafValue();
- if (dynamic_cast<DefInit*>(Val) &&
- static_cast<DefInit*>(Val)->getDef()->getName() == "node") {
+ if (isa<DefInit>(Val) &&
+ cast<DefInit>(Val)->getDef()->getName() == "node") {
// We found a use of a formal argument, replace it with its value.
TreePatternNode *NewChild = ArgMap[Child->getName()];
assert(NewChild && "Couldn't find formal argument!");
@@ -1316,8 +1317,7 @@ getIntrinsicInfo(const CodeGenDAGPatterns &CDP) const {
getOperator() != CDP.get_intrinsic_wo_chain_sdnode())
return 0;
- unsigned IID =
- dynamic_cast<IntInit*>(getChild(0)->getLeafValue())->getValue();
+ unsigned IID = cast<IntInit>(getChild(0)->getLeafValue())->getValue();
return &CDP.getIntrinsicInfo(IID);
}
@@ -1327,7 +1327,7 @@ const ComplexPattern *
TreePatternNode::getComplexPatternInfo(const CodeGenDAGPatterns &CGP) const {
if (!isLeaf()) return 0;
- DefInit *DI = dynamic_cast<DefInit*>(getLeafValue());
+ DefInit *DI = dyn_cast<DefInit>(getLeafValue());
if (DI && DI->getDef()->isSubClassOf("ComplexPattern"))
return &CGP.getComplexPattern(DI->getDef());
return 0;
@@ -1380,7 +1380,7 @@ TreePatternNode::isCommutativeIntrinsic(const CodeGenDAGPatterns &CDP) const {
bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
CodeGenDAGPatterns &CDP = TP.getDAGPatterns();
if (isLeaf()) {
- if (DefInit *DI = dynamic_cast<DefInit*>(getLeafValue())) {
+ if (DefInit *DI = dyn_cast<DefInit>(getLeafValue())) {
// If it's a regclass or something else known, include the type.
bool MadeChange = false;
for (unsigned i = 0, e = Types.size(); i != e; ++i)
@@ -1389,7 +1389,7 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
return MadeChange;
}
- if (IntInit *II = dynamic_cast<IntInit*>(getLeafValue())) {
+ if (IntInit *II = dyn_cast<IntInit>(getLeafValue())) {
assert(Types.size() == 1 && "Invalid IntInit");
// Int inits are always integers. :)
@@ -1641,7 +1641,7 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
static bool OnlyOnRHSOfCommutative(TreePatternNode *N) {
if (!N->isLeaf() && N->getOperator()->getName() == "imm")
return true;
- if (N->isLeaf() && dynamic_cast<IntInit*>(N->getLeafValue()))
+ if (N->isLeaf() && isa<IntInit>(N->getLeafValue()))
return true;
return false;
}
@@ -1730,7 +1730,7 @@ void TreePattern::ComputeNamedNodes(TreePatternNode *N) {
TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
- if (DefInit *DI = dynamic_cast<DefInit*>(TheInit)) {
+ if (DefInit *DI = dyn_cast<DefInit>(TheInit)) {
Record *R = DI->getDef();
// Direct reference to a leaf DagNode or PatFrag? Turn it into a
@@ -1754,26 +1754,26 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
return Res;
}
- if (IntInit *II = dynamic_cast<IntInit*>(TheInit)) {
+ if (IntInit *II = dyn_cast<IntInit>(TheInit)) {
if (!OpName.empty())
error("Constant int argument should not have a name!");
return new TreePatternNode(II, 1);
}
- if (BitsInit *BI = dynamic_cast<BitsInit*>(TheInit)) {
+ if (BitsInit *BI = dyn_cast<BitsInit>(TheInit)) {
// Turn this into an IntInit.
Init *II = BI->convertInitializerTo(IntRecTy::get());
- if (II == 0 || !dynamic_cast<IntInit*>(II))
+ if (II == 0 || !isa<IntInit>(II))
error("Bits value must be constants!");
return ParseTreePattern(II, OpName);
}
- DagInit *Dag = dynamic_cast<DagInit*>(TheInit);
+ DagInit *Dag = dyn_cast<DagInit>(TheInit);
if (!Dag) {
TheInit->dump();
error("Pattern has unexpected init kind!");
}
- DefInit *OpDef = dynamic_cast<DefInit*>(Dag->getOperator());
+ DefInit *OpDef = dyn_cast<DefInit>(Dag->getOperator());
if (!OpDef) error("Pattern has unexpected operator type!");
Record *Operator = OpDef->getDef();
@@ -1938,7 +1938,7 @@ InferAllTypes(const StringMap<SmallVector<TreePatternNode*,1> > *InNamedTypes) {
// us to match things like:
// def : Pat<(v1i64 (bitconvert(v2i32 DPR:$src))), (v1i64 DPR:$src)>;
if (Nodes[i] == Trees[0] && Nodes[i]->isLeaf()) {
- DefInit *DI = dynamic_cast<DefInit*>(Nodes[i]->getLeafValue());
+ DefInit *DI = dyn_cast<DefInit>(Nodes[i]->getLeafValue());
if (DI && (DI->getDef()->isSubClassOf("RegisterClass") ||
DI->getDef()->isSubClassOf("RegisterOperand")))
continue;
@@ -2103,7 +2103,7 @@ void CodeGenDAGPatterns::ParsePatternFragments() {
// Parse the operands list.
DagInit *OpsList = Fragments[i]->getValueAsDag("Operands");
- DefInit *OpsOp = dynamic_cast<DefInit*>(OpsList->getOperator());
+ DefInit *OpsOp = dyn_cast<DefInit>(OpsList->getOperator());
// Special cases: ops == outs == ins. Different names are used to
// improve readability.
if (!OpsOp ||
@@ -2115,9 +2115,8 @@ void CodeGenDAGPatterns::ParsePatternFragments() {
// Copy over the arguments.
Args.clear();
for (unsigned j = 0, e = OpsList->getNumArgs(); j != e; ++j) {
- if (!dynamic_cast<DefInit*>(OpsList->getArg(j)) ||
- static_cast<DefInit*>(OpsList->getArg(j))->
- getDef()->getName() != "node")
+ if (!isa<DefInit>(OpsList->getArg(j)) ||
+ cast<DefInit>(OpsList->getArg(j))->getDef()->getName() != "node")
P->error("Operands list should all be 'node' values.");
if (OpsList->getArgName(j).empty())
P->error("Operands list should have names for each operand!");
@@ -2218,7 +2217,7 @@ static bool HandleUse(TreePattern *I, TreePatternNode *Pat,
// No name -> not interesting.
if (Pat->getName().empty()) {
if (Pat->isLeaf()) {
- DefInit *DI = dynamic_cast<DefInit*>(Pat->getLeafValue());
+ DefInit *DI = dyn_cast<DefInit>(Pat->getLeafValue());
if (DI && (DI->getDef()->isSubClassOf("RegisterClass") ||
DI->getDef()->isSubClassOf("RegisterOperand")))
I->error("Input " + DI->getDef()->getName() + " must be named!");
@@ -2228,7 +2227,7 @@ static bool HandleUse(TreePattern *I, TreePatternNode *Pat,
Record *Rec;
if (Pat->isLeaf()) {
- DefInit *DI = dynamic_cast<DefInit*>(Pat->getLeafValue());
+ DefInit *DI = dyn_cast<DefInit>(Pat->getLeafValue());
if (!DI) I->error("Input $" + Pat->getName() + " must be an identifier!");
Rec = DI->getDef();
} else {
@@ -2246,7 +2245,7 @@ static bool HandleUse(TreePattern *I, TreePatternNode *Pat,
}
Record *SlotRec;
if (Slot->isLeaf()) {
- SlotRec = dynamic_cast<DefInit*>(Slot->getLeafValue())->getDef();
+ SlotRec = cast<DefInit>(Slot->getLeafValue())->getDef();
} else {
assert(Slot->getNumChildren() == 0 && "can't be a use with children!");
SlotRec = Slot->getOperator();
@@ -2281,7 +2280,7 @@ FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat,
if (!Dest->isLeaf())
I->error("implicitly defined value should be a register!");
- DefInit *Val = dynamic_cast<DefInit*>(Dest->getLeafValue());
+ DefInit *Val = dyn_cast<DefInit>(Dest->getLeafValue());
if (!Val || !Val->getDef()->isSubClassOf("Register"))
I->error("implicitly defined value should be a register!");
InstImpResults.push_back(Val->getDef());
@@ -2322,7 +2321,7 @@ FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat,
if (!Dest->isLeaf())
I->error("set destination should be a register!");
- DefInit *Val = dynamic_cast<DefInit*>(Dest->getLeafValue());
+ DefInit *Val = dyn_cast<DefInit>(Dest->getLeafValue());
if (!Val)
I->error("set destination should be a register!");
@@ -2381,7 +2380,7 @@ private:
return false;
const TreePatternNode *N0 = N->getChild(0);
- if (!N0->isLeaf() || !dynamic_cast<DefInit*>(N0->getLeafValue()))
+ if (!N0->isLeaf() || !isa<DefInit>(N0->getLeafValue()))
return false;
const TreePatternNode *N1 = N->getChild(1);
@@ -2399,7 +2398,7 @@ private:
public:
void AnalyzeNode(const TreePatternNode *N) {
if (N->isLeaf()) {
- if (DefInit *DI = dynamic_cast<DefInit*>(N->getLeafValue())) {
+ if (DefInit *DI = dyn_cast<DefInit>(N->getLeafValue())) {
Record *LeafRec = DI->getDef();
// Handle ComplexPattern leaves.
if (LeafRec->isSubClassOf("ComplexPattern")) {
@@ -2504,7 +2503,7 @@ static bool InferFromPattern(CodeGenInstruction &InstInfo,
/// hasNullFragReference - Return true if the DAG has any reference to the
/// null_frag operator.
static bool hasNullFragReference(DagInit *DI) {
- DefInit *OpDef = dynamic_cast<DefInit*>(DI->getOperator());
+ DefInit *OpDef = dyn_cast<DefInit>(DI->getOperator());
if (!OpDef) return false;
Record *Operator = OpDef->getDef();
@@ -2512,7 +2511,7 @@ static bool hasNullFragReference(DagInit *DI) {
if (Operator->getName() == "null_frag") return true;
// If any of the arguments reference the null fragment, return true.
for (unsigned i = 0, e = DI->getNumArgs(); i != e; ++i) {
- DagInit *Arg = dynamic_cast<DagInit*>(DI->getArg(i));
+ DagInit *Arg = dyn_cast<DagInit>(DI->getArg(i));
if (Arg && hasNullFragReference(Arg))
return true;
}
@@ -2524,7 +2523,7 @@ static bool hasNullFragReference(DagInit *DI) {
/// the null_frag operator.
static bool hasNullFragReference(ListInit *LI) {
for (unsigned i = 0, e = LI->getSize(); i != e; ++i) {
- DagInit *DI = dynamic_cast<DagInit*>(LI->getElement(i));
+ DagInit *DI = dyn_cast<DagInit>(LI->getElement(i));
assert(DI && "non-dag in an instruction Pattern list?!");
if (hasNullFragReference(DI))
return true;
@@ -2552,7 +2551,7 @@ void CodeGenDAGPatterns::ParseInstructions() {
for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
ListInit *LI = 0;
- if (dynamic_cast<ListInit*>(Instrs[i]->getValueInit("Pattern")))
+ if (isa<ListInit>(Instrs[i]->getValueInit("Pattern")))
LI = Instrs[i]->getValueAsListInit("Pattern");
// If there is no pattern, only collect minimal information about the
@@ -2647,7 +2646,7 @@ void CodeGenDAGPatterns::ParseInstructions() {
if (i == 0)
Res0Node = RNode;
- Record *R = dynamic_cast<DefInit*>(RNode->getLeafValue())->getDef();
+ Record *R = cast<DefInit>(RNode->getLeafValue())->getDef();
if (R == 0)
I->error("Operand $" + OpName + " should be a set destination: all "
"outputs must occur before inputs in operand list!");
@@ -2689,8 +2688,7 @@ void CodeGenDAGPatterns::ParseInstructions() {
TreePatternNode *InVal = InstInputsCheck[OpName];
InstInputsCheck.erase(OpName); // It occurred, remove from map.
- if (InVal->isLeaf() &&
- dynamic_cast<DefInit*>(InVal->getLeafValue())) {
+ if (InVal->isLeaf() && isa<DefInit>(InVal->getLeafValue())) {
Record *InRec = static_cast<DefInit*>(InVal->getLeafValue())->getDef();
if (Op.Rec != InRec && !InRec->isSubClassOf("ComplexPattern"))
I->error("Operand $" + OpName + "'s register class disagrees"
@@ -3354,7 +3352,7 @@ static void GenerateVariantsOf(TreePatternNode *N,
for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i) {
TreePatternNode *Child = N->getChild(i);
if (Child->isLeaf())
- if (DefInit *DI = dynamic_cast<DefInit*>(Child->getLeafValue())) {
+ if (DefInit *DI = dyn_cast<DefInit>(Child->getLeafValue())) {
Record *RR = DI->getDef();
if (RR->isSubClassOf("Register"))
continue;
diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp
index 836279b89ab..99d2f173a87 100644
--- a/utils/TableGen/CodeGenInstruction.cpp
+++ b/utils/TableGen/CodeGenInstruction.cpp
@@ -32,7 +32,7 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) {
DagInit *OutDI = R->getValueAsDag("OutOperandList");
- if (DefInit *Init = dynamic_cast<DefInit*>(OutDI->getOperator())) {
+ if (DefInit *Init = dyn_cast<DefInit>(OutDI->getOperator())) {
if (Init->getDef()->getName() != "outs")
throw R->getName() + ": invalid def name for output list: use 'outs'";
} else
@@ -41,7 +41,7 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) {
NumDefs = OutDI->getNumArgs();
DagInit *InDI = R->getValueAsDag("InOperandList");
- if (DefInit *Init = dynamic_cast<DefInit*>(InDI->getOperator())) {
+ if (DefInit *Init = dyn_cast<DefInit>(InDI->getOperator())) {
if (Init->getDef()->getName() != "ins")
throw R->getName() + ": invalid def name for input list: use 'ins'";
} else
@@ -60,7 +60,7 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) {
ArgName = InDI->getArgName(i-NumDefs);
}
- DefInit *Arg = dynamic_cast<DefInit*>(ArgInit);
+ DefInit *Arg = dyn_cast<DefInit>(ArgInit);
if (!Arg)
throw "Illegal operand for the '" + R->getName() + "' instruction!";
@@ -80,9 +80,8 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) {
MIOpInfo = Rec->getValueAsDag("MIOperandInfo");
// Verify that MIOpInfo has an 'ops' root value.
- if (!dynamic_cast<DefInit*>(MIOpInfo->getOperator()) ||
- dynamic_cast<DefInit*>(MIOpInfo->getOperator())
- ->getDef()->getName() != "ops")
+ if (!isa<DefInit>(MIOpInfo->getOperator()) ||
+ cast<DefInit>(MIOpInfo->getOperator())->getDef()->getName() != "ops")
throw "Bad value for MIOperandInfo in operand '" + Rec->getName() +
"'\n";
@@ -416,7 +415,7 @@ bool CodeGenInstAlias::tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo,
ArrayRef<SMLoc> Loc, CodeGenTarget &T,
ResultOperand &ResOp) {
Init *Arg = Result->getArg(AliasOpNo);
- DefInit *ADI = dynamic_cast<DefInit*>(Arg);
+ DefInit *ADI = dyn_cast<DefInit>(Arg);
if (ADI && ADI->getDef() == InstOpRec) {
// If the operand is a record, it must have a name, and the record type
@@ -446,7 +445,7 @@ bool CodeGenInstAlias::tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo,
DagInit *DI = InstOpRec->getValueAsDag("MIOperandInfo");
// The operand info should only have a single (register) entry. We
// want the register class of it.
- InstOpRec = dynamic_cast<DefInit*>(DI->getArg(0))->getDef();
+ InstOpRec = cast<DefInit>(DI->getArg(0))->getDef();
}
if (InstOpRec->isSubClassOf("RegisterOperand"))
@@ -486,7 +485,7 @@ bool CodeGenInstAlias::tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo,
}
// Literal integers.
- if (IntInit *II = dynamic_cast<IntInit*>(Arg)) {
+ if (IntInit *II = dyn_cast<IntInit>(Arg)) {
if (hasSubOps || !InstOpRec->isSubClassOf("Operand"))
return false;
// Integer arguments can't have names.
@@ -518,7 +517,7 @@ CodeGenInstAlias::CodeGenInstAlias(Record *R, CodeGenTarget &T) : TheDef(R) {
Result = R->getValueAsDag("ResultInst");
// Verify that the root of the result is an instruction.
- DefInit *DI = dynamic_cast<DefInit*>(Result->getOperator());
+ DefInit *DI = dyn_cast<DefInit>(Result->getOperator());
if (DI == 0 || !DI->getDef()->isSubClassOf("Instruction"))
throw TGError(R->getLoc(), "result of inst alias should be an instruction");
@@ -528,7 +527,7 @@ CodeGenInstAlias::CodeGenInstAlias(Record *R, CodeGenTarget &T) : TheDef(R) {
// the same class.
StringMap<Record*> NameClass;
for (unsigned i = 0, e = Result->getNumArgs(); i != e; ++i) {
- DefInit *ADI = dynamic_cast<DefInit*>(Result->getArg(i));
+ DefInit *ADI = dyn_cast<DefInit>(Result->getArg(i));
if (!ADI || Result->getArgName(i).empty())
continue;
// Verify we don't have something like: (someinst GR16:$foo, GR32:$foo)
@@ -575,7 +574,7 @@ CodeGenInstAlias::CodeGenInstAlias(Record *R, CodeGenTarget &T) : TheDef(R) {
} else {
DagInit *MIOI = ResultInst->Operands[i].MIOperandInfo;
for (unsigned SubOp = 0; SubOp != NumSubOps; ++SubOp) {
- Record *SubRec = dynamic_cast<DefInit*>(MIOI->getArg(SubOp))->getDef();
+ Record *SubRec = cast<DefInit>(MIOI->getArg(SubOp))->getDef();
// Take care to instantiate each of the suboperands with the correct
// nomenclature: $foo.bar
@@ -596,7 +595,7 @@ CodeGenInstAlias::CodeGenInstAlias(Record *R, CodeGenTarget &T) : TheDef(R) {
for (unsigned SubOp = 0; SubOp != NumSubOps; ++SubOp) {
if (AliasOpNo >= Result->getNumArgs())
throw TGError(R->getLoc(), "not enough arguments for instruction!");
- Record *SubRec = dynamic_cast<DefInit*>(MIOI->getArg(SubOp))->getDef();
+ Record *SubRec = cast<DefInit>(MIOI->getArg(SubOp))->getDef();
if (tryAliasOpMatch(Result, AliasOpNo, SubRec, false,
R->getLoc(), T, ResOp)) {
ResultOperands.push_back(ResOp);
diff --git a/utils/TableGen/CodeGenSchedule.cpp b/utils/TableGen/CodeGenSchedule.cpp
index bf4f2a39445..fc101eec614 100644
--- a/utils/TableGen/CodeGenSchedule.cpp
+++ b/utils/TableGen/CodeGenSchedule.cpp
@@ -59,7 +59,7 @@ struct InstRegexOp : public SetTheory::Operator {
SmallVector<Regex*, 4> RegexList;
for (DagInit::const_arg_iterator
AI = Expr->arg_begin(), AE = Expr->arg_end(); AI != AE; ++AI) {
- StringInit *SI = dynamic_cast<StringInit*>(*AI);
+ StringInit *SI = dyn_cast<StringInit>(*AI);
if (!SI)
throw "instregex requires pattern string: " + Expr->getAsString();
std::string pat = SI->getValue();
diff --git a/utils/TableGen/DAGISelMatcher.h b/utils/TableGen/DAGISelMatcher.h
index 3ca16f04269..7c6ce3babcd 100644
--- a/utils/TableGen/DAGISelMatcher.h
+++ b/utils/TableGen/DAGISelMatcher.h
@@ -99,8 +99,6 @@ public:
OwningPtr<Matcher> &getNextPtr() { return Next; }
- static inline bool classof(const Matcher *) { return true; }
-
bool isEqual(const Matcher *M) const {
if (getKind() != M->getKind()) return false;
return isEqualImpl(M);
diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp
index b2912699330..70c6fe6000f 100644
--- a/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/utils/TableGen/DAGISelMatcherGen.cpp
@@ -203,7 +203,7 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) {
assert(N->isLeaf() && "Not a leaf?");
// Direct match against an integer constant.
- if (IntInit *II = dynamic_cast<IntInit*>(N->getLeafValue())) {
+ if (IntInit *II = dyn_cast<IntInit>(N->getLeafValue())) {
// If this is the root of the dag we're matching, we emit a redundant opcode
// check to ensure that this gets folded into the normal top-level
// OpcodeSwitch.
@@ -215,7 +215,7 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) {
return AddMatcher(new CheckIntegerMatcher(II->getValue()));
}
- DefInit *DI = dynamic_cast<DefInit*>(N->getLeafValue());
+ DefInit *DI = dyn_cast<DefInit>(N->getLeafValue());
if (DI == 0) {
errs() << "Unknown leaf kind: " << *N << "\n";
abort();
@@ -283,7 +283,7 @@ void MatcherGen::EmitOperatorMatchCode(const TreePatternNode *N,
N->getOperator()->getName() == "or") &&
N->getChild(1)->isLeaf() && N->getChild(1)->getPredicateFns().empty() &&
N->getPredicateFns().empty()) {
- if (IntInit *II = dynamic_cast<IntInit*>(N->getChild(1)->getLeafValue())) {
+ if (IntInit *II = dyn_cast<IntInit>(N->getChild(1)->getLeafValue())) {
if (!isPowerOf2_32(II->getValue())) { // Don't bother with single bits.
// If this is at the root of the pattern, we emit a redundant
// CheckOpcode so that the following checks get factored properly under
@@ -572,14 +572,14 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode *N,
SmallVectorImpl<unsigned> &ResultOps) {
assert(N->isLeaf() && "Must be a leaf");
- if (IntInit *II = dynamic_cast<IntInit*>(N->getLeafValue())) {
+ if (IntInit *II = dyn_cast<IntInit>(N->getLeafValue())) {
AddMatcher(new EmitIntegerMatcher(II->getValue(), N->getType(0)));
ResultOps.push_back(NextRecordedOperandNo++);
return;
}
// If this is an explicit register reference, handle it.
- if (DefInit *DI = dynamic_cast<DefInit*>(N->getLeafValue())) {
+ if (DefInit *DI = dyn_cast<DefInit>(N->getLeafValue())) {
Record *Def = DI->getDef();
if (Def->isSubClassOf("Register")) {
const CodeGenRegister *Reg =
diff --git a/utils/TableGen/FastISelEmitter.cpp b/utils/TableGen/FastISelEmitter.cpp
index ca784d0dda9..03e918fa4b0 100644
--- a/utils/TableGen/FastISelEmitter.cpp
+++ b/utils/TableGen/FastISelEmitter.cpp
@@ -245,7 +245,7 @@ struct OperandsSignature {
if (Op->getType(0) != VT)
return false;
- DefInit *OpDI = dynamic_cast<DefInit*>(Op->getLeafValue());
+ DefInit *OpDI = dyn_cast<DefInit>(Op->getLeafValue());
if (!OpDI)
return false;
Record *OpLeafRec = OpDI->getDef();
@@ -406,13 +406,12 @@ static std::string PhyRegForNode(TreePatternNode *Op,
if (!Op->isLeaf())
return PhysReg;
- DefInit *OpDI = dynamic_cast<DefInit*>(Op->getLeafValue());
- Record *OpLeafRec = OpDI->getDef();
+ Record *OpLeafRec = cast<DefInit>(Op->getLeafValue())->getDef();
if (!OpLeafRec->isSubClassOf("Register"))
return PhysReg;
- PhysReg += static_cast<StringInit*>(OpLeafRec->getValue( \
- "Namespace")->getValue())->getValue();
+ PhysReg += cast<StringInit>(OpLeafRec->getValue("Namespace")->getValue())
+ ->getValue();
PhysReg += "::";
PhysReg += Target.getRegBank().getReg(OpLeafRec)->getName();
return PhysReg;
@@ -473,7 +472,7 @@ void FastISelMap::collectPatterns(CodeGenDAGPatterns &CGP) {
// a bit too complicated for now.
if (!Dst->getChild(1)->isLeaf()) continue;
- DefInit *SR = dynamic_cast<DefInit*>(Dst->getChild(1)->getLeafValue());
+ DefInit *SR = dyn_cast<DefInit>(Dst->getChild(1)->getLeafValue());
if (SR)
SubRegNo = getQualifiedName(SR->getDef());
else
diff --git a/utils/TableGen/FixedLenDecoderEmitter.cpp b/utils/TableGen/FixedLenDecoderEmitter.cpp
index b5f62d8dea1..c53776b9ffb 100644
--- a/utils/TableGen/FixedLenDecoderEmitter.cpp
+++ b/utils/TableGen/FixedLenDecoderEmitter.cpp
@@ -142,7 +142,7 @@ static int Value(bit_value_t V) {
return ValueNotSet(V) ? -1 : (V == BIT_FALSE ? 0 : 1);
}
static bit_value_t bitFromBits(const BitsInit &bits, unsigned index) {
- if (BitInit *bit = dynamic_cast<BitInit*>(bits.getBit(index)))
+ if (BitInit *bit = dyn_cast<BitInit>(bits.getBit(index)))
return bit->getValue() ? BIT_TRUE : BIT_FALSE;
// The bit is uninitialized.
@@ -1757,8 +1757,8 @@ static bool populateInstruction(const CodeGenInstruction &CGI, unsigned Opc,
// for decoding register classes.
// FIXME: This need to be extended to handle instructions with custom
// decoder methods, and operands with (simple) MIOperandInfo's.
- TypedInit *TI = dynamic_cast<TypedInit*>(NI->first);
- RecordRecTy *Type = dyn_cast<RecordRecTy>(TI->getType());
+ TypedInit *TI = cast<TypedInit>(NI->first);
+ RecordRecTy *Type = cast<RecordRecTy>(TI->getType());
Record *TypeRecord = Type->getRecord();
bool isReg = false;
if (TypeRecord->isSubClassOf("RegisterOperand"))
@@ -1770,7 +1770,7 @@ static bool populateInstruction(const CodeGenInstruction &CGI, unsigned Opc,
RecordVal *DecoderString = TypeRecord->getValue("DecoderMethod");
StringInit *String = DecoderString ?
- dynamic_cast<StringInit*>(DecoderString->getValue()) : 0;
+ dyn_cast<StringInit>(DecoderString->getValue()) : 0;
if (!isReg && String && String->getValue() != "")
Decoder = String->getValue();
@@ -1781,11 +1781,11 @@ static bool populateInstruction(const CodeGenInstruction &CGI, unsigned Opc,
for (unsigned bi = 0; bi < Bits.getNumBits(); ++bi) {
VarInit *Var = 0;
- VarBitInit *BI = dynamic_cast<VarBitInit*>(Bits.getBit(bi));
+ VarBitInit *BI = dyn_cast<VarBitInit>(Bits.getBit(bi));
if (BI)
- Var = dynamic_cast<VarInit*>(BI->getBitVar());
+ Var = dyn_cast<VarInit>(BI->getBitVar());
else
- Var = dynamic_cast<VarInit*>(Bits.getBit(bi));
+ Var = dyn_cast<VarInit>(Bits.getBit(bi));
if (!Var) {
if (Base != ~0U) {
diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp
index 79602da92b9..e447c16b164 100644
--- a/utils/TableGen/InstrInfoEmitter.cpp
+++ b/utils/TableGen/InstrInfoEmitter.cpp
@@ -89,7 +89,7 @@ InstrInfoEmitter::GetOperandInfo(const CodeGenInstruction &Inst) {
for (unsigned j = 0, e = Inst.Operands[i].MINumOperands; j != e; ++j) {
OperandList.push_back(Inst.Operands[i]);
- Record *OpR = dynamic_cast<DefInit*>(MIOI->getArg(j))->getDef();
+ Record *OpR = cast<DefInit>(MIOI->getArg(j))->getDef();
OperandList.back().Rec = OpR;
}
}
@@ -299,7 +299,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
const OperandInfoMapTy &OpInfo,
raw_ostream &OS) {
int MinOperands = 0;
- if (!Inst.Operands.size() == 0)
+ if (!Inst.Operands.empty())
// Each logical operand can be multiple MI operands.
MinOperands = Inst.Operands.back().MIOperandNo +
Inst.Operands.back().MINumOperands;
@@ -345,7 +345,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
if (!TSF) throw "no TSFlags?";
uint64_t Value = 0;
for (unsigned i = 0, e = TSF->getNumBits(); i != e; ++i) {
- if (BitInit *Bit = dynamic_cast<BitInit*>(TSF->getBit(i)))
+ if (BitInit *Bit = dyn_cast<BitInit>(TSF->getBit(i)))
Value |= uint64_t(Bit->getValue()) << i;
else
throw "Invalid TSFlags bit in " + Inst.TheDef->getName();
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp
index 080e711d556..e830a66a33b 100644
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -510,10 +510,10 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
OS << "// Add parameter attributes that are not common to all intrinsics.\n";
OS << "#ifdef GET_INTRINSIC_ATTRIBUTES\n";
if (TargetOnly)
- OS << "static AttrListPtr getAttributes(" << TargetPrefix
+ OS << "static AttrListPtr getAttributes(LLVMContext &C, " << TargetPrefix
<< "Intrinsic::ID id) {\n";
else
- OS << "AttrListPtr Intrinsic::getAttributes(ID id) {\n";
+ OS << "AttrListPtr Intrinsic::getAttributes(LLVMContext &C, ID id) {\n";
// Compute the maximum number of attribute arguments and the map
typedef std::map<const CodeGenIntrinsic*, unsigned,
@@ -582,7 +582,7 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
++ai;
} while (ai != ae && intrinsic.ArgumentAttributes[ai].first == argNo);
- OS << " AWI[" << numAttrs++ << "] = AttributeWithIndex::get("
+ OS << " AWI[" << numAttrs++ << "] = AttributeWithIndex::get(C, "
<< argNo+1 << ", AttrVec);\n";
}
}
@@ -606,8 +606,8 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
OS << " AttrVec.push_back(Attributes::ReadNone);\n";
break;
}
- OS << " AWI[" << numAttrs++ << "] = AttributeWithIndex::get(~0, "
- << "AttrVec);\n";
+ OS << " AWI[" << numAttrs++ << "] = AttributeWithIndex::get(C, "
+ << "AttrListPtr::FunctionIndex, AttrVec);\n";
}
if (numAttrs) {
diff --git a/utils/TableGen/Makefile b/utils/TableGen/Makefile
index 0c4619d1a25..bdf0ba01fbf 100644
--- a/utils/TableGen/Makefile
+++ b/utils/TableGen/Makefile
@@ -11,7 +11,6 @@ LEVEL = ../..
TOOLNAME = llvm-tblgen
USEDLIBS = LLVMTableGen.a LLVMSupport.a
REQUIRES_EH := 1
-REQUIRES_RTTI := 1
# This tool has no plugins, optimize startup time.
TOOL_NO_EXPORTS = 1
diff --git a/utils/TableGen/PseudoLoweringEmitter.cpp b/utils/TableGen/PseudoLoweringEmitter.cpp
index 1896a7baae6..b0241c7c27b 100644
--- a/utils/TableGen/PseudoLoweringEmitter.cpp
+++ b/utils/TableGen/PseudoLoweringEmitter.cpp
@@ -74,7 +74,7 @@ addDagOperandMapping(Record *Rec, DagInit *Dag, CodeGenInstruction &Insn,
IndexedMap<OpData> &OperandMap, unsigned BaseIdx) {
unsigned OpsAdded = 0;
for (unsigned i = 0, e = Dag->getNumArgs(); i != e; ++i) {
- if (DefInit *DI = dynamic_cast<DefInit*>(Dag->getArg(i))) {
+ if (DefInit *DI = dyn_cast<DefInit>(Dag->getArg(i))) {
// Physical register reference. Explicit check for the special case
// "zero_reg" definition.
if (DI->getDef()->isSubClassOf("Register") ||
@@ -100,11 +100,11 @@ addDagOperandMapping(Record *Rec, DagInit *Dag, CodeGenInstruction &Insn,
for (unsigned I = 0, E = Insn.Operands[i].MINumOperands; I != E; ++I)
OperandMap[BaseIdx + i + I].Kind = OpData::Operand;
OpsAdded += Insn.Operands[i].MINumOperands;
- } else if (IntInit *II = dynamic_cast<IntInit*>(Dag->getArg(i))) {
+ } else if (IntInit *II = dyn_cast<IntInit>(Dag->getArg(i))) {
OperandMap[BaseIdx + i].Kind = OpData::Imm;
OperandMap[BaseIdx + i].Data.Imm = II->getValue();
++OpsAdded;
- } else if (DagInit *SubDag = dynamic_cast<DagInit*>(Dag->getArg(i))) {
+ } else if (DagInit *SubDag = dyn_cast<DagInit>(Dag->getArg(i))) {
// Just add the operands recursively. This is almost certainly
// a constant value for a complex operand (> 1 MI operand).
unsigned NewOps =
@@ -127,7 +127,7 @@ void PseudoLoweringEmitter::evaluateExpansion(Record *Rec) {
assert(Dag && "Missing result instruction in pseudo expansion!");
DEBUG(dbgs() << " Result: " << *Dag << "\n");
- DefInit *OpDef = dynamic_cast<DefInit*>(Dag->getOperator());
+ DefInit *OpDef = dyn_cast<DefInit>(Dag->getOperator());
if (!OpDef)
throw TGError(Rec->getLoc(), Rec->getName() +
" has unexpected operator type!");
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index 87624665cbc..ad1dab4ac08 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -325,7 +325,7 @@ RegisterInfoEmitter::EmitRegMappingTables(raw_ostream &OS,
if (!V || !V->getValue())
continue;
- DefInit *DI = dynamic_cast<DefInit*>(V->getValue());
+ DefInit *DI = cast<DefInit>(V->getValue());
Record *Alias = DI->getDef();
DwarfRegNums[Reg] = DwarfRegNums[Alias];
}
@@ -751,7 +751,7 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
BitsInit *BI = Reg->getValueAsBitsInit("HWEncoding");
uint64_t Value = 0;
for (unsigned b = 0, be = BI->getNumBits(); b != be; ++b) {
- if (BitInit *B = dynamic_cast<BitInit*>(BI->getBit(b)))
+ if (BitInit *B = dyn_cast<BitInit>(BI->getBit(b)))
Value |= (uint64_t)B->getValue() << b;
}
OS << " " << Value << ",\n";
diff --git a/utils/TableGen/SetTheory.cpp b/utils/TableGen/SetTheory.cpp
index bdca9a63bd2..5b760e7a233 100644
--- a/utils/TableGen/SetTheory.cpp
+++ b/utils/TableGen/SetTheory.cpp
@@ -72,7 +72,7 @@ struct SetIntBinOp : public SetTheory::Operator {
throw "Operator requires (Op Set, Int) arguments: " + Expr->getAsString();
RecSet Set;
ST.evaluate(Expr->arg_begin()[0], Set);
- IntInit *II = dynamic_cast<IntInit*>(Expr->arg_begin()[1]);
+ IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[1]);
if (!II)
throw "Second argument must be an integer: " + Expr->getAsString();
apply2(ST, Expr, Set, II->getValue(), Elts);
@@ -165,27 +165,27 @@ struct SequenceOp : public SetTheory::Operator {
throw "Bad args to (sequence \"Format\", From, To): " +
Expr->getAsString();
else if (Expr->arg_size() == 4) {
- if (IntInit *II = dynamic_cast<IntInit*>(Expr->arg_begin()[3])) {
+ if (IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[3])) {
Step = II->getValue();
} else
throw "Stride must be an integer: " + Expr->getAsString();
}
std::string Format;
- if (StringInit *SI = dynamic_cast<StringInit*>(Expr->arg_begin()[0]))
+ if (StringInit *SI = dyn_cast<StringInit>(Expr->arg_begin()[0]))
Format = SI->getValue();
else
throw "Format must be a string: " + Expr->getAsString();
int64_t From, To;
- if (IntInit *II = dynamic_cast<IntInit*>(Expr->arg_begin()[1]))
+ if (IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[1]))
From = II->getValue();
else
throw "From must be an integer: " + Expr->getAsString();
if (From < 0 || From >= (1 << 30))
throw "From out of range";
- if (IntInit *II = dynamic_cast<IntInit*>(Expr->arg_begin()[2]))
+ if (IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[2]))
To = II->getValue();
else
throw "From must be an integer: " + Expr->getAsString();
@@ -193,7 +193,7 @@ struct SequenceOp : public SetTheory::Operator {
throw "To out of range";
RecordKeeper &Records =
- dynamic_cast<DefInit&>(*Expr->getOperator()).getDef()->getRecords();
+ cast<DefInit>(Expr->getOperator())->getDef()->getRecords();
Step *= From <= To ? 1 : -1;
while (true) {
@@ -261,7 +261,7 @@ void SetTheory::addFieldExpander(StringRef ClassName, StringRef FieldName) {
void SetTheory::evaluate(Init *Expr, RecSet &Elts) {
// A def in a list can be a just an element, or it may expand.
- if (DefInit *Def = dynamic_cast<DefInit*>(Expr)) {
+ if (DefInit *Def = dyn_cast<DefInit>(Expr)) {
if (const RecVec *Result = expand(Def->getDef()))
return Elts.insert(Result->begin(), Result->end());
Elts.insert(Def->getDef());
@@ -269,14 +269,14 @@ void SetTheory::evaluate(Init *Expr, RecSet &Elts) {
}
// Lists simply expand.
- if (ListInit *LI = dynamic_cast<ListInit*>(Expr))
+ if (ListInit *LI = dyn_cast<ListInit>(Expr))
return evaluate(LI->begin(), LI->end(), Elts);
// Anything else must be a DAG.
- DagInit *DagExpr = dynamic_cast<DagInit*>(Expr);
+ DagInit *DagExpr = dyn_cast<DagInit>(Expr);
if (!DagExpr)
throw "Invalid set element: " + Expr->getAsString();
- DefInit *OpInit = dynamic_cast<DefInit*>(DagExpr->getOperator());
+ DefInit *OpInit = dyn_cast<DefInit>(DagExpr->getOperator());
if (!OpInit)
throw "Bad set expression: " + Expr->getAsString();
Operator *Op = Operators.lookup(OpInit->getDef()->getName());
@@ -296,7 +296,7 @@ const RecVec *SetTheory::expand(Record *Set) {
const std::vector<Record*> &SC = Set->getSuperClasses();
for (unsigned i = 0, e = SC.size(); i != e; ++i) {
// Skip unnamed superclasses.
- if (!dynamic_cast<const StringInit *>(SC[i]->getNameInit()))
+ if (!dyn_cast<StringInit>(SC[i]->getNameInit()))
continue;
if (Expander *Exp = Expanders.lookup(SC[i]->getName())) {
// This breaks recursive definitions.
diff --git a/utils/TableGen/TGValueTypes.cpp b/utils/TableGen/TGValueTypes.cpp
index af0d9f44cf4..3ac71a49147 100644
--- a/utils/TableGen/TGValueTypes.cpp
+++ b/utils/TableGen/TGValueTypes.cpp
@@ -15,13 +15,25 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Casting.h"
#include <map>
using namespace llvm;
namespace llvm {
class Type {
+protected:
+ enum TypeKind {
+ TK_ExtendedIntegerType,
+ TK_ExtendedVectorType
+ };
+private:
+ TypeKind Kind;
public:
+ TypeKind getKind() const {
+ return Kind;
+ }
+ Type(TypeKind K) : Kind(K) {}
virtual unsigned getSizeInBits() const = 0;
virtual ~Type() {}
};
@@ -32,7 +44,10 @@ class ExtendedIntegerType : public Type {
unsigned BitWidth;
public:
explicit ExtendedIntegerType(unsigned bits)
- : BitWidth(bits) {}
+ : Type(TK_ExtendedIntegerType), BitWidth(bits) {}
+ static bool classof(const Type *T) {
+ return T->getKind() == TK_ExtendedIntegerType;
+ }
unsigned getSizeInBits() const {
return getBitWidth();
}
@@ -46,7 +61,10 @@ class ExtendedVectorType : public Type {
unsigned NumElements;
public:
ExtendedVectorType(EVT elty, unsigned num)
- : ElementType(elty), NumElements(num) {}
+ : Type(TK_ExtendedVectorType), ElementType(elty), NumElements(num) {}
+ static bool classof(const Type *T) {
+ return T->getKind() == TK_ExtendedVectorType;
+ }
unsigned getSizeInBits() const {
return getNumElements() * getElementType().getSizeInBits();
}
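This is the provider side of the same migration: the base Type gains a kind tag, each subclass's constructor stamps it, and classof answers isa<>/dyn_cast<> without compiler RTTI. The pattern in miniature, following the new docs/HowToSetUpLLVMStyleRTTI.rst:

    class Shape {
    public:
      enum ShapeKind { SK_Square, SK_Circle };
    private:
      const ShapeKind Kind;
    public:
      Shape(ShapeKind K) : Kind(K) {}
      ShapeKind getKind() const { return Kind; }
    };

    class Square : public Shape {
    public:
      Square() : Shape(SK_Square) {}
      static bool classof(const Shape *S) {
        return S->getKind() == SK_Square;
      }
    };
    // Now isa<Square>(S) and dyn_cast<Square>(S) work under -fno-rtti.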
@@ -71,12 +89,12 @@ bool EVT::isExtendedFloatingPoint() const {
bool EVT::isExtendedInteger() const {
assert(isExtended() && "Type is not extended!");
- return dynamic_cast<const ExtendedIntegerType *>(LLVMTy) != 0;
+ return isa<ExtendedIntegerType>(LLVMTy);
}
bool EVT::isExtendedVector() const {
assert(isExtended() && "Type is not extended!");
- return dynamic_cast<const ExtendedVectorType *>(LLVMTy) != 0;
+ return isa<ExtendedVectorType>(LLVMTy);
}
bool EVT::isExtended64BitVector() const {