2 files changed, 101 insertions, 0 deletions
diff --git a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
index ea78f43158..9c4d7c038b 100644
--- a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
@@ -20,6 +20,8 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Target/CostTable.h"
 #include "llvm/Target/TargetLowering.h"
@@ -62,6 +64,77 @@ public:
     TargetTransformInfo::getAnalysisUsage(AU);
   }
 
+  /// Returns true only when \p VL is a pair of llvm.SI.load.const calls that
+  /// read the same buffer (operand 0) at constant offsets (operand 1) exactly
+  /// 4 apart, VL[0] first. Longer lists are rejected because the vectorized
+  /// form built by vectorizesIntrinsic is always the two-lane v2f32 load.
+  bool isIntrinsicListVectorizable(const ArrayRef<Value*> &VL) const override {
+    if (VL.size() != 2)
+      return false;
+    IntrinsicInst *VL0 = cast<IntrinsicInst>(VL[0]);
+    Function *Callee = VL0->getCalledFunction();
+    ConstantInt *Lo = dyn_cast<ConstantInt>(VL0->getArgOperand(1));
+    if (!Lo || Callee->getName() != "llvm.SI.load.const")
+      return false;
+    IntrinsicInst *VL1 = cast<IntrinsicInst>(VL[1]);
+    if (VL1->getCalledFunction() != Callee ||
+        VL1->getArgOperand(0) != VL0->getArgOperand(0)) // Same buffer?
+      return false;
+    ConstantInt *Hi = dyn_cast<ConstantInt>(VL1->getArgOperand(1));
+    return Hi && Hi->getZExtValue() - Lo->getZExtValue() == 4;
+  }
+
+  SmallVector<unsigned, 2> getOperandsToVectorize(Value *VL) const override {
+    return SmallVector<unsigned, 2>(); // Always empty; VL is not inspected.
+  }
+
+  /// Builds, through \p Builder, a single call to llvm.SI.load.const.v2f32
+  /// that takes both arguments of II[0]; the other entries of \p II and all
+  /// of \p Ops are not read. If the module has no function of that name yet,
+  /// it is declared as <2 x float> (<4 x i32>, i32) with external linkage and
+  /// the readnone attribute.
+  /// \returns the newly created call instruction.
+  Value *vectorizesIntrinsic(IRBuilder<> &Builder,
+                             const ArrayRef<Value *> &II,
+                             const ArrayRef<Value *> &Ops) const override {
+    IntrinsicInst *First = cast<IntrinsicInst>(II[0]);
+    // Walk up: basic block -> function -> module.
+    Module *Mod = First->getParent()->getParent()->getParent();
+    Value *Args[] = { First->getArgOperand(0), First->getArgOperand(1) };
+
+    // A function of that name already in the module is called as-is.
+    if (Function *Existing = Mod->getFunction("llvm.SI.load.const.v2f32"))
+      return Builder.CreateCall(Existing, Args);
+
+    // Otherwise add the declaration to the module before calling it.
+    Type *I32 = Type::getInt32Ty(Mod->getContext());
+    Type *Params[] = { VectorType::get(I32, 4), I32 };
+    Type *V2F32 = VectorType::get(Type::getFloatTy(Mod->getContext()), 2);
+    FunctionType *Sig = FunctionType::get(V2F32, Params, false);
+    Function *Decl = Function::Create(Sig, GlobalValue::ExternalLinkage,
+                                      "llvm.SI.load.const.v2f32", Mod);
+    Decl->addFnAttr(Attribute::ReadNone);
+
+    return Builder.CreateCall(Decl, Args);
+  }
+
+  /// Arithmetic cost: the element count for vector types, 1 for anything else.
+  /// The opcode and both operand kinds are ignored.
+  unsigned getArithmeticInstrCost(unsigned, Type *Ty, OperandValueKind,
+                                  OperandValueKind) const override {
+    // The element count is only queried for vector types; no dyn_cast<> needed.
+    return Ty->isVectorTy() ? Ty->getVectorNumElements() : 1;
+  }
+
+  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+                                 ArrayRef<Type *> Tys) const override {
+    return 1; // Flat cost of 1; ID, RetTy and Tys are not consulted.
+  }
+
+  unsigned getVectorInstrCost(unsigned, Type *, unsigned ) const override {
+    return 0; // Always 0; none of the three arguments is consulted.
+  }
+
   /// Pass identification.
   static char ID;
 
diff --git a/test/Transforms/SLPVectorizer/R600/intrinsics.ll b/test/Transforms/SLPVectorizer/R600/intrinsics.ll
new file mode 100644
index 0000000000..d9ae9086ef
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/R600/intrinsics.ll
@@ -0,0 +1,28 @@
+; RUN: opt %s -mtriple=r600 -slp-vectorizer -slp-vectorize-hor-store -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
+
+; CHECK-LABEL:@main
+; CHECK: call <2 x float> @llvm.SI.load.const.v2f32
+define void @main([17 x <4 x i32>] addrspace(2)* byval, float addrspace(1)* %out) {
+entry:
+  %1 = getelementptr [17 x <4 x i32>] addrspace(2)* %0, i64 0, i32 0
+  %2 = load <4 x i32> addrspace(2)* %1, !tbaa !0 ; first operand of all four calls below
+  %3 = call float @llvm.SI.load.const(<4 x i32> %2, i32 0)
+  %4 = call float @llvm.SI.load.const(<4 x i32> %2, i32 8)
+  %5 = call float @llvm.SI.load.const(<4 x i32> %2, i32 4)
+  %6 = call float @llvm.SI.load.const(<4 x i32> %2, i32 12)
+  %7 = fmul float %3, %4 ; values loaded at offsets 0 and 8
+  %8 = fmul float %5, %6 ; values loaded at offsets 4 and 12
+  %9 = fadd float %7, %8
+  store float %9, float addrspace(1)* %out
+  ret void
+}
+
+
+!0 = metadata !{metadata !"const", null, i32 1}
+
+; Function Attrs: nounwind readnone
+declare float @llvm.SI.load.const(<4 x i32>, i32) #0
+
+attributes #0 = { nounwind readnone }