summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Vincent Lejeune <vljn@ovi.com> 2014-04-29 04:09:51 +0200
committer Vincent Lejeune <vljn@ovi.com> 2014-05-03 20:01:27 +0200
commit bec6a3df72278c15fdac47d9ad43a7de3576d89c (patch)
tree a03e31d50b0d565ba43fef6be1e984f40df170af
parent ffada0b8ab8e2f6bd8a85e388347272789670444 (diff)
R600/SI: Add support for new vector intrinsic related callbacks [radeonsi-backup2]
-rw-r--r-- lib/Target/R600/AMDGPUTargetTransformInfo.cpp 73
-rw-r--r-- test/Transforms/SLPVectorizer/R600/intrinsics.ll 28
2 files changed, 101 insertions, 0 deletions
diff --git a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
index ea78f43158..9c4d7c038b 100644
--- a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
@@ -20,6 +20,8 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
@@ -62,6 +64,77 @@ public:
TargetTransformInfo::getAnalysisUsage(AU);
}
+ /// Decides whether the calls in \p VL can be replaced by one vector load.
+ ///
+ /// Returns true only when \p VL holds exactly two direct calls to
+ /// llvm.SI.load.const that read the same buffer (operand 0) and whose
+ /// constant offsets (operand 1) increase by 4 per list position, starting
+ /// from the offset of VL[0]. Every other input yields false.
+ bool isIntrinsicListVectorizable(const ArrayRef<Value*> &VL) const override {
+   // vectorizesIntrinsic() in this class always emits the two-element
+   // llvm.SI.load.const.v2f32, so no other list length can be honoured.
+   // This also rejects the empty list, for which VL[0] is out of bounds.
+   if (VL.size() != 2)
+     return false;
+
+   // dyn_cast instead of cast: an unexpected value makes the list
+   // non-vectorizable rather than tripping an assertion.
+   IntrinsicInst *VL0 = dyn_cast<IntrinsicInst>(VL[0]);
+   if (!VL0)
+     return false;
+
+   // The replacement is a constant-buffer load, so only accept the scalar
+   // constant-buffer load; any other intrinsic must not match by accident.
+   Function *Callee = VL0->getCalledFunction();
+   if (!Callee || Callee->getName() != "llvm.SI.load.const" ||
+       VL0->getNumArgOperands() < 2)
+     return false;
+
+   ConstantInt *Cst = dyn_cast<ConstantInt>(VL0->getArgOperand(1));
+   if (!Cst)
+     return false;
+   const uint64_t VL0Offset = Cst->getZExtValue();
+
+   for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+     IntrinsicInst *Inst = dyn_cast<IntrinsicInst>(VL[i]);
+     if (!Inst || Inst->getCalledFunction() != Callee)
+       return false;
+     // Check if it's the same buffer
+     if (Inst->getArgOperand(0) != VL0->getArgOperand(0))
+       return false;
+     ConstantInt *Offset = dyn_cast<ConstantInt>(Inst->getArgOperand(1));
+     // Unsigned subtraction: an offset below VL0Offset wraps to a huge value
+     // and is therefore rejected as well.
+     if (!Offset || Offset->getZExtValue() - VL0Offset != uint64_t(4) * i)
+       return false;
+   }
+   return true;
+ }
+
+ /// Returns an empty list of operand indices for every \p VL; the argument
+ /// is not inspected.
+ /// NOTE(review): presumably an empty list tells the caller that no operand
+ /// of the call needs to be vectorized -- confirm against the caller.
+ SmallVector<unsigned, 2> getOperandsToVectorize(Value *VL) const override {
+   SmallVector<unsigned, 2> NoOperands;
+   return NoOperands;
+ }
+
+ /// Emits, through \p Builder, a call to llvm.SI.load.const.v2f32 that takes
+ /// the first two arguments of II[0], and returns that call.
+ ///
+ /// The function is looked up by name in the module containing II[0]; when
+ /// it is missing, a declaration <2 x float>(<4 x i32>, i32) marked readnone
+ /// is created. Only II[0] is read; \p Ops is not used.
+ Value *vectorizesIntrinsic(IRBuilder<> &Builder,
+                            const ArrayRef<Value *> &II,
+                            const ArrayRef<Value *> &Ops) const override {
+   IntrinsicInst *First = cast<IntrinsicInst>(II[0]);
+
+   // Instruction -> basic block -> function -> module.
+   Module *Mod = First->getParent()->getParent()->getParent();
+
+   Function *VecLoad = Mod->getFunction("llvm.SI.load.const.v2f32");
+   if (!VecLoad) {
+     // First use in this module: declare the two-element load.
+     Type *I32 = Type::getInt32Ty(Mod->getContext());
+     Type *ParamTys[] = { VectorType::get(I32, 4), I32 };
+     Type *RetTy = VectorType::get(Type::getFloatTy(Mod->getContext()), 2);
+
+     VecLoad = Function::Create(FunctionType::get(RetTy, ParamTys, false),
+                                GlobalValue::ExternalLinkage,
+                                "llvm.SI.load.const.v2f32", Mod);
+     VecLoad->addFnAttr(Attribute::ReadNone);
+   }
+
+   // Same buffer and offset operands as the first scalar call.
+   Value *Args[] = { First->getArgOperand(0), First->getArgOperand(1) };
+   return Builder.CreateCall(VecLoad, Args);
+ }
+
+ /// Returns the cost of an arithmetic instruction producing \p Ty: the
+ /// number of elements for a vector type, 1 for every other type. The
+ /// opcode and operand-kind arguments are ignored.
+ unsigned getArithmeticInstrCost(unsigned, Type *Ty,
+                                 OperandValueKind,
+                                 OperandValueKind) const override {
+   // A single checked dyn_cast replaces the isVectorTy() test followed by an
+   // unchecked dyn_cast<> dereference.
+   if (VectorType *VecTy = dyn_cast<VectorType>(Ty))
+     return VecTy->getNumElements();
+   return 1;
+ }
+
+ /// Returns a cost of 1 for every intrinsic call; \p ID, \p RetTy and
+ /// \p Tys are not inspected.
+ unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+                                ArrayRef<Type *> Tys) const override {
+   const unsigned FlatIntrinsicCost = 1;
+   return FlatIntrinsicCost;
+ }
+
+ /// Always returns 0; the three unnamed arguments are ignored.
+ unsigned getVectorInstrCost(unsigned, Type *, unsigned) const override {
+   const unsigned FreeCost = 0;
+   return FreeCost;
+ }
+
/// Pass identification.
static char ID;
diff --git a/test/Transforms/SLPVectorizer/R600/intrinsics.ll b/test/Transforms/SLPVectorizer/R600/intrinsics.ll
new file mode 100644
index 0000000000..d9ae9086ef
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/R600/intrinsics.ll
@@ -0,0 +1,28 @@
+; RUN: opt %s -mtriple=r600 -slp-vectorizer -slp-vectorize-hor-store -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
+
+; CHECK-LABEL:@main
+; CHECK: call <2 x float> @llvm.SI.load.const.v2f32
+; Test input: four scalar llvm.SI.load.const calls on the same <4 x i32>
+; value %2, at offsets 0, 8, 4 and 12. The two fmul instructions pair them so
+; that the left operands come from offsets 0 and 4 and the right operands from
+; offsets 8 and 12. The expectation above requires the output to contain a
+; call to @llvm.SI.load.const.v2f32.
+define void @main([17 x <4 x i32>] addrspace(2)* byval, float addrspace(1)* %out) {
+entry:
+ ; Load the <4 x i32> value that every scalar load below takes as operand 0.
+ %1 = getelementptr [17 x <4 x i32>] addrspace(2)* %0, i64 0, i32 0
+ %2 = load <4 x i32> addrspace(2)* %1, !tbaa !0
+ ; Scalar loads, deliberately not listed in increasing offset order.
+ %3 = call float @llvm.SI.load.const(<4 x i32> %2, i32 0)
+ %4 = call float @llvm.SI.load.const(<4 x i32> %2, i32 8)
+ %5 = call float @llvm.SI.load.const(<4 x i32> %2, i32 4)
+ %6 = call float @llvm.SI.load.const(<4 x i32> %2, i32 12)
+ ; Two products summed into a single stored value.
+ %7 = fmul float %3, %4
+ %8 = fmul float %5, %6
+ %9 = fadd float %7, %8
+ store float %9, float addrspace(1)* %out
+ ret void
+}
+
+
+!0 = metadata !{metadata !"const", null, i32 1}
+
+; Function Attrs: nounwind readnone
+declare float @llvm.SI.load.const(<4 x i32>, i32) #0
+
+attributes #0 = { nounwind readnone }