diff options
-rw-r--r-- | lib/Target/R600/AMDGPUTargetTransformInfo.cpp | 119 | ||||
-rw-r--r-- | test/Transforms/SLPVectorizer/R600/intrinsics.ll | 28 |
2 files changed, 147 insertions, 0 deletions
diff --git a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
index ea78f43158..9c4d7c038b 100644
--- a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
@@ -20,6 +20,8 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Target/CostTable.h"
 #include "llvm/Target/TargetLowering.h"
@@ -62,6 +64,123 @@ public:
     TargetTransformInfo::getAnalysisUsage(AU);
   }
 
+  /// Return true when \p VL is a pair of llvm.SI.load.const calls that
+  /// vectorizesIntrinsic() can replace with one llvm.SI.load.const.v2f32 call.
+  ///
+  /// Both calls must use the same buffer operand, and the constant offset of
+  /// element i must equal the offset of element 0 plus 4 * i.
+  bool isIntrinsicListVectorizable(const ArrayRef<Value*> &VL) const override {
+    // vectorizesIntrinsic() always emits the two-element form, so any other
+    // bundle width (including an empty list) cannot be rewritten.
+    if (VL.size() != 2)
+      return false;
+
+    // Only the scalar load may be accepted: the rewrite unconditionally emits
+    // a load, so any other intrinsic with a constant second operand would be
+    // turned into one.
+    IntrinsicInst *VL0 = dyn_cast<IntrinsicInst>(VL[0]);
+    if (!VL0 || VL0->getNumArgOperands() != 2 ||
+        VL0->getCalledFunction()->getName() != "llvm.SI.load.const")
+      return false;
+
+    // The vector variant is declared as (<4 x i32>, i32); reject operands of
+    // any other type instead of building a call with a mismatched signature.
+    Type *Int32Ty = Type::getInt32Ty(VL0->getContext());
+    Value *Buffer = VL0->getArgOperand(0);
+    if (Buffer->getType() != VectorType::get(Int32Ty, 4))
+      return false;
+
+    ConstantInt *Cst = dyn_cast<ConstantInt>(VL0->getArgOperand(1));
+    if (!Cst || Cst->getType() != Int32Ty)
+      return false;
+    const uint64_t VL0Offset = Cst->getZExtValue();
+
+    // Element 0 trivially matches itself, so start at the second element.
+    for (unsigned i = 1, e = VL.size(); i < e; i++) {
+      IntrinsicInst *Inst = dyn_cast<IntrinsicInst>(VL[i]);
+      if (!Inst || Inst->getCalledFunction() != VL0->getCalledFunction())
+        return false;
+      // Check if it's the same buffer
+      if (Inst->getArgOperand(0) != Buffer)
+        return false;
+      // A smaller offset wraps around in the unsigned subtraction and is
+      // rejected as well.
+      ConstantInt *Offset = dyn_cast<ConstantInt>(Inst->getArgOperand(1));
+      if (!Offset || Offset->getZExtValue() - VL0Offset != 4 * i)
+        return false;
+    }
+    return true;
+  }
+
+  /// Always returns an empty list.
+  ///
+  /// NOTE(review): presumably the list names the call operands the vectorizer
+  /// should widen itself -- confirm against the caller.
+  SmallVector<unsigned, 2> getOperandsToVectorize(Value *VL) const override {
+    return SmallVector<unsigned, 2>();
+  }
+
+  /// Emit the llvm.SI.load.const.v2f32 call that stands in for the bundle
+  /// \p II, declaring that function in the module on first use.
+  ///
+  /// The buffer and offset operands are copied from the first call of the
+  /// bundle; \p Ops is not used.
+  ///
+  /// \returns the call created through \p Builder.
+  Value *vectorizesIntrinsic(IRBuilder<> &Builder,
+                             const ArrayRef<Value *> &II,
+                             const ArrayRef<Value *> &Ops) const override {
+    IntrinsicInst *Inst = cast<IntrinsicInst>(II[0]);
+
+    Module *M = Inst->getParent()->getParent()->getParent();
+    Function *Fn = M->getFunction("llvm.SI.load.const.v2f32");
+    // Declared as <2 x float> (<4 x i32>, i32) and marked readnone.
+    if (!Fn) {
+      Type *IntType = Type::getInt32Ty(M->getContext());
+
+      Type *Tys[] = {
+        VectorType::get(IntType, 4),
+        IntType
+      };
+
+      Type *V2F32 = VectorType::get(Type::getFloatTy(M->getContext()), 2);
+      FunctionType *FT = FunctionType::get(V2F32, Tys, false);
+      Fn = Function::Create(FT, GlobalValue::ExternalLinkage,
+                            "llvm.SI.load.const.v2f32", M);
+      Fn->addFnAttr(Attribute::ReadNone);
+    }
+
+    Value *Operands[] = {
+      Inst->getArgOperand(0),
+      Inst->getArgOperand(1),
+    };
+
+    return Builder.CreateCall(Fn, Operands);
+  }
+
+  /// Cost of an arithmetic instruction on \p Ty: one per vector element, or 1
+  /// for a non-vector type. The opcode and operand kinds are ignored.
+  unsigned getArithmeticInstrCost(unsigned, Type *Ty,
+                                  OperandValueKind,
+                                  OperandValueKind) const override {
+    // isVectorTy() already guarantees the type, so use the asserting cast
+    // rather than dereferencing a dyn_cast result that could be null.
+    if (Ty->isVectorTy())
+      return cast<VectorType>(Ty)->getNumElements();
+    return 1;
+  }
+
+  /// Every intrinsic call is given a cost of 1, whatever its ID or types.
+  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+                                 ArrayRef<Type *> Tys) const override {
+    return 1;
+  }
+
+  /// Always returns 0, regardless of the arguments.
+  unsigned getVectorInstrCost(unsigned, Type *, unsigned ) const override {
+    return 0;
+  }
+
   /// Pass identification.
   static char ID;
 
diff --git a/test/Transforms/SLPVectorizer/R600/intrinsics.ll b/test/Transforms/SLPVectorizer/R600/intrinsics.ll
new file mode 100644
index 0000000000..d9ae9086ef
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/R600/intrinsics.ll
@@ -0,0 +1,28 @@
+; RUN: opt %s -mtriple=r600 -slp-vectorizer -slp-vectorize-hor-store -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
+
+; CHECK-LABEL:@main
+; CHECK: call <2 x float> @llvm.SI.load.const.v2f32
+define void @main([17 x <4 x i32>] addrspace(2)* byval, float addrspace(1)* %out) {
+entry:
+  %1 = getelementptr [17 x <4 x i32>] addrspace(2)* %0, i64 0, i32 0
+  %2 = load <4 x i32> addrspace(2)* %1, !tbaa !0
+  %3 = call float @llvm.SI.load.const(<4 x i32> %2, i32 0)
+  %4 = call float @llvm.SI.load.const(<4 x i32> %2, i32 8)
+  %5 = call float @llvm.SI.load.const(<4 x i32> %2, i32 4)
+  %6 = call float @llvm.SI.load.const(<4 x i32> %2, i32 12)
+  %7 = fmul float %3, %4
+  %8 = fmul float %5, %6
+  %9 = fadd float %7, %8
+  store float %9, float addrspace(1)* %out
+  ret void
+}
+
+
+!0 = metadata !{metadata !"const", null, i32 1}
+
+; Function Attrs: nounwind readnone
+declare float @llvm.SI.load.const(<4 x i32>, i32) #0
+
+attributes #0 = { nounwind readnone }