diff options
author | Vincent Lejeune <vljn@ovi.com> | 2014-04-29 04:09:12 +0200 |
---|---|---|
committer | Vincent Lejeune <vljn@ovi.com> | 2014-05-03 20:01:27 +0200 |
commit | ffada0b8ab8e2f6bd8a85e388347272789670444 (patch) | |
tree | d45808fb8667b1b70cc04c8f6c84ce46ea686508 | |
parent | 37cf22968bb6a881590a9f1cf6b44cb5d62b7b5f (diff) |
SLP: Add a TTI callback for intrinsics
-rw-r--r-- | include/llvm/Analysis/TargetTransformInfo.h | 14 | ||||
-rw-r--r-- | lib/Analysis/TargetTransformInfo.cpp | 34 | ||||
-rw-r--r-- | lib/CodeGen/BasicTargetTransformInfo.cpp | 46 | ||||
-rw-r--r-- | lib/Transforms/Vectorize/SLPVectorizer.cpp | 47 |
4 files changed, 117 insertions, 24 deletions
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 79fe1dcae6..2e5c7702fd 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -22,6 +22,7 @@ #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Pass.h" #include "llvm/Support/DataTypes.h" @@ -339,6 +340,19 @@ public: /// set to false, it returns the number of scalar registers. virtual unsigned getNumberOfRegisters(bool Vector) const; + /// \return true if the list of intrinsics can be vectorized. + virtual bool isIntrinsicListVectorizable(const ArrayRef<Value*> &) const; + + /// \return the operands indexes that can be vectorized. + virtual SmallVector<unsigned, 2> getOperandsToVectorize(Value *VL) const; + + /// \return The vectorized intrinsic from IIs. + /// Builder is the point of insertion. + /// Ops are vector operands built from getOperandsToVectorize information + virtual Value *vectorizesIntrinsic(IRBuilder<> &Builder, + const ArrayRef<Value *> &II, + const ArrayRef<Value *> &Ops) const; + /// \return The width of the largest scalar or vector register type. virtual unsigned getRegisterBitWidth(bool Vector) const; diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index cdb0b79fd7..e387322e67 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -163,6 +163,22 @@ unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const { return PrevTTI->getNumberOfRegisters(Vector); } +bool TargetTransformInfo::isIntrinsicListVectorizable( + const ArrayRef<Value *> &Vector) const { + return PrevTTI->isIntrinsicListVectorizable(Vector); +} + +SmallVector<unsigned, 2> TargetTransformInfo::getOperandsToVectorize(Value *VL) + const { + return PrevTTI->getOperandsToVectorize(VL); +} + +Value *TargetTransformInfo::vectorizesIntrinsic(IRBuilder<> &Builder, + const ArrayRef<Value *> &II, + const ArrayRef<Value *> &Operands) const { + return PrevTTI->vectorizesIntrinsic(Builder, II, Operands); +} + unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { return PrevTTI->getRegisterBitWidth(Vector); } @@ -554,6 +570,24 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo { return 8; } + virtual bool isIntrinsicListVectorizable(const ArrayRef<Value*> &VL) const + override { + return false; + } + + virtual SmallVector<unsigned, 2> getOperandsToVectorize(Value *) const + override { + return SmallVector<unsigned, 2>(); + } + + virtual Value *vectorizesIntrinsic(IRBuilder<> &Builder, + const ArrayRef<Value *> &II, + const ArrayRef<Value *> &Ops) const + override { + llvm_unreachable("NoTTI cannot vectorize intrinsics"); + return 0; + } + unsigned getRegisterBitWidth(bool Vector) const override { return 32; } diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index 6ad1f28bb2..945a89237a 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -17,7 +17,9 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Transforms/Utils/VectorUtils.h" #include <utility> using namespace llvm; @@ -89,6 +91,11 @@ public: /// @{ unsigned getNumberOfRegisters(bool Vector) const override; + bool isIntrinsicListVectorizable(const ArrayRef<Value *> &VL) const override; + SmallVector<unsigned, 2> getOperandsToVectorize(Value *VL) const override; + Value *vectorizesIntrinsic(IRBuilder<> &Builder, + const ArrayRef<Value *> &II, + const ArrayRef<Value *> &Ops) const override; unsigned getMaximumUnrollFactor() const override; unsigned getRegisterBitWidth(bool Vector) const override; unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, @@ -214,6 +221,45 @@ unsigned BasicTTI::getNumberOfRegisters(bool Vector) const { return 1; } +bool BasicTTI::isIntrinsicListVectorizable(const ArrayRef<Value*> &VL) + const { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(VL[0]); + Intrinsic::ID ID = II ? II->getIntrinsicID() : Intrinsic::not_intrinsic; + if (!isTriviallyVectorizable(ID)) + return false; + + // Check if the calls are all to the same vectorizable intrinsic. + Function *Int = II->getCalledFunction(); + + for (unsigned i = 1, e = VL.size(); i != e; ++i) { + IntrinsicInst *II2 = dyn_cast<IntrinsicInst>(VL[i]); + if (!II2 || II2->getCalledFunction() != Int) + return false; + } + + return true; +} + +SmallVector<unsigned, 2> BasicTTI::getOperandsToVectorize(Value *VL) const { + SmallVector<unsigned, 2> Operands; + IntrinsicInst *II = dyn_cast<IntrinsicInst>(VL); + for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i) + Operands.push_back(i); + return Operands; +} + +Value *BasicTTI::vectorizesIntrinsic(IRBuilder<> &Builder, + const ArrayRef<Value *> &IIs, + const ArrayRef<Value *> &Ops) const { + assert (!IIs.empty()); + IntrinsicInst *II = cast<IntrinsicInst>(IIs[0]); + Module *M = II->getParent()->getParent()->getParent(); + Intrinsic::ID ID = II->getIntrinsicID(); + Type *Tys[] = { VectorType::get(IIs[0]->getType(), IIs.size()) }; + Function *CF = Intrinsic::getDeclaration(M, ID, Tys); + return Builder.CreateCall(CF, Ops); +} + unsigned BasicTTI::getRegisterBitWidth(bool Vector) const { return 32; } diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index b49b1b0ff5..5d93f7d005 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -949,18 +949,16 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) { } case Instruction::Call: { // Check if the calls are all to the same vectorizable intrinsic. - IntrinsicInst *II = dyn_cast<IntrinsicInst>(VL[0]); - Intrinsic::ID ID = II ? II->getIntrinsicID() : Intrinsic::not_intrinsic; - - if (!isTriviallyVectorizable(ID)) { + if (!TTI->isIntrinsicListVectorizable(VL)) { newTreeEntry(VL, false); DEBUG(dbgs() << "SLP: Non-vectorizable call.\n"); return; } + IntrinsicInst *II = dyn_cast<IntrinsicInst>(VL[0]); Function *Int = II->getCalledFunction(); - for (unsigned i = 1, e = VL.size(); i != e; ++i) { + for (unsigned i = 0, e = VL.size(); i < e; i++) { IntrinsicInst *II2 = dyn_cast<IntrinsicInst>(VL[i]); if (!II2 || II2->getCalledFunction() != Int) { newTreeEntry(VL, false); @@ -971,12 +969,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) { } newTreeEntry(VL, true); - for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i) { + // Prepare the operand vector. + const SmallVector<unsigned, 2> &OpsIdx = TTI->getOperandsToVectorize(II); + for (unsigned i = 0, ie = OpsIdx.size(); i < ie; ++i) { ValueList Operands; // Prepare the operand vector. - for (unsigned j = 0; j < VL.size(); ++j) { + for (unsigned j = 0, je = VL.size(); j < je; ++j) { IntrinsicInst *II2 = dyn_cast<IntrinsicInst>(VL[j]); - Operands.push_back(II2->getArgOperand(i)); + Operands.push_back(II2->getArgOperand(OpsIdx[i])); } buildTree_rec(Operands, Depth + 1); } @@ -1137,10 +1137,12 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { // Calculate the cost of the scalar and vector calls. SmallVector<Type*, 4> ScalarTys, VecTys; - for (unsigned op = 0, opc = II->getNumArgOperands(); op!= opc; ++op) { - ScalarTys.push_back(CI->getArgOperand(op)->getType()); - VecTys.push_back(VectorType::get(CI->getArgOperand(op)->getType(), - VecTy->getNumElements())); + const SmallVector<unsigned, 2> &OpsIdx = TTI->getOperandsToVectorize(CI); + for (unsigned op = 0, opc = OpsIdx.size(); op!= opc; ++op) { + ScalarTys.push_back(CI->getArgOperand(OpsIdx[op])->getType()); + VecTys.push_back( + VectorType::get(CI->getArgOperand(OpsIdx[op])->getType(), + VecTy->getNumElements())); } int ScalarCallCost = VecTy->getNumElements() * @@ -1645,27 +1647,24 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { CallInst *CI = cast<CallInst>(VL0); setInsertPointAfterBundle(E->Scalars); + std::vector<Value *> VL; + for (int i = 0, e = E->Scalars.size(); i < e; ++i) + VL.push_back(E->Scalars[i]); std::vector<Value *> OpVecs; - for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) { + const SmallVector<unsigned, 2> &OpsIdx = TTI->getOperandsToVectorize(CI); + for (int j = 0, je = OpsIdx.size(); j < je; ++j) { ValueList OpVL; - for (int i = 0, e = E->Scalars.size(); i < e; ++i) { + for (int i = 0, ie = E->Scalars.size(); i < ie; ++i) { CallInst *CEI = cast<CallInst>(E->Scalars[i]); - OpVL.push_back(CEI->getArgOperand(j)); + OpVL.push_back(CEI->getArgOperand(OpsIdx[j])); } Value *OpVec = vectorizeTree(OpVL); DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n"); OpVecs.push_back(OpVec); } - - Module *M = F->getParent(); - IntrinsicInst *II = cast<IntrinsicInst>(CI); - Intrinsic::ID ID = II->getIntrinsicID(); - Type *Tys[] = { VectorType::get(CI->getType(), E->Scalars.size()) }; - Function *CF = Intrinsic::getDeclaration(M, ID, Tys); - Value *V = Builder.CreateCall(CF, OpVecs); - E->VectorizedValue = V; - return V; + E->VectorizedValue = TTI->vectorizesIntrinsic(Builder, VL, OpVecs); + return E->VectorizedValue; } default: llvm_unreachable("unknown inst"); |