summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Vincent Lejeune <vljn@ovi.com> 2014-04-29 04:09:12 +0200
committer Vincent Lejeune <vljn@ovi.com> 2014-05-03 20:01:27 +0200
commit ffada0b8ab8e2f6bd8a85e388347272789670444 (patch)
tree d45808fb8667b1b70cc04c8f6c84ce46ea686508
parent 37cf22968bb6a881590a9f1cf6b44cb5d62b7b5f (diff)
SLP: Add a TTI callback for intrinsics
-rw-r--r-- include/llvm/Analysis/TargetTransformInfo.h 14
-rw-r--r-- lib/Analysis/TargetTransformInfo.cpp 34
-rw-r--r-- lib/CodeGen/BasicTargetTransformInfo.cpp 46
-rw-r--r-- lib/Transforms/Vectorize/SLPVectorizer.cpp 47
4 files changed, 117 insertions, 24 deletions
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 79fe1dcae6..2e5c7702fd 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -22,6 +22,7 @@
#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Pass.h"
#include "llvm/Support/DataTypes.h"
@@ -339,6 +340,19 @@ public:
/// set to false, it returns the number of scalar registers.
virtual unsigned getNumberOfRegisters(bool Vector) const;
+ /// \return true if the given list of intrinsic calls can be vectorized together.
+ virtual bool isIntrinsicListVectorizable(const ArrayRef<Value*> &) const;
+
+ /// \return the indices of the operands of \p VL that can be vectorized.
+ virtual SmallVector<unsigned, 2> getOperandsToVectorize(Value *VL) const;
+
+ /// \return The vector intrinsic call that replaces the scalar calls in \p II.
+ /// \p Builder supplies the insertion point for the new call.
+ /// \p Ops are the vector operands, one per index from getOperandsToVectorize.
+ virtual Value *vectorizesIntrinsic(IRBuilder<> &Builder,
+ const ArrayRef<Value *> &II,
+ const ArrayRef<Value *> &Ops) const;
+
/// \return The width of the largest scalar or vector register type.
virtual unsigned getRegisterBitWidth(bool Vector) const;
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index cdb0b79fd7..e387322e67 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -163,6 +163,22 @@ unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
return PrevTTI->getNumberOfRegisters(Vector);
}
+/// Base implementation: defers the query to PrevTTI and returns its answer.
+bool TargetTransformInfo::isIntrinsicListVectorizable(
+    const ArrayRef<Value *> &Vector) const {
+  const bool Vectorizable = PrevTTI->isIntrinsicListVectorizable(Vector);
+  return Vectorizable;
+}
+
+/// Base implementation: asks PrevTTI which operand indices of \p VL to
+/// vectorize and returns that list unchanged.
+SmallVector<unsigned, 2>
+TargetTransformInfo::getOperandsToVectorize(Value *VL) const {
+  SmallVector<unsigned, 2> Indices = PrevTTI->getOperandsToVectorize(VL);
+  return Indices;
+}
+
+/// Base implementation: lets PrevTTI build the vector intrinsic call from the
+/// scalar calls in \p II and the vector operands in \p Operands.
+Value *TargetTransformInfo::vectorizesIntrinsic(
+    IRBuilder<> &Builder, const ArrayRef<Value *> &II,
+    const ArrayRef<Value *> &Operands) const {
+  Value *Vectorized = PrevTTI->vectorizesIntrinsic(Builder, II, Operands);
+  return Vectorized;
+}
+
unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
return PrevTTI->getRegisterBitWidth(Vector);
}
@@ -554,6 +570,24 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
return 8;
}
+  /// NoTTI never reports a list of intrinsic calls as vectorizable.
+  /// Declared with 'override' only, like the sibling overrides in this struct;
+  /// 'virtual' is implied and was redundant.
+  bool isIntrinsicListVectorizable(const ArrayRef<Value *> &VL) const override {
+    return false;
+  }
+
+  /// NoTTI offers no operand indices for vectorization: the result is always
+  /// an empty list. Declared with 'override' only, like the sibling overrides
+  /// in this struct; 'virtual' is implied and was redundant.
+  SmallVector<unsigned, 2> getOperandsToVectorize(Value *) const override {
+    return SmallVector<unsigned, 2>();
+  }
+
+  /// Aborts via llvm_unreachable: NoTTI reports no intrinsic list as
+  /// vectorizable, so it has no vector call to build.
+  /// No return statement follows because llvm_unreachable does not return.
+  Value *vectorizesIntrinsic(IRBuilder<> &Builder, const ArrayRef<Value *> &II,
+                             const ArrayRef<Value *> &Ops) const override {
+    llvm_unreachable("NoTTI cannot vectorize intrinsics");
+  }
+
unsigned getRegisterBitWidth(bool Vector) const override {
return 32;
}
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index 6ad1f28bb2..945a89237a 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -17,7 +17,9 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/VectorUtils.h"
#include <utility>
using namespace llvm;
@@ -89,6 +91,11 @@ public:
/// @{
unsigned getNumberOfRegisters(bool Vector) const override;
+ bool isIntrinsicListVectorizable(const ArrayRef<Value *> &VL) const override;
+ SmallVector<unsigned, 2> getOperandsToVectorize(Value *VL) const override;
+ Value *vectorizesIntrinsic(IRBuilder<> &Builder,
+ const ArrayRef<Value *> &II,
+ const ArrayRef<Value *> &Ops) const override;
unsigned getMaximumUnrollFactor() const override;
unsigned getRegisterBitWidth(bool Vector) const override;
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
@@ -214,6 +221,45 @@ unsigned BasicTTI::getNumberOfRegisters(bool Vector) const {
return 1;
}
+/// \return true if \p VL is a non-empty list of calls that all target the same
+/// trivially vectorizable intrinsic function; false otherwise.
+bool BasicTTI::isIntrinsicListVectorizable(const ArrayRef<Value *> &VL) const {
+  // An empty list has no leading call to inspect; indexing VL[0] would be out
+  // of bounds.
+  if (VL.empty())
+    return false;
+
+  // Reject a leading value that is not an intrinsic call up front, so the
+  // dereference of II below never depends on what the helper returns for
+  // Intrinsic::not_intrinsic.
+  IntrinsicInst *II = dyn_cast<IntrinsicInst>(VL[0]);
+  if (!II || !isTriviallyVectorizable(II->getIntrinsicID()))
+    return false;
+
+  // Check if the calls are all to the same vectorizable intrinsic.
+  Function *Int = II->getCalledFunction();
+  for (unsigned i = 1, e = VL.size(); i != e; ++i) {
+    IntrinsicInst *II2 = dyn_cast<IntrinsicInst>(VL[i]);
+    if (!II2 || II2->getCalledFunction() != Int)
+      return false;
+  }
+
+  return true;
+}
+
+/// \return the argument indices of \p VL that should be vectorized: every
+/// argument index (0 .. NumArgOperands-1) when \p VL is an intrinsic call, and
+/// an empty list when it is not.
+SmallVector<unsigned, 2> BasicTTI::getOperandsToVectorize(Value *VL) const {
+  SmallVector<unsigned, 2> Operands;
+
+  // dyn_cast yields null for a value that is not an intrinsic call; report
+  // "nothing to vectorize" instead of dereferencing a null pointer.
+  IntrinsicInst *II = dyn_cast<IntrinsicInst>(VL);
+  if (!II)
+    return Operands;
+
+  const unsigned NumArgs = II->getNumArgOperands();
+  Operands.reserve(NumArgs);
+  for (unsigned i = 0; i != NumArgs; ++i)
+    Operands.push_back(i);
+  return Operands;
+}
+
+/// Builds one call to the vector form of the intrinsic called by the scalar
+/// calls in \p IIs, passing \p Ops as its arguments.
+///
+/// The declaration is looked up in the module that owns the first scalar call
+/// and is overloaded on a vector whose element type is that call's type and
+/// whose length is IIs.size(). The call is emitted through \p Builder.
+Value *BasicTTI::vectorizesIntrinsic(IRBuilder<> &Builder,
+                                     const ArrayRef<Value *> &IIs,
+                                     const ArrayRef<Value *> &Ops) const {
+  assert(!IIs.empty());
+  IntrinsicInst *FirstCall = cast<IntrinsicInst>(IIs[0]);
+
+  // Walk up from the call: instruction -> basic block -> function -> module.
+  Function *ParentFn = FirstCall->getParent()->getParent();
+  Module *M = ParentFn->getParent();
+
+  Type *VecTy = VectorType::get(FirstCall->getType(), IIs.size());
+  Type *OverloadTys[] = { VecTy };
+  Function *VecDecl =
+      Intrinsic::getDeclaration(M, FirstCall->getIntrinsicID(), OverloadTys);
+  return Builder.CreateCall(VecDecl, Ops);
+}
+
unsigned BasicTTI::getRegisterBitWidth(bool Vector) const {
return 32;
}
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b49b1b0ff5..5d93f7d005 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -949,18 +949,16 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
case Instruction::Call: {
// Check if the calls are all to the same vectorizable intrinsic.
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(VL[0]);
- Intrinsic::ID ID = II ? II->getIntrinsicID() : Intrinsic::not_intrinsic;
-
- if (!isTriviallyVectorizable(ID)) {
+ if (!TTI->isIntrinsicListVectorizable(VL)) {
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
return;
}
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(VL[0]);
Function *Int = II->getCalledFunction();
- for (unsigned i = 1, e = VL.size(); i != e; ++i) {
+ for (unsigned i = 0, e = VL.size(); i < e; i++) {
IntrinsicInst *II2 = dyn_cast<IntrinsicInst>(VL[i]);
if (!II2 || II2->getCalledFunction() != Int) {
newTreeEntry(VL, false);
@@ -971,12 +969,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
newTreeEntry(VL, true);
- for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i) {
+ // Prepare the operand vector.
+ const SmallVector<unsigned, 2> &OpsIdx = TTI->getOperandsToVectorize(II);
+ for (unsigned i = 0, ie = OpsIdx.size(); i < ie; ++i) {
ValueList Operands;
// Prepare the operand vector.
- for (unsigned j = 0; j < VL.size(); ++j) {
+ for (unsigned j = 0, je = VL.size(); j < je; ++j) {
IntrinsicInst *II2 = dyn_cast<IntrinsicInst>(VL[j]);
- Operands.push_back(II2->getArgOperand(i));
+ Operands.push_back(II2->getArgOperand(OpsIdx[i]));
}
buildTree_rec(Operands, Depth + 1);
}
@@ -1137,10 +1137,12 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
// Calculate the cost of the scalar and vector calls.
SmallVector<Type*, 4> ScalarTys, VecTys;
- for (unsigned op = 0, opc = II->getNumArgOperands(); op!= opc; ++op) {
- ScalarTys.push_back(CI->getArgOperand(op)->getType());
- VecTys.push_back(VectorType::get(CI->getArgOperand(op)->getType(),
- VecTy->getNumElements()));
+ const SmallVector<unsigned, 2> &OpsIdx = TTI->getOperandsToVectorize(CI);
+ for (unsigned op = 0, opc = OpsIdx.size(); op!= opc; ++op) {
+ ScalarTys.push_back(CI->getArgOperand(OpsIdx[op])->getType());
+ VecTys.push_back(
+ VectorType::get(CI->getArgOperand(OpsIdx[op])->getType(),
+ VecTy->getNumElements()));
}
int ScalarCallCost = VecTy->getNumElements() *
@@ -1645,27 +1647,24 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
CallInst *CI = cast<CallInst>(VL0);
setInsertPointAfterBundle(E->Scalars);
+ std::vector<Value *> VL;
+ for (int i = 0, e = E->Scalars.size(); i < e; ++i)
+ VL.push_back(E->Scalars[i]);
std::vector<Value *> OpVecs;
- for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
+ const SmallVector<unsigned, 2> &OpsIdx = TTI->getOperandsToVectorize(CI);
+ for (int j = 0, je = OpsIdx.size(); j < je; ++j) {
ValueList OpVL;
- for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
+ for (int i = 0, ie = E->Scalars.size(); i < ie; ++i) {
CallInst *CEI = cast<CallInst>(E->Scalars[i]);
- OpVL.push_back(CEI->getArgOperand(j));
+ OpVL.push_back(CEI->getArgOperand(OpsIdx[j]));
}
Value *OpVec = vectorizeTree(OpVL);
DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n");
OpVecs.push_back(OpVec);
}
-
- Module *M = F->getParent();
- IntrinsicInst *II = cast<IntrinsicInst>(CI);
- Intrinsic::ID ID = II->getIntrinsicID();
- Type *Tys[] = { VectorType::get(CI->getType(), E->Scalars.size()) };
- Function *CF = Intrinsic::getDeclaration(M, ID, Tys);
- Value *V = Builder.CreateCall(CF, OpVecs);
- E->VectorizedValue = V;
- return V;
+ E->VectorizedValue = TTI->vectorizesIntrinsic(Builder, VL, OpVecs);
+ return E->VectorizedValue;
}
default:
llvm_unreachable("unknown inst");