diff options
-rw-r--r-- | lib/Transforms/Vectorize/SLPVectorizer.cpp | 36 | ||||
-rw-r--r-- | test/Transforms/SLPVectorizer/X86/diamond.ll | 4 | ||||
-rw-r--r-- | test/Transforms/SLPVectorizer/X86/external_user.ll | 2 | ||||
-rw-r--r-- | test/Transforms/SLPVectorizer/X86/rgb_phi.ll | 2 |
4 files changed, 29 insertions, 15 deletions
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 2cf843da5df..12316b4e597 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1320,6 +1320,9 @@ void BoUpSLP::vectorizeTree() { it != e; ++it) { Value *Scalar = it->Scalar; llvm::User *User = it->User; + + // Skip users that we already RAUW. This happens when one instruction + // has multiple uses of the same value. if (std::find(Scalar->use_begin(), Scalar->use_end(), User) == Scalar->use_end()) continue; @@ -1337,8 +1340,18 @@ void BoUpSLP::vectorizeTree() { Instruction *Loc = 0; if (PHINode *PN = dyn_cast<PHINode>(Vec)) { Loc = PN->getParent()->getFirstInsertionPt(); - } else if (Instruction *Iv = dyn_cast<Instruction>(Vec)){ - Loc = ++((BasicBlock::iterator)*Iv); + } else if (isa<Instruction>(Vec)){ + if (PHINode *PH = dyn_cast<PHINode>(User)) { + for (int i = 0, e = PH->getNumIncomingValues(); i != e; ++i) { + if (PH->getIncomingValue(i) == Scalar) { + Loc = PH->getIncomingBlock(i)->getTerminator(); + break; + } + } + assert(Loc && "Unable to find incoming value for the PHI"); + } else { + Loc = cast<Instruction>(User); + } } else { Loc = F->getEntryBlock().begin(); } @@ -1433,24 +1446,25 @@ void BoUpSLP::optimizeGatherSequence() { BasicBlock *BB = *I; // For all instructions in the function: for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { - InsertElementInst *Insert = dyn_cast<InsertElementInst>(it); - if (!Insert || !GatherSeq.count(Insert)) + Instruction *In = it; + if ((!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In)) || + !GatherSeq.count(In)) continue; // Check if we can replace this instruction with any of the // visited instructions. for (SmallPtrSet<Instruction*, 16>::iterator v = Visited.begin(), ve = Visited.end(); v != ve; ++v) { - if (Insert->isIdenticalTo(*v) && - DT->dominates((*v)->getParent(), Insert->getParent())) { - Insert->replaceAllUsesWith(*v); - ToRemove.push_back(Insert); - Insert = 0; + if (In->isIdenticalTo(*v) && + DT->dominates((*v)->getParent(), In->getParent())) { + In->replaceAllUsesWith(*v); + ToRemove.push_back(In); + In = 0; break; } } - if (Insert) - Visited.insert(Insert); + if (In) + Visited.insert(In); } } diff --git a/test/Transforms/SLPVectorizer/X86/diamond.ll b/test/Transforms/SLPVectorizer/X86/diamond.ll index 2a237eaffdf..099f7cfb97e 100644 --- a/test/Transforms/SLPVectorizer/X86/diamond.ll +++ b/test/Transforms/SLPVectorizer/X86/diamond.ll @@ -51,8 +51,8 @@ entry: ; CHECK: @extr_user ; CHECK: load <4 x i32> -; CHECK-NEXT: extractelement <4 x i32> ; CHECK: store <4 x i32> +; CHECK: extractelement <4 x i32> ; CHECK-NEXT: ret define i32 @extr_user(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) { entry: @@ -81,8 +81,8 @@ entry: ; In this example we have an external user that is not the first element in the vector. ; CHECK: @extr_user1 ; CHECK: load <4 x i32> -; CHECK-NEXT: extractelement <4 x i32> ; CHECK: store <4 x i32> +; CHECK: extractelement <4 x i32> ; CHECK-NEXT: ret define i32 @extr_user1(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) { entry: diff --git a/test/Transforms/SLPVectorizer/X86/external_user.ll b/test/Transforms/SLPVectorizer/X86/external_user.ll index 7f032b5b197..22f0e64be0a 100644 --- a/test/Transforms/SLPVectorizer/X86/external_user.ll +++ b/test/Transforms/SLPVectorizer/X86/external_user.ll @@ -26,9 +26,9 @@ target triple = "x86_64-apple-macosx10.8.0" ;CHECK: phi <2 x double> ;CHECK: fadd <2 x double> ;CHECK: fmul <2 x double> -;CHECK: extractelement <2 x double> ;CHECK: br ;CHECK: store <2 x double> +;CHECK: extractelement <2 x double> ;CHECK: ret double define double @ext_user(double* noalias nocapture %B, double* noalias nocapture %A, i32 %n, i32 %m) { diff --git a/test/Transforms/SLPVectorizer/X86/rgb_phi.ll b/test/Transforms/SLPVectorizer/X86/rgb_phi.ll index 6a9243560e8..9f5a6213c2f 100644 --- a/test/Transforms/SLPVectorizer/X86/rgb_phi.ll +++ b/test/Transforms/SLPVectorizer/X86/rgb_phi.ll @@ -23,10 +23,10 @@ target triple = "i386-apple-macosx10.9.0" ;CHECK: fmul <3 x float> ;CHECK: fadd <3 x float> ; At the moment we don't sink extractelements. +;CHECK: br ;CHECK: extractelement ;CHECK: extractelement ;CHECK: extractelement -;CHECK: br ;CHECK: ret define float @foo(float* nocapture readonly %A) { |