-rw-r--r-- lib/Transforms/Scalar/SROA.cpp    | 119
-rw-r--r-- test/Transforms/SROA/basictest.ll |  63
2 files changed, 161 insertions, 21 deletions
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 5a9247ea1b..e3182d319c 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -1723,6 +1723,54 @@ static bool isVectorPromotionViable(const TargetData &TD,
   return true;
 }
 
+/// \brief Test whether the given alloca partition can be promoted to an int.
+///
+/// This is a quick test to check whether we can rewrite a particular alloca
+/// partition (and its newly formed alloca) into an integer alloca suitable for
+/// promotion to an SSA value. We can only ensure this for a limited set of
+/// operations, and we don't want to do the rewrites unless we are confident
+/// that the result will be promotable, so we have an early test here.
+static bool isIntegerPromotionViable(const TargetData &TD,
+                                     Type *AllocaTy,
+                                     AllocaPartitioning &P,
+                                     AllocaPartitioning::const_use_iterator I,
+                                     AllocaPartitioning::const_use_iterator E) {
+  IntegerType *Ty = dyn_cast<IntegerType>(AllocaTy);
+  if (!Ty)
+    return false;
+
+  // Check the uses to ensure they are (likely) promotable integer uses. Also
+  // ensure that the alloca has a covering load or store. We don't want to
+  // promote because of some other unsplittable entry (which we may make
+  // splittable later) and lose the ability to promote each element access.
+  bool WholeAllocaOp = false;
+  for (; I != E; ++I) {
+    if (LoadInst *LI = dyn_cast<LoadInst>(&*I->User)) {
+      if (LI->isVolatile() || !LI->getType()->isIntegerTy())
+        return false;
+      if (LI->getType() == Ty)
+        WholeAllocaOp = true;
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(&*I->User)) {
+      if (SI->isVolatile() || !SI->getValueOperand()->getType()->isIntegerTy())
+        return false;
+      if (SI->getValueOperand()->getType() == Ty)
+        WholeAllocaOp = true;
+    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&*I->User)) {
+      if (MI->isVolatile())
+        return false;
+      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(&*I->User)) {
+        const AllocaPartitioning::MemTransferOffsets &MTO
+          = P.getMemTransferOffsets(*MTI);
+        if (!MTO.IsSplittable)
+          return false;
+      }
+    } else {
+      return false;
+    }
+  }
+  return WholeAllocaOp;
+}
+
 namespace {
 /// \brief Visitor to rewrite instructions using a partition of an alloca to
 /// use a new alloca.
@@ -1754,6 +1802,12 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
   Type *ElementTy;
   uint64_t ElementSize;
 
+  // This is a convenience and flag variable that will be null unless the new
+  // alloca has a promotion-targeted integer type due to passing
+  // isIntegerPromotionViable above. If it is non-null, the desired integer
+  // type will be stored here for easy access during rewriting.
+  IntegerType *IntPromotionTy;
+
   // The offset of the partition user currently being rewritten.
   uint64_t BeginOffset, EndOffset;
   Instruction *OldPtr;
@@ -1770,7 +1824,7 @@ public:
       OldAI(OldAI), NewAI(NewAI),
       NewAllocaBeginOffset(NewBeginOffset),
       NewAllocaEndOffset(NewEndOffset),
-      VecTy(), ElementTy(), ElementSize(),
+      VecTy(), ElementTy(), ElementSize(), IntPromotionTy(),
       BeginOffset(), EndOffset() {
   }
 
@@ -1786,6 +1840,9 @@ public:
       assert((VecTy->getScalarSizeInBits() % 8) == 0 &&
             "Only multiple-of-8 sized vector elements are viable");
       ElementSize = VecTy->getScalarSizeInBits() / 8;
+    } else if (isIntegerPromotionViable(TD, NewAI.getAllocatedType(),
+                                        P, I, E)) {
+      IntPromotionTy = cast<IntegerType>(NewAI.getAllocatedType());
     }
     bool CanSROA = true;
     for (; I != E; ++I) {
@@ -1830,6 +1887,43 @@ private:
     return IRB.getInt32(Index);
   }
 
+  Value *extractInteger(IRBuilder<> &IRB, IntegerType *TargetTy,
+                        uint64_t Offset) {
+    assert(IntPromotionTy && "Alloca is not an integer we can extract from");
+    Value *V = IRB.CreateLoad(&NewAI, getName(".load"));
+    assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
+    uint64_t RelOffset = Offset - NewAllocaBeginOffset;
+    if (RelOffset)
+      V = IRB.CreateLShr(V, RelOffset*8, getName(".shift"));
+    if (TargetTy != IntPromotionTy) {
+      assert(TargetTy->getBitWidth() < IntPromotionTy->getBitWidth() &&
+             "Cannot extract to a larger integer!");
+      V = IRB.CreateTrunc(V, TargetTy, getName(".trunc"));
+    }
+    return V;
+  }
+
+  StoreInst *insertInteger(IRBuilder<> &IRB, Value *V, uint64_t Offset) {
+    IntegerType *Ty = cast<IntegerType>(V->getType());
+    if (Ty == IntPromotionTy)
+      return IRB.CreateStore(V, &NewAI);
+
+    assert(Ty->getBitWidth() < IntPromotionTy->getBitWidth() &&
+           "Cannot insert a larger integer!");
+    V = IRB.CreateZExt(V, IntPromotionTy, getName(".ext"));
+    assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
+    uint64_t RelOffset = Offset - NewAllocaBeginOffset;
+    if (RelOffset)
+      V = IRB.CreateShl(V, RelOffset*8, getName(".shift"));
+
+    APInt Mask = ~Ty->getMask().zext(IntPromotionTy->getBitWidth())
+                               .shl(RelOffset*8);
+    Value *Old = IRB.CreateAnd(IRB.CreateLoad(&NewAI, getName(".oldload")),
+                               Mask, getName(".mask"));
+    return IRB.CreateStore(IRB.CreateOr(Old, V, getName(".insert")),
+                           &NewAI);
+  }
+
   void deleteIfTriviallyDead(Value *V) {
     Instruction *I = cast<Instruction>(V);
     if (isInstructionTriviallyDead(I))
@@ -1865,6 +1959,16 @@ private:
     return true;
   }
 
+  bool rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
+    assert(!LI.isVolatile());
+    Value *Result = extractInteger(IRB, cast<IntegerType>(LI.getType()),
+                                   BeginOffset);
+    LI.replaceAllUsesWith(Result);
+    Pass.DeadInsts.push_back(&LI);
+    DEBUG(dbgs() << "          to: " << *Result << "\n");
+    return true;
+  }
+
   bool visitLoadInst(LoadInst &LI) {
     DEBUG(dbgs() << "    original: " << LI << "\n");
     Value *OldOp = LI.getOperand(0);
@@ -1873,6 +1977,8 @@ private:
 
     if (VecTy)
       return rewriteVectorizedLoadInst(IRB, LI, OldOp);
+    if (IntPromotionTy)
+      return rewriteIntegerLoad(IRB, LI);
 
     Value *NewPtr = getAdjustedAllocaPtr(IRB,
                                          LI.getPointerOperand()->getType());
@@ -1904,6 +2010,15 @@ private:
     return true;
   }
 
+  bool rewriteIntegerStore(IRBuilder<> &IRB, StoreInst &SI) {
+    assert(!SI.isVolatile());
+    StoreInst *Store = insertInteger(IRB, SI.getValueOperand(), BeginOffset);
+    Pass.DeadInsts.push_back(&SI);
+    (void)Store;
+    DEBUG(dbgs() << "          to: " << *Store << "\n");
+    return true;
+  }
+
   bool visitStoreInst(StoreInst &SI) {
     DEBUG(dbgs() << "    original: " << SI << "\n");
     Value *OldOp = SI.getOperand(1);
@@ -1912,6 +2027,8 @@ private:
 
     if (VecTy)
       return rewriteVectorizedStoreInst(IRB, SI, OldOp);
+    if (IntPromotionTy)
+      return rewriteIntegerStore(IRB, SI);
 
     Value *NewPtr = getAdjustedAllocaPtr(IRB,
                                          SI.getPointerOperand()->getType());
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index be3ef64dc2..a61de05f45 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -553,30 +553,53 @@ bad:
   ret i32 %Z2
 }
 
-define i32 @test12() {
-; CHECK: @test12
-; CHECK: alloca i24
-;
-; FIXME: SROA should promote accesses to this into whole i24 operations instead
-; of i8 operations.
-; CHECK: store i8 0
-; CHECK: store i8 0
-; CHECK: store i8 0
+define i8 @test12() {
+; We fully promote these to the i24 load or store size, resulting in just masks
+; and other operations that instcombine will fold, but no alloca.
 ;
-; CHECK: load i24*
+; CHECK: @test12
 entry:
   %a = alloca [3 x i8]
-  %b0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
-  store i8 0, i8* %b0ptr
-  %b1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
-  store i8 0, i8* %b1ptr
-  %b2ptr = getelementptr [3 x i8]* %a, i64 0, i32 2
-  store i8 0, i8* %b2ptr
-  %iptr = bitcast [3 x i8]* %a to i24*
-  %i = load i24* %iptr
-  %ret = zext i24 %i to i32
-  ret i32 %ret
+  %b = alloca [3 x i8]
+; CHECK-NOT: alloca
+
+  %a0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
+  store i8 0, i8* %a0ptr
+  %a1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
+  store i8 0, i8* %a1ptr
+  %a2ptr = getelementptr [3 x i8]* %a, i64 0, i32 2
+  store i8 0, i8* %a2ptr
+  %aiptr = bitcast [3 x i8]* %a to i24*
+  %ai = load i24* %aiptr
+; CHECK-NOT: store
+; CHECK-NOT: load
+; CHECK:      %[[mask0:.*]] = and i24 undef, -256
+; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[mask0]], -65281
+; CHECK-NEXT: %[[mask2:.*]] = and i24 %[[mask1]], 65535
+
+  %biptr = bitcast [3 x i8]* %b to i24*
+  store i24 %ai, i24* %biptr
+  %b0ptr = getelementptr [3 x i8]* %b, i64 0, i32 0
+  %b0 = load i8* %b0ptr
+  %b1ptr = getelementptr [3 x i8]* %b, i64 0, i32 1
+  %b1 = load i8* %b1ptr
+  %b2ptr = getelementptr [3 x i8]* %b, i64 0, i32 2
+  %b2 = load i8* %b2ptr
+; CHECK-NOT: store
+; CHECK-NOT: load
+; CHECK:      %[[trunc0:.*]] = trunc i24 %[[mask2]] to i8
+; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[mask2]], 8
+; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8
+; CHECK-NEXT: %[[shift2:.*]] = lshr i24 %[[mask2]], 16
+; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[shift2]] to i8
+
+  %bsum0 = add i8 %b0, %b1
+  %bsum1 = add i8 %bsum0, %b2
+  ret i8 %bsum1
+; CHECK:      %[[sum0:.*]] = add i8 %[[trunc0]], %[[trunc1]]
+; CHECK-NEXT: %[[sum1:.*]] = add i8 %[[sum0]], %[[trunc2]]
+; CHECK-NEXT: ret i8 %[[sum1]]
 }
 
 define i32 @test13() {
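For readers tracing the rewriter, below is a minimal standalone C++ sketch (not part of this commit) of the shift-and-mask arithmetic that extractInteger and insertInteger emit as IR. The function names, the fixed 64-bit width, and the use of plain integers instead of APInt/IRBuilder are illustrative assumptions; the sketch also assumes the layout the patch's shifts imply, where the byte at offset N occupies bits [8N, 8N+8).

```cpp
// Hypothetical model of the promoted-integer load/store rewrites. The pass
// itself builds the corresponding IR with IRBuilder (CreateLShr/CreateTrunc
// for extraction; CreateZExt/CreateShl/CreateAnd/CreateOr for insertion).
#include <cassert>
#include <cstdint>
#include <cstdio>

// Model of extractInteger: read a Width-bit field at byte offset Off out of
// a 64-bit "alloca" value, mirroring the RelOffset*8 shift in the patch.
static uint64_t extractInt(uint64_t Alloca, unsigned Width, unsigned Off) {
  uint64_t V = Alloca >> (Off * 8);                  // CreateLShr
  return Width < 64 ? V & ((1ULL << Width) - 1) : V; // CreateTrunc
}

// Model of insertInteger: splice a Width-bit value into the field at byte
// offset Off while preserving every other bit of the alloca.
static uint64_t insertInt(uint64_t Alloca, uint64_t V, unsigned Width,
                          unsigned Off) {
  // ~Ty->getMask().zext(...).shl(RelOffset*8): zeros over the target field,
  // ones everywhere else.
  uint64_t Mask = Width < 64 ? ~(((1ULL << Width) - 1) << (Off * 8)) : 0;
  return (Alloca & Mask)     // CreateAnd: clear the old field
         | (V << (Off * 8)); // CreateZExt + CreateShl + CreateOr: insert
}

int main() {
  uint64_t A = 0;
  A = insertInt(A, 0xAB, 8, 1);        // like a "store i8" at byte offset 1
  assert(extractInt(A, 8, 1) == 0xAB); // like a "load i8" of the same byte
  std::printf("%#llx\n", (unsigned long long)A); // prints 0xab00
  return 0;
}
```

One detail visible in the new test12 CHECK lines: IRBuilder folds `or X, 0` to `X`, so inserting a zero byte leaves only the masking `and`, which is why the test matches bare `and i24 undef, ...` chains once promotion replaces the initial load of the fresh alloca with `undef`.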