summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Craig Topper <craig.topper@gmail.com> 2016-06-26 05:10:56 +0000
committer Craig Topper <craig.topper@gmail.com> 2016-06-26 05:10:56 +0000
commit caddfacae36484b74e045d3924851415a1983250 (patch)
tree eaa8ff3ec1f66cf30dec25d0d56da2f86a15a415
parent e8326f9cc1dfdc2617c6f605646ba947c2f0948f (diff)
[X86] Rewrite lowerVectorShuffleWithPSHUFB to not require a ZeroableMask to be created. We can do everything with the starting mask and zeroable bit vector. This removes the last usage of isSingleInputShuffleMask. NFC
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273804 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp 60
1 files changed, 21 insertions, 39 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 7eb7ee01bb5..10d525a43ae 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7026,21 +7026,6 @@ static bool isNoopShuffleMask(ArrayRef<int> Mask) {
return true;
}
-/// \brief Helper function to classify a mask as a single-input mask.
-///
-/// This isn't a generic single-input test because in the vector shuffle
-/// lowering we canonicalize single inputs to be the first input operand. This
-/// means we can more quickly test for a single input by only checking whether
-/// an input from the second operand exists. We also assume that the size of
-/// mask corresponds to the size of the input vectors which isn't true in the
-/// fully general case.
-static bool isSingleInputShuffleMask(ArrayRef<int> Mask) {
- for (int M : Mask)
- if (M >= (int)Mask.size())
- return false;
- return true;
-}
-
/// \brief Test whether there are elements crossing 128-bit lanes in this
/// shuffle mask.
///
@@ -7254,16 +7239,6 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
return Zeroable;
}
-/// Mutate a shuffle mask, replacing zeroable elements with SM_SentinelZero.
-static void computeZeroableShuffleMask(MutableArrayRef<int> Mask,
- SDValue V1, SDValue V2) {
- SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
- for (int i = 0, Size = Mask.size(); i < Size; ++i) {
- if (Mask[i] != SM_SentinelUndef && Zeroable[i])
- Mask[i] = SM_SentinelZero;
- }
-}
-
/// Try to lower a shuffle with a single PSHUFB of V1.
/// This is only possible if V2 is unused (at all, or only for zero elements).
static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
@@ -7271,34 +7246,41 @@ static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- const int NumBytes = VT.is128BitVector() ? 16 : 32;
+ int Size = Mask.size();
+ int LaneSize = 128 / VT.getScalarSizeInBits();
+ const int NumBytes = VT.getSizeInBits() / 8;
const int NumEltBytes = VT.getScalarSizeInBits() / 8;
assert((Subtarget.hasSSSE3() && VT.is128BitVector()) ||
(Subtarget.hasAVX2() && VT.is256BitVector()));
- SmallVector<int, 32> ZeroableMask(Mask.begin(), Mask.end());
- computeZeroableShuffleMask(ZeroableMask, V1, V2);
-
- if (!isSingleInputShuffleMask(ZeroableMask) ||
- is128BitLaneCrossingShuffleMask(VT, Mask))
- return SDValue();
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
SmallVector<SDValue, 32> PSHUFBMask(NumBytes);
// Sign bit set in i8 mask means zero element.
SDValue ZeroMask = DAG.getConstant(0x80, DL, MVT::i8);
for (int i = 0; i < NumBytes; ++i) {
- int M = ZeroableMask[i / NumEltBytes];
- if (M == SM_SentinelUndef) {
+ int M = Mask[i / NumEltBytes];
+ if (M < 0) {
PSHUFBMask[i] = DAG.getUNDEF(MVT::i8);
- } else if (M == SM_SentinelZero) {
+ continue;
+ }
+ if (Zeroable[i / NumEltBytes]) {
PSHUFBMask[i] = ZeroMask;
- } else {
- M = M * NumEltBytes + (i % NumEltBytes);
- M = i < 16 ? M : M - 16;
- PSHUFBMask[i] = DAG.getConstant(M, DL, MVT::i8);
+ continue;
}
+ // Only allow V1.
+ if (M >= Size)
+ return SDValue();
+
+ // PSHUFB can't cross lanes, ensure this doesn't happen.
+ if ((M / LaneSize) != ((i / NumEltBytes) / LaneSize))
+ return SDValue();
+
+ M = M % LaneSize;
+ M = M * NumEltBytes + (i % NumEltBytes);
+ PSHUFBMask[i] = DAG.getConstant(M, DL, MVT::i8);
}
MVT I8VT = MVT::getVectorVT(MVT::i8, NumBytes);