summary refs log tree commit diff
diff options
context:
space:
mode:
author Craig Topper <craig.topper@gmail.com> 2016-07-11 05:36:48 +0000
committer Craig Topper <craig.topper@gmail.com> 2016-07-11 05:36:48 +0000
commit b6d6904481045bb31db0804692b413d78fc247b9 (patch)
tree 853c92e003bab7eefabe52fb54784c52e4096851
parent c09e328b81a45a270f75e9c5be41ba0971f05757 (diff)
[AVX512] Use vpternlog with an immediate of 0xff to create 512-bit all one vectors.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275045 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp 6
-rw-r--r-- lib/Target/X86/X86InstrAVX512.td 2
-rw-r--r-- lib/Target/X86/X86InstrInfo.cpp 16
-rw-r--r-- test/CodeGen/X86/avx512-build-vector.ll 7
-rw-r--r-- test/CodeGen/X86/avx512-calling-conv.ll 22
-rw-r--r-- test/CodeGen/X86/avx512-cvt.ll 6
-rw-r--r-- test/CodeGen/X86/avx512-ext.ll 6
-rw-r--r-- test/CodeGen/X86/avx512-mask-op.ll 110
-rw-r--r-- test/CodeGen/X86/avx512-vbroadcast.ll 3
-rw-r--r-- test/CodeGen/X86/avx512-vec-cmp.ll 6
-rw-r--r-- test/CodeGen/X86/masked_memop.ll 9
-rw-r--r-- test/CodeGen/X86/vector-compare-results.ll 12
-rw-r--r-- test/CodeGen/X86/vector-sext.ll 33
-rw-r--r-- test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll 4
-rw-r--r-- test/CodeGen/X86/vector-shuffle-v1.ll 88
15 files changed, 193 insertions, 137 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 4cbffcc2f21..8cb528ec9e3 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -6554,11 +6554,11 @@ static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG,
// vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
// vpcmpeqd on 256-bit vectors.
if (Subtarget.hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) {
- if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget.hasInt256()))
+ if (VT == MVT::v4i32 || VT == MVT::v16i32 ||
+ (VT == MVT::v8i32 && Subtarget.hasInt256()))
return Op;
- if (!VT.is512BitVector())
- return getOnesVector(VT, Subtarget, DAG, DL);
+ return getOnesVector(VT, Subtarget, DAG, DL);
}
return SDValue();
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 357b5179718..0b50b82b154 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -416,6 +416,8 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
[(set VR512:$dst, (v16i32 immAllZerosV))]>;
+def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
+ [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index cc0388bb0db..a6a1714c21a 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -5547,6 +5547,15 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
case X86::AVX2_SETALLONES:
return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
+ case X86::AVX512_512_SETALLONES: {
+ unsigned Reg = MIB->getOperand(0).getReg();
+ MIB->setDesc(get(X86::VPTERNLOGDZrri));
+ // VPTERNLOGD needs 3 register inputs and an immediate.
+ // 0xff will return 1s for any input.
+ MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef).addImm(0xff);
+ return true;
+ }
case X86::TEST8ri_NOREX:
MI.setDesc(get(X86::TEST8ri));
return true;
@@ -6231,6 +6240,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
else
switch (LoadMI.getOpcode()) {
case X86::AVX512_512_SET0:
+ case X86::AVX512_512_SETALLONES:
Alignment = 64;
break;
case X86::AVX2_SETALLONES:
@@ -6281,6 +6291,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
case X86::AVX512_128_SET0:
case X86::AVX512_256_SET0:
case X86::AVX512_512_SET0:
+ case X86::AVX512_512_SETALLONES:
case X86::FsFLD0SD:
case X86::FsFLD0SS: {
// Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
@@ -6312,7 +6323,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
Ty = Type::getFloatTy(MF.getFunction()->getContext());
else if (Opc == X86::FsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
- else if (Opc == X86::AVX512_512_SET0)
+ else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES)
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()),16);
else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0 ||
Opc == X86::AVX512_256_SET0)
@@ -6320,7 +6331,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
else
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
- bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES);
+ bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES ||
+ Opc == X86::AVX512_512_SETALLONES);
const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
Constant::getNullValue(Ty);
unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
diff --git a/test/CodeGen/X86/avx512-build-vector.ll b/test/CodeGen/X86/avx512-build-vector.ll
index 0f89aa71162..980b87187d9 100644
--- a/test/CodeGen/X86/avx512-build-vector.ll
+++ b/test/CodeGen/X86/avx512-build-vector.ll
@@ -4,7 +4,8 @@
define <16 x i32> @test2(<16 x i32> %x) {
; CHECK-LABEL: test2:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; CHECK-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
+; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = add <16 x i32><i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, %x
ret <16 x i32>%res
@@ -15,8 +16,8 @@ define <16 x float> @test3(<4 x float> %a) {
; CHECK: ## BB#0:
; CHECK-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vmovss %xmm0, %xmm2, %xmm0
-; CHECK-NEXT: vmovss %xmm1, %xmm2, %xmm1
+; CHECK-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
+; CHECK-NEXT: vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm1[1,0],xmm0[0,1]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1
diff --git a/test/CodeGen/X86/avx512-calling-conv.ll b/test/CodeGen/X86/avx512-calling-conv.ll
index 303e7ac5182..35e7448e09b 100644
--- a/test/CodeGen/X86/avx512-calling-conv.ll
+++ b/test/CodeGen/X86/avx512-calling-conv.ll
@@ -30,7 +30,8 @@ define <16 x i1> @test2(<16 x i1>%a, <16 x i1>%b) {
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 {%k1}
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
;
@@ -52,7 +53,8 @@ define <16 x i1> @test2(<16 x i1>%a, <16 x i1>%b) {
; KNL_X32-NEXT: vpslld $31, %zmm0, %zmm0
; KNL_X32-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_X32-NEXT: vptestmd %zmm1, %zmm1, %k1 {%k1}
-; KNL_X32-NEXT: vpbroadcastd LCPI1_0, %zmm0 {%k1} {z}
+; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL_X32-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL_X32-NEXT: vpmovdb %zmm0, %xmm0
; KNL_X32-NEXT: retl
%c = and <16 x i1>%a, %b
@@ -68,7 +70,8 @@ define <8 x i1> @test3(<8 x i1>%a, <8 x i1>%b) {
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 {%k1}
-; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqw %zmm0, %xmm0
; KNL-NEXT: retq
;
@@ -91,7 +94,7 @@ define <8 x i1> @test3(<8 x i1>%a, <8 x i1>%b) {
; KNL_X32-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
; KNL_X32-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL_X32-NEXT: vptestmq %zmm1, %zmm1, %k1 {%k1}
-; KNL_X32-NEXT: vpbroadcastd LCPI2_1, %zmm0
+; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL_X32-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL_X32-NEXT: vpmovqw %zmm0, %xmm0
; KNL_X32-NEXT: retl
@@ -183,7 +186,8 @@ define <16 x i32> @test6(<16 x i32>%a, <16 x i32>%b) {
; KNL-NEXT: Ltmp1:
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: callq _func16xi1
; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
@@ -212,7 +216,8 @@ define <16 x i32> @test6(<16 x i32>%a, <16 x i32>%b) {
; KNL_X32-NEXT: Ltmp1:
; KNL_X32-NEXT: .cfi_def_cfa_offset 16
; KNL_X32-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
-; KNL_X32-NEXT: vpbroadcastd LCPI5_0, %zmm0 {%k1} {z}
+; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL_X32-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL_X32-NEXT: vpmovdb %zmm0, %xmm0
; KNL_X32-NEXT: calll _func16xi1
; KNL_X32-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
@@ -286,7 +291,8 @@ define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) {
; KNL-NEXT: movb $85, %al
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 {%k1}
-; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqw %zmm0, %xmm0
; KNL-NEXT: popq %rax
; KNL-NEXT: retq
@@ -322,7 +328,7 @@ define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) {
; KNL_X32-NEXT: movb $85, %al
; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: vptestmq %zmm0, %zmm0, %k1 {%k1}
-; KNL_X32-NEXT: vpbroadcastd LCPI7_1, %zmm0
+; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL_X32-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL_X32-NEXT: vpmovqw %zmm0, %xmm0
; KNL_X32-NEXT: addl $12, %esp
diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll
index 57e8a134151..914f859927b 100644
--- a/test/CodeGen/X86/avx512-cvt.ll
+++ b/test/CodeGen/X86/avx512-cvt.ll
@@ -685,7 +685,8 @@ define <16 x float> @sitofp_16i1_float(<16 x i32> %a) {
; KNL: ## BB#0:
; KNL-NEXT: vpxord %zmm1, %zmm1, %zmm1
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vcvtdq2ps %zmm0, %zmm0
; KNL-NEXT: retq
;
@@ -748,7 +749,8 @@ define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
; KNL: ## BB#0:
; KNL-NEXT: vpxord %zmm1, %zmm1, %zmm1
; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k1
-; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: vcvtdq2pd %ymm0, %zmm0
; KNL-NEXT: retq
diff --git a/test/CodeGen/X86/avx512-ext.ll b/test/CodeGen/X86/avx512-ext.ll
index a944e85f71b..dac40b99428 100644
--- a/test/CodeGen/X86/avx512-ext.ll
+++ b/test/CodeGen/X86/avx512-ext.ll
@@ -1409,7 +1409,8 @@ define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
; KNL-NEXT: knotw %k0, %k1
-; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: retq
;
@@ -1465,7 +1466,8 @@ define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
; KNL-LABEL: sext_16i1_16i32:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: sext_16i1_16i32:
diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll
index b867297df74..af41de109e1 100644
--- a/test/CodeGen/X86/avx512-mask-op.ll
+++ b/test/CodeGen/X86/avx512-mask-op.ll
@@ -349,7 +349,8 @@ define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
; KNL-NEXT: LBB17_1:
; KNL-NEXT: vpcmpgtd %zmm2, %zmm0, %k1
; KNL-NEXT: LBB17_3:
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
;
@@ -386,7 +387,8 @@ define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
; KNL-NEXT: LBB18_3:
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
;
@@ -475,7 +477,8 @@ define <16 x i1> @test15(i32 %x, i32 %y) {
; KNL-NEXT: movw $1, %cx
; KNL-NEXT: cmovgw %ax, %cx
; KNL-NEXT: kmovw %ecx, %k1
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
;
@@ -512,25 +515,25 @@ define <64 x i8> @test16(i64 %x) {
; KNL-NEXT: movl %edi, (%rsp)
; KNL-NEXT: shrq $32, %rdi
; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; KNL-NEXT: movl {{.*}}(%rip), %eax
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT: kmovw (%rsp), %k1
-; KNL-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
-; KNL-NEXT: vpmovdb %zmm0, %xmm0
-; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; KNL-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm1, %xmm1
-; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; KNL-NEXT: movl $1, %ecx
-; KNL-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
-; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; KNL-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm2 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm2, %xmm2
+; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm2
+; KNL-NEXT: movl $1, %eax
+; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; KNL-NEXT: vpblendd {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm2[4,5,6,7]
+; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm1, %xmm1
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; KNL-NEXT: vpbroadcastd %eax, %zmm2 {%k1} {z}
-; KNL-NEXT: vpmovdb %zmm2, %xmm2
-; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
+; KNL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
+; KNL-NEXT: vpsllw $7, %ymm2, %ymm0
; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2
; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
@@ -570,30 +573,30 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
; KNL-NEXT: movl %edi, (%rsp)
; KNL-NEXT: shrq $32, %rdi
; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; KNL-NEXT: movl {{.*}}(%rip), %eax
+; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; KNL-NEXT: kmovw (%rsp), %k1
-; KNL-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; KNL-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
-; KNL-NEXT: vpmovdb %zmm1, %xmm1
-; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; KNL-NEXT: xorl %ecx, %ecx
+; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm2, %xmm2
+; KNL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
+; KNL-NEXT: xorl %eax, %eax
; KNL-NEXT: cmpl %edx, %esi
-; KNL-NEXT: setg %cl
-; KNL-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
-; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; KNL-NEXT: setg %al
+; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
-; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; KNL-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
-; KNL-NEXT: vpmovdb %zmm1, %xmm1
+; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; KNL-NEXT: vpbroadcastd %eax, %zmm2 {%k1} {z}
+; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm2, %xmm2
-; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm1, %xmm1
+; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: retq
@@ -628,7 +631,8 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kshiftlw $7, %k2, %k1
; KNL-NEXT: korw %k1, %k0, %k1
-; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqw %zmm0, %xmm0
; KNL-NEXT: retq
;
@@ -1368,7 +1372,8 @@ define <8 x i64> @load_8i1(<8 x i1>* %a) {
; KNL: ## BB#0:
; KNL-NEXT: movzbl (%rdi), %eax
; KNL-NEXT: kmovw %eax, %k1
-; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: load_8i1:
@@ -1385,7 +1390,8 @@ define <16 x i32> @load_16i1(<16 x i1>* %a) {
; KNL-LABEL: load_16i1:
; KNL: ## BB#0:
; KNL-NEXT: kmovw (%rdi), %k1
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: load_16i1:
@@ -1403,7 +1409,8 @@ define <2 x i16> @load_2i1(<2 x i1>* %a) {
; KNL: ## BB#0:
; KNL-NEXT: movzbl (%rdi), %eax
; KNL-NEXT: kmovw %eax, %k1
-; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
@@ -1422,7 +1429,8 @@ define <4 x i16> @load_4i1(<4 x i1>* %a) {
; KNL: ## BB#0:
; KNL-NEXT: movzbl (%rdi), %eax
; KNL-NEXT: kmovw %eax, %k1
-; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL-NEXT: retq
@@ -1441,11 +1449,11 @@ define <32 x i16> @load_32i1(<32 x i1>* %a) {
; KNL-LABEL: load_32i1:
; KNL: ## BB#0:
; KNL-NEXT: kmovw (%rdi), %k1
-; KNL-NEXT: movl {{.*}}(%rip), %eax
-; KNL-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
+; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: kmovw 2(%rdi), %k1
-; KNL-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vpmovdw %zmm1, %ymm1
; KNL-NEXT: retq
;
@@ -1463,20 +1471,20 @@ define <64 x i8> @load_64i1(<64 x i1>* %a) {
; KNL-LABEL: load_64i1:
; KNL: ## BB#0:
; KNL-NEXT: kmovw (%rdi), %k1
-; KNL-NEXT: movl {{.*}}(%rip), %eax
-; KNL-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
+; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: kmovw 2(%rdi), %k1
-; KNL-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
-; KNL-NEXT: vpmovdb %zmm1, %xmm1
-; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm2, %xmm2
+; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; KNL-NEXT: kmovw 4(%rdi), %k1
-; KNL-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
-; KNL-NEXT: vpmovdb %zmm1, %xmm1
-; KNL-NEXT: kmovw 6(%rdi), %k1
-; KNL-NEXT: vpbroadcastd %eax, %zmm2 {%k1} {z}
+; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm2, %xmm2
-; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; KNL-NEXT: kmovw 6(%rdi), %k1
+; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm1, %xmm1
+; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; KNL-NEXT: retq
;
; SKX-LABEL: load_64i1:
diff --git a/test/CodeGen/X86/avx512-vbroadcast.ll b/test/CodeGen/X86/avx512-vbroadcast.ll
index 6a7ed02e031..299b990f625 100644
--- a/test/CodeGen/X86/avx512-vbroadcast.ll
+++ b/test/CodeGen/X86/avx512-vbroadcast.ll
@@ -218,7 +218,8 @@ define <16 x i32> @test_vbroadcast() {
; ALL: # BB#0: # %entry
; ALL-NEXT: vpxord %zmm0, %zmm0, %zmm0
; ALL-NEXT: vcmpunordps %zmm0, %zmm0, %k1
-; ALL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; ALL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; ALL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; ALL-NEXT: knotw %k1, %k1
; ALL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; ALL-NEXT: retq
diff --git a/test/CodeGen/X86/avx512-vec-cmp.ll b/test/CodeGen/X86/avx512-vec-cmp.ll
index 49aea228182..69be3685ecd 100644
--- a/test/CodeGen/X86/avx512-vec-cmp.ll
+++ b/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -865,7 +865,8 @@ define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1
; KNL-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; KNL-NEXT: vpcmpgtq %zmm3, %zmm2, %k1
; KNL-NEXT: kxnorw %k1, %k0, %k1
-; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: retq
;
@@ -889,7 +890,8 @@ define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32>
; KNL-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; KNL-NEXT: vpcmpgtd %zmm3, %zmm2, %k1
; KNL-NEXT: kxorw %k1, %k0, %k1
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
;
diff --git a/test/CodeGen/X86/masked_memop.ll b/test/CodeGen/X86/masked_memop.ll
index c31b8381aeb..4220308b008 100644
--- a/test/CodeGen/X86/masked_memop.ll
+++ b/test/CodeGen/X86/masked_memop.ll
@@ -2473,7 +2473,8 @@ define <16 x i8> @test_mask_load_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x
; AVX512F-NEXT: ## BB#31: ## %cond.load43
; AVX512F-NEXT: vpinsrb $15, 15(%rdi), %xmm0, %xmm0
; AVX512F-NEXT: LBB50_32: ## %else44
-; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm1 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
+; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vpblendvb %xmm1, %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: retq
@@ -5676,7 +5677,8 @@ define <8 x i16> @test_mask_load_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i1
; AVX512F-NEXT: ## BB#15: ## %cond.load19
; AVX512F-NEXT: vpinsrw $7, 14(%rdi), %xmm0, %xmm0
; AVX512F-NEXT: LBB53_16: ## %else20
-; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm1 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
+; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpmovqw %zmm1, %xmm1
; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpxor %xmm2, %xmm1, %xmm2
@@ -6116,7 +6118,8 @@ define <16 x i16> @test_mask_load_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16
; AVX512F-NEXT: vpinsrw $7, 30(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: LBB54_32: ## %else44
-; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm1 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
+; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
; AVX512F-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX512F-NEXT: retq
diff --git a/test/CodeGen/X86/vector-compare-results.ll b/test/CodeGen/X86/vector-compare-results.ll
index 16bf596f3bb..9c89d0129f8 100644
--- a/test/CodeGen/X86/vector-compare-results.ll
+++ b/test/CodeGen/X86/vector-compare-results.ll
@@ -706,7 +706,8 @@ define <8 x i1> @test_cmp_v8f64(<8 x double> %a0, <8 x double> %a1) nounwind {
; AVX512-LABEL: test_cmp_v8f64:
; AVX512: # BB#0:
; AVX512-NEXT: vcmpltpd %zmm0, %zmm1, %k1
-; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: retq
%1 = fcmp ogt <8 x double> %a0, %a1
@@ -767,7 +768,8 @@ define <16 x i1> @test_cmp_v16f32(<16 x float> %a0, <16 x float> %a1) nounwind {
; AVX512-LABEL: test_cmp_v16f32:
; AVX512: # BB#0:
; AVX512-NEXT: vcmpltps %zmm0, %zmm1, %k1
-; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: retq
%1 = fcmp ogt <16 x float> %a0, %a1
@@ -890,7 +892,8 @@ define <8 x i1> @test_cmp_v8i64(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; AVX512-LABEL: test_cmp_v8i64:
; AVX512: # BB#0:
; AVX512-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
-; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: retq
%1 = icmp sgt <8 x i64> %a0, %a1
@@ -954,7 +957,8 @@ define <16 x i1> @test_cmp_v16i32(<16 x i32> %a0, <16 x i32> %a1) nounwind {
; AVX512-LABEL: test_cmp_v16i32:
; AVX512: # BB#0:
; AVX512-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
-; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: retq
%1 = icmp sgt <16 x i32> %a0, %a1
diff --git a/test/CodeGen/X86/vector-sext.ll b/test/CodeGen/X86/vector-sext.ll
index e3daba5eb16..672eeac0c7f 100644
--- a/test/CodeGen/X86/vector-sext.ll
+++ b/test/CodeGen/X86/vector-sext.ll
@@ -785,7 +785,8 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
; AVX512: # BB#0: # %entry
; AVX512-NEXT: movzbl (%rdi), %eax
; AVX512-NEXT: kmovw %eax, %k1
-; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
@@ -966,7 +967,8 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
; AVX512: # BB#0: # %entry
; AVX512-NEXT: movzbl (%rdi), %eax
; AVX512-NEXT: kmovw %eax, %k1
-; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
@@ -1162,7 +1164,8 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
; AVX512: # BB#0: # %entry
; AVX512-NEXT: movzbl (%rdi), %eax
; AVX512-NEXT: kmovw %eax, %k1
-; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
@@ -1455,7 +1458,8 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
; AVX512: # BB#0: # %entry
; AVX512-NEXT: movzbl (%rdi), %eax
; AVX512-NEXT: kmovw %eax, %k1
-; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: retq
;
@@ -1848,7 +1852,8 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) {
; AVX512: # BB#0: # %entry
; AVX512-NEXT: movzbl (%rdi), %eax
; AVX512-NEXT: kmovw %eax, %k1
-; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: retq
;
@@ -2350,7 +2355,8 @@ define <16 x i8> @load_sext_16i1_to_16i8(<16 x i1> *%ptr) nounwind readnone {
; AVX512-LABEL: load_sext_16i1_to_16i8:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: kmovw (%rdi), %k1
-; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: retq
;
@@ -2887,7 +2893,8 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
; AVX512-LABEL: load_sext_16i1_to_16i16:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: kmovw (%rdi), %k1
-; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: retq
;
@@ -3731,13 +3738,13 @@ define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone {
; AVX512-LABEL: load_sext_32i1_to_32i8:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: kmovw (%rdi), %k1
-; AVX512-NEXT: movl {{.*}}(%rip), %eax
-; AVX512-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
-; AVX512-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512-NEXT: kmovw 2(%rdi), %k1
-; AVX512-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z}
; AVX512-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: kmovw 2(%rdi), %k1
+; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512-NEXT: retq
;
; X32-SSE41-LABEL: load_sext_32i1_to_32i8:
diff --git a/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll b/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
index ef492e053eb..ddb83c60430 100644
--- a/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
@@ -358,9 +358,9 @@ define <64 x i8> @combine_pshufb_identity_mask(<64 x i8> %x0, i64 %m) {
; CHECK-LABEL: combine_pshufb_identity_mask:
; CHECK: # BB#0:
; CHECK-NEXT: kmovq %rdi, %k1
-; CHECK-NEXT: vmovdqu8 {{.*#+}} zmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; CHECK-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; CHECK-NEXT: vmovdqu8 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3
; CHECK-NEXT: vpshufb %zmm2, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vpshufb %zmm2, %zmm3, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
diff --git a/test/CodeGen/X86/vector-shuffle-v1.ll b/test/CodeGen/X86/vector-shuffle-v1.ll
index 9ab56a308e1..1c128645ad1 100644
--- a/test/CodeGen/X86/vector-shuffle-v1.ll
+++ b/test/CodeGen/X86/vector-shuffle-v1.ll
@@ -74,13 +74,13 @@ define <8 x i1> @shuf8i1_3_6_1_0_3_7_7_0(<8 x i64> %a, <8 x i64> %b, <8 x i64> %
; AVX512F-LABEL: shuf8i1_3_6_1_0_3_7_7_0:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k1
-; AVX512F-NEXT: movq {{.*}}(%rip), %rax
-; AVX512F-NEXT: vpbroadcastq %rax, %zmm0 {%k1} {z}
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,6,1,0,3,7,7,0]
-; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
-; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
-; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
-; AVX512F-NEXT: vpbroadcastq %rax, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,1,0,3,7,7,0]
+; AVX512F-NEXT: vpermq %zmm1, %zmm2, %zmm1
+; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
+; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
; AVX512F-NEXT: retq
;
@@ -105,14 +105,14 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<16 x i32> %a, <1
; AVX512F: # BB#0:
; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
; AVX512F-NEXT: vpcmpeqd %zmm3, %zmm1, %k2
-; AVX512F-NEXT: movl {{.*}}(%rip), %eax
-; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k2} {z}
-; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
-; AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
-; AVX512F-NEXT: vpermt2d %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vpslld $31, %zmm1, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
-; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k2} {z}
+; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm2 {%k1} {z}
+; AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm3 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
+; AVX512F-NEXT: vpermt2d %zmm1, %zmm3, %zmm2
+; AVX512F-NEXT: vpslld $31, %zmm2, %zmm1
+; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1
+; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: retq
;
@@ -163,13 +163,13 @@ define <8 x i1> @shuf8i1_u_2_u_u_2_u_2_u(i8 %a) {
; AVX512F-LABEL: shuf8i1_u_2_u_u_2_u_2_u:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: movq {{.*}}(%rip), %rax
-; AVX512F-NEXT: vpbroadcastq %rax, %zmm0 {%k1} {z}
-; AVX512F-NEXT: vextracti32x4 $1, %zmm0, %xmm0
-; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0
-; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
-; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
-; AVX512F-NEXT: vpbroadcastq %rax, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vextracti32x4 $1, %zmm1, %xmm1
+; AVX512F-NEXT: vpbroadcastq %xmm1, %zmm1
+; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
+; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
; AVX512F-NEXT: retq
;
@@ -192,7 +192,8 @@ define i8 @shuf8i1_10_2_9_u_3_u_2_u(i8 %a) {
; AVX512F-LABEL: shuf8i1_10_2_9_u_3_u_2_u:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = <8,2,10,u,3,u,2,u>
; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
@@ -224,7 +225,8 @@ define i8 @shuf8i1_0_1_4_5_u_u_u_u(i8 %a) {
; AVX512F-LABEL: shuf8i1_0_1_4_5_u_u_u_u:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5,0,1,0,1]
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
@@ -252,7 +254,8 @@ define i8 @shuf8i1_9_6_1_0_3_7_7_0(i8 %a) {
; AVX512F-LABEL: shuf8i1_9_6_1_0_3_7_7_0:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,6,1,0,3,7,7,0]
; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
@@ -284,7 +287,8 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0(i8 %a) {
; AVX512F-LABEL: shuf8i1_9_6_1_10_3_7_7_0:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,1,2,10,4,5,6,7]
; AVX512F-NEXT: vpxord %zmm2, %zmm2, %zmm2
; AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
@@ -316,14 +320,14 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8 %a) {
; AVX512F-LABEL: shuf8i1__9_6_1_10_3_7_7_1:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: movq {{.*}}(%rip), %rax
-; AVX512F-NEXT: movb $51, %cl
-; AVX512F-NEXT: kmovw %ecx, %k2
-; AVX512F-NEXT: vpbroadcastq %rax, %zmm0 {%k2} {z}
-; AVX512F-NEXT: vpbroadcastq %rax, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT: movb $51, %al
+; AVX512F-NEXT: kmovw %eax, %k2
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k2} {z}
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [9,6,1,0,3,7,7,1]
-; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
-; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
+; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
+; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
@@ -355,11 +359,11 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_ones(<8 x i1> %a) {
; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
-; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,1,2,3,4,5,6,7]
-; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm2
-; AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
-; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [9,1,2,3,4,5,6,7]
+; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
@@ -371,7 +375,7 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_ones(<8 x i1> %a) {
; VL_BW_DQ-NEXT: vpmovw2m %xmm0, %k0
; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,1,2,3,4,5,6,7]
-; VL_BW_DQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm2
+; VL_BW_DQ-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
; VL_BW_DQ-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
; VL_BW_DQ-NEXT: vpsllq $63, %zmm2, %zmm0
; VL_BW_DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
@@ -388,7 +392,8 @@ define i16 @shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0(i16 %a) {
; AVX512F-LABEL: shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpbroadcastd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -426,8 +431,9 @@ define i64 @shuf64i1_zero(i64 %a) {
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $96, %rsp
; AVX512F-NEXT: movl %edi, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1