author    | Chandler Carruth <chandlerc@gmail.com> | 2014-10-01 11:14:02 +0000
committer | Chandler Carruth <chandlerc@gmail.com> | 2014-10-01 11:14:02 +0000
commit    | 7d64681274c0e8d922729c67dfca01c0f4922672 (patch)
tree      | 7171eb1fd203c11535aef4f963c7ae6028f26622 /test
parent    | a1b88ab2c14c1332a864a1fd4c2746bc43d005fe (diff)
[x86] Fix a few more tiny patterns with the new vector shuffle lowering
that keep cropping up in the regression test suite.
This also addresses one of the issues raised on the mailing list about
failing to form 'movsd' in as many cases as we realistically should.
There will be corresponding patches forthcoming for v4f32 at least. This
was a lot of fuss for a relatively small gain, but all the fuss was on
my end trying different ways of holding the pieces of the x86 fragment
patterns *just right*. Now that it works, the code is reasonably simple.
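
For a concrete picture of the 'movsd' formation referred to above, here is a
condensed excerpt of one of the new v2f64 tests added in this patch (the CHECK
lines are abbreviated from the full diff below):

```llvm
; Condensed from insert_reg_lo_v2f64 in the diff below: inserting a scalar
; double into the low lane now selects a (v)movsd blend of the two registers.
define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
; SSE: movsd %xmm0, %xmm1
; AVX: vmovsd %xmm0, %xmm1, %xmm0
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}
```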
In the new test cases I'm adding here, v2i64 sticks out as just plain
horrible. I've not come up with any great ideas here other than that it
would be nice to recognize when we're *going* to take a domain crossing
hit and cross earlier to get the decent instructions. At least with AVX
it is slightly less silly....
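
To make the domain-crossing point concrete, compare the SSE2 and AVX2
sequences checked for in the new insert_reg_lo_v2i64 test (condensed from the
diff below): the SSE2 lowering moves the GPR into a vector register and then
blends with floating-point-domain instructions, while AVX2 can stay in the
integer domain with vpblendd.

```llvm
; Condensed from insert_reg_lo_v2i64 below; the domain annotations are added
; here for illustration and are not part of the test itself.
; SSE2: movd %rdi, %xmm1                            ; GPR -> XMM transfer
; SSE2: shufpd {{.*}} xmm1 = xmm1[0],xmm0[1]        ; FP-domain shuffle on i64 data
; SSE2: movapd %xmm1, %xmm0                         ; FP-domain register copy
;
; AVX2: vmovq %rdi, %xmm1
; AVX2: vpblendd {{.*}} xmm0 = xmm1[0,1],xmm0[2,3]  ; integer-domain blend
```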
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218756 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/X86/vector-shuffle-128-v2.ll | 190
1 file changed, 190 insertions(+), 0 deletions(-)
diff --git a/test/CodeGen/X86/vector-shuffle-128-v2.ll b/test/CodeGen/X86/vector-shuffle-128-v2.ll
index 67723dd07b2..15e98822246 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v2.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -714,6 +714,196 @@ define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
   ret <2 x double> %shuffle
 }
 
+define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
+; SSE2-LABEL: insert_reg_lo_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movd %rdi, %xmm1
+; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: insert_reg_lo_v2i64:
+; SSE3: # BB#0:
+; SSE3-NEXT: movd %rdi, %xmm1
+; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: insert_reg_lo_v2i64:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movd %rdi, %xmm1
+; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: insert_reg_lo_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movd %rdi, %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: insert_reg_lo_v2i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovq %rdi, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_reg_lo_v2i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovq %rdi, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX2-NEXT: retq
+  %v = insertelement <2 x i64> undef, i64 %a, i32 0
+  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
+  ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
+; SSE2-LABEL: insert_mem_lo_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movq (%rdi), %xmm1
+; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: insert_mem_lo_v2i64:
+; SSE3: # BB#0:
+; SSE3-NEXT: movq (%rdi), %xmm1
+; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: insert_mem_lo_v2i64:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movq (%rdi), %xmm1
+; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: insert_mem_lo_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movq (%rdi), %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: insert_mem_lo_v2i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovq (%rdi), %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_mem_lo_v2i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovq (%rdi), %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX2-NEXT: retq
+  %a = load i64* %ptr
+  %v = insertelement <2 x i64> undef, i64 %a, i32 0
+  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
+  ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
+; SSE-LABEL: insert_reg_hi_v2i64:
+; SSE: # BB#0:
+; SSE-NEXT: movd %rdi, %xmm1
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_reg_hi_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovq %rdi, %xmm1
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
+  %v = insertelement <2 x i64> undef, i64 %a, i32 0
+  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
+  ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
+; SSE-LABEL: insert_mem_hi_v2i64:
+; SSE: # BB#0:
+; SSE-NEXT: movq (%rdi), %xmm1
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_mem_hi_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovq (%rdi), %xmm1
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
+  %a = load i64* %ptr
+  %v = insertelement <2 x i64> undef, i64 %a, i32 0
+  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
+  ret <2 x i64> %shuffle
+}
+
+define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
+; SSE-LABEL: insert_reg_lo_v2f64:
+; SSE: # BB#0:
+; SSE-NEXT: movsd %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_reg_lo_v2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+  %v = insertelement <2 x double> undef, double %a, i32 0
+  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %shuffle
+}
+
+define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
+; SSE-LABEL: insert_mem_lo_v2f64:
+; SSE: # BB#0:
+; SSE-NEXT: movlpd (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_mem_lo_v2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovlpd (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+  %a = load double* %ptr
+  %v = insertelement <2 x double> undef, double %a, i32 0
+  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %shuffle
+}
+
+define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
+; SSE-LABEL: insert_reg_hi_v2f64:
+; SSE: # BB#0:
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_reg_hi_v2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: retq
+  %v = insertelement <2 x double> undef, double %a, i32 0
+  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
+  ret <2 x double> %shuffle
+}
+
+define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
+; SSE-LABEL: insert_mem_hi_v2f64:
+; SSE: # BB#0:
+; SSE-NEXT: movhpd (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_mem_hi_v2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovhpd (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+  %a = load double* %ptr
+  %v = insertelement <2 x double> undef, double %a, i32 0
+  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
+  ret <2 x double> %shuffle
+}
+
 define <2 x double> @insert_dup_reg_v2f64(double %a) {
 ; FIXME: We should match movddup for SSE3 and higher here.
 ;
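
For context on how these CHECK prefixes are exercised: the RUN lines live at
the top of vector-shuffle-128-v2.ll and are not part of this hunk. The sketch
below shows their general shape, with one llc invocation per feature set piped
into FileCheck; the exact flags and feature selection here are an assumption,
not copied from the file.

```llvm
; Sketch only -- the real RUN lines are outside this hunk and may differ in flags.
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
```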