author    Chandler Carruth <chandlerc@gmail.com>    2014-10-01 11:14:02 +0000
committer Chandler Carruth <chandlerc@gmail.com>    2014-10-01 11:14:02 +0000
commit    7d64681274c0e8d922729c67dfca01c0f4922672 (patch)
tree      7171eb1fd203c11535aef4f963c7ae6028f26622 /test
parent    a1b88ab2c14c1332a864a1fd4c2746bc43d005fe (diff)
[x86] Fix a few more tiny patterns with the new vector shuffle lowering
that keep cropping up in the regression test suite.

This also addresses one of the issues raised on the mailing list with failing to form 'movsd' in as many cases as we realistically should. There will be corresponding patches forthcoming for v4f32 at least. This was a lot of fuss for a relatively small gain, but all the fuss was on my end trying different ways of holding the pieces of the x86 fragment patterns *just right*. Now that it works, the code is reasonably simple.

In the new test cases I'm adding here, v2i64 sticks out as just plain horrible. I've not come up with any great ideas here other than that it would be nice to recognize when we're *going* to take a domain crossing hit and cross earlier to get the decent instructions. At least with AVX it is slightly less silly....

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218756 91177308-0d34-0410-b5e6-96231b3b80d8
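For context, a minimal IR sketch (not part of this commit; the function name is hypothetical) of the shuffle shape the mailing-list report was about: a scalar inserted into the low element of a <2 x double>, with the high element taken from another vector, which should now lower to a single movsd/vmovsd. The insert_reg_lo_v2f64 test added below exercises exactly this pattern, and the v2i64 variants show the domain-crossing problem described above.

define <2 x double> @movsd_lowering_sketch(double %a, <2 x double> %b) {
  ; build <a, undef>, then take lane 0 from it and lane 1 from %b,
  ; i.e. insert %a into the low element of %b (movsd semantics)
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}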
Diffstat (limited to 'test')
-rw-r--r--  test/CodeGen/X86/vector-shuffle-128-v2.ll | 190
1 file changed, 190 insertions, 0 deletions
diff --git a/test/CodeGen/X86/vector-shuffle-128-v2.ll b/test/CodeGen/X86/vector-shuffle-128-v2.ll
index 67723dd07b2..15e98822246 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v2.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -714,6 +714,196 @@ define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
ret <2 x double> %shuffle
}
+define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
+; SSE2-LABEL: insert_reg_lo_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movd %rdi, %xmm1
+; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: insert_reg_lo_v2i64:
+; SSE3: # BB#0:
+; SSE3-NEXT: movd %rdi, %xmm1
+; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: insert_reg_lo_v2i64:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movd %rdi, %xmm1
+; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: insert_reg_lo_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movd %rdi, %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: insert_reg_lo_v2i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovq %rdi, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_reg_lo_v2i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovq %rdi, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX2-NEXT: retq
+ %v = insertelement <2 x i64> undef, i64 %a, i32 0
+ %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
+ ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
+; SSE2-LABEL: insert_mem_lo_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movq (%rdi), %xmm1
+; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: insert_mem_lo_v2i64:
+; SSE3: # BB#0:
+; SSE3-NEXT: movq (%rdi), %xmm1
+; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: insert_mem_lo_v2i64:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movq (%rdi), %xmm1
+; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: insert_mem_lo_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movq (%rdi), %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: insert_mem_lo_v2i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovq (%rdi), %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_mem_lo_v2i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovq (%rdi), %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX2-NEXT: retq
+ %a = load i64* %ptr
+ %v = insertelement <2 x i64> undef, i64 %a, i32 0
+ %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
+ ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
+; SSE-LABEL: insert_reg_hi_v2i64:
+; SSE: # BB#0:
+; SSE-NEXT: movd %rdi, %xmm1
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_reg_hi_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovq %rdi, %xmm1
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
+ %v = insertelement <2 x i64> undef, i64 %a, i32 0
+ %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
+ ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
+; SSE-LABEL: insert_mem_hi_v2i64:
+; SSE: # BB#0:
+; SSE-NEXT: movq (%rdi), %xmm1
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_mem_hi_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovq (%rdi), %xmm1
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
+ %a = load i64* %ptr
+ %v = insertelement <2 x i64> undef, i64 %a, i32 0
+ %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
+ ret <2 x i64> %shuffle
+}
+
+define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
+; SSE-LABEL: insert_reg_lo_v2f64:
+; SSE: # BB#0:
+; SSE-NEXT: movsd %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_reg_lo_v2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %v = insertelement <2 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
+ ret <2 x double> %shuffle
+}
+
+define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
+; SSE-LABEL: insert_mem_lo_v2f64:
+; SSE: # BB#0:
+; SSE-NEXT: movlpd (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_mem_lo_v2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovlpd (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+ %a = load double* %ptr
+ %v = insertelement <2 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
+ ret <2 x double> %shuffle
+}
+
+define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
+; SSE-LABEL: insert_reg_hi_v2f64:
+; SSE: # BB#0:
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_reg_hi_v2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: retq
+ %v = insertelement <2 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
+ ret <2 x double> %shuffle
+}
+
+define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
+; SSE-LABEL: insert_mem_hi_v2f64:
+; SSE: # BB#0:
+; SSE-NEXT: movhpd (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_mem_hi_v2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovhpd (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+ %a = load double* %ptr
+ %v = insertelement <2 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
+ ret <2 x double> %shuffle
+}
+
define <2 x double> @insert_dup_reg_v2f64(double %a) {
; FIXME: We should match movddup for SSE3 and higher here.
;