author     Sanjay Patel <spatel@rotateright.com>  2015-04-15 15:47:51 +0000
committer  Sanjay Patel <spatel@rotateright.com>  2015-04-15 15:47:51 +0000
commit     e3e5fcab94c9cbb2d9924db0368d2ed70a623597 (patch)
tree       d95773ddf0fe38151ede5c4294e5a740b094f51f /test
parent     0332323ab6873b59a104602de766694a88d69787 (diff)
[X86] add an exedepfix entry for movq == movlps == movlpd
This is a 1-line patch (with a TODO for AVX because that will affect even
more regression tests) that lets us substitute the appropriate 64-bit store
for the float/double/int domains.

It's not clear to me exactly what the difference is between the 0xD6
(MOVPQI2QImr) and 0x7E (MOVSDto64mr) opcodes, but this is apparently the
right choice.

Differential Revision: http://reviews.llvm.org/D8691

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235014 91177308-0d34-0410-b5e6-96231b3b80d8
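For context: the execution-dependency fix pass works off a table in
lib/Target/X86/X86InstrInfo.cpp that lists equivalent opcodes for the
single/double/integer domains, and rewrites an instruction to whichever
column matches the domain of its neighbors. A minimal sketch of the kind of
row this patch adds (the exact row and enumerator names are an assumption
for illustration, not a quote of the patch; X86::* are LLVM's in-tree
opcode enumerators):

    #include <cstdint>

    // Sketch of one row of the ReplaceableInstrs-style table in
    // X86InstrInfo.cpp. Columns are { PackedSingle, PackedDouble,
    // PackedInt }; the pass may substitute any one for another, since all
    // three store the low 64 bits of an XMM register.
    static const uint16_t ReplaceableInstrs[][3] = {
      // ... existing rows ...
      { X86::MOVLPSmr, X86::MOVLPDmr, X86::MOVPQI2QImr },
      // TODO: the VEX-encoded AVX variants would need their own row.
    };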
Diffstat (limited to 'test')
-rw-r--r--  test/CodeGen/X86/2011-10-19-widen_vselect.ll |  2
-rw-r--r--  test/CodeGen/X86/2012-07-10-extload64.ll     |  4
-rw-r--r--  test/CodeGen/X86/exedeps-movq.ll             | 73
-rw-r--r--  test/CodeGen/X86/sse2-intrinsics-x86.ll      |  2
-rw-r--r--  test/CodeGen/X86/vec_insert-5.ll             |  2
-rw-r--r--  test/CodeGen/X86/vec_insert-mmx.ll           |  2
-rw-r--r--  test/CodeGen/X86/vec_zero_cse.ll             |  2
-rw-r--r--  test/CodeGen/X86/vector-shuffle-mmx.ll       | 12
-rw-r--r--  test/CodeGen/X86/widen_cast-1.ll             |  6
-rw-r--r--  test/CodeGen/X86/widen_cast-4.ll             |  2
-rw-r--r--  test/CodeGen/X86/widen_cast-5.ll             |  2
-rw-r--r--  test/CodeGen/X86/widen_shuffle-1.ll          |  2
12 files changed, 93 insertions, 18 deletions
diff --git a/test/CodeGen/X86/2011-10-19-widen_vselect.ll b/test/CodeGen/X86/2011-10-19-widen_vselect.ll
index da3c3222826..07dff9539cd 100644
--- a/test/CodeGen/X86/2011-10-19-widen_vselect.ll
+++ b/test/CodeGen/X86/2011-10-19-widen_vselect.ll
@@ -26,7 +26,7 @@ entry:
}
; CHECK-LABEL: zero_test
-; CHECK: pxor %xmm0, %xmm0
+; CHECK: xorps %xmm0, %xmm0
; CHECK: ret
define void @zero_test() {
diff --git a/test/CodeGen/X86/2012-07-10-extload64.ll b/test/CodeGen/X86/2012-07-10-extload64.ll
index f33fc8c69ef..a366102fbd7 100644
--- a/test/CodeGen/X86/2012-07-10-extload64.ll
+++ b/test/CodeGen/X86/2012-07-10-extload64.ll
@@ -6,7 +6,7 @@ entry:
; CHECK: pmovzxwd
%A27 = load <4 x i16>, <4 x i16>* %in, align 4
%A28 = add <4 x i16> %A27, %A27
-; CHECK: movlpd
+; CHECK: movq
store <4 x i16> %A28, <4 x i16>* %in, align 4
ret void
; CHECK: ret
@@ -18,7 +18,7 @@ define void @store_64(<2 x i32>* %ptr) {
BB:
store <2 x i32> zeroinitializer, <2 x i32>* %ptr
ret void
-;CHECK: movlpd
+;CHECK: movlps
;CHECK: ret
}
diff --git a/test/CodeGen/X86/exedeps-movq.ll b/test/CodeGen/X86/exedeps-movq.ll
new file mode 100644
index 00000000000..b702c8716a9
--- /dev/null
+++ b/test/CodeGen/X86/exedeps-movq.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=AVX
+
+; Verify that we select the correct version of the instruction that stores the low 64-bits
+; of a 128-bit vector. We want to avoid int/fp domain crossing penalties, so ignore the
+; bitcast ops and choose:
+;
+; movlps for floats
+; movlpd for doubles
+; movq for integers
+
+define void @store_floats(<4 x float> %x, i64* %p) {
+; SSE-LABEL: store_floats:
+; SSE: # BB#0:
+; SSE-NEXT: addps %xmm0, %xmm0
+; SSE-NEXT: movlps %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: store_floats:
+; AVX: # BB#0:
+; AVX-NEXT: vaddps %xmm0, %xmm0, %xmm0
+
+
+; !!! FIXME - the AVX version is not handled correctly.
+; AVX-NEXT: vmovq %xmm0, (%rdi)
+
+
+; AVX-NEXT: retq
+ %a = fadd <4 x float> %x, %x
+ %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ %c = bitcast <2 x float> %b to i64
+ store i64 %c, i64* %p
+ ret void
+}
+
+define void @store_double(<2 x double> %x, i64* %p) {
+; SSE-LABEL: store_double:
+; SSE: # BB#0:
+; SSE-NEXT: addpd %xmm0, %xmm0
+; SSE-NEXT: movlpd %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: store_double:
+; AVX: # BB#0:
+; AVX-NEXT: vaddpd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vmovlpd %xmm0, (%rdi)
+; AVX-NEXT: retq
+ %a = fadd <2 x double> %x, %x
+ %b = extractelement <2 x double> %a, i32 0
+ %c = bitcast double %b to i64
+ store i64 %c, i64* %p
+ ret void
+}
+
+define void @store_int(<4 x i32> %x, <2 x float>* %p) {
+; SSE-LABEL: store_int:
+; SSE: # BB#0:
+; SSE-NEXT: paddd %xmm0, %xmm0
+; SSE-NEXT: movq %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: store_int:
+; AVX: # BB#0:
+; AVX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vmovq %xmm0, (%rdi)
+; AVX-NEXT: retq
+ %a = add <4 x i32> %x, %x
+ %b = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %c = bitcast <2 x i32> %b to <2 x float>
+ store <2 x float> %c, <2 x float>* %p
+ ret void
+}
+
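The comment block at the top of this new test maps each domain to its
64-bit store. The same three flavors are reachable from C++ via the classic
SSE intrinsics; a small standalone sketch (assuming an SSE2-capable x86-64
target, and noting that a compiler is always free to choose a different
encoding):

    #include <xmmintrin.h>  // SSE: _mm_storel_pi
    #include <emmintrin.h>  // SSE2: _mm_storel_pd, _mm_storel_epi64

    // Each call stores the low 64 bits of a 128-bit register; the opcode
    // typically emitted matches the domain of the vector type.
    void store_low_halves(__m128 f, __m128d d, __m128i i,
                          __m64* pf, double* pd, __m128i* pi) {
      _mm_storel_pi(pf, f);     // float domain: usually movlps
      _mm_storel_pd(pd, d);     // double domain: usually movlpd
      _mm_storel_epi64(pi, i);  // int domain: usually movq
    }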
diff --git a/test/CodeGen/X86/sse2-intrinsics-x86.ll b/test/CodeGen/X86/sse2-intrinsics-x86.ll
index cab62a38661..5afebd24bb4 100644
--- a/test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -581,7 +581,7 @@ declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
; CHECK: test_x86_sse2_storel_dq
; CHECK: movl
- ; CHECK: movq
+ ; CHECK: movlps
call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
ret void
}
diff --git a/test/CodeGen/X86/vec_insert-5.ll b/test/CodeGen/X86/vec_insert-5.ll
index 26ac2a23e31..4018a21090e 100644
--- a/test/CodeGen/X86/vec_insert-5.ll
+++ b/test/CodeGen/X86/vec_insert-5.ll
@@ -9,7 +9,7 @@ define void @t1(i32 %a, x86_mmx* %P) nounwind {
; CHECK-NEXT: shll $12, %ecx
; CHECK-NEXT: movd %ecx, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
-; CHECK-NEXT: movlpd %xmm0, (%eax)
+; CHECK-NEXT: movq %xmm0, (%eax)
; CHECK-NEXT: retl
%tmp12 = shl i32 %a, 12
%tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1
diff --git a/test/CodeGen/X86/vec_insert-mmx.ll b/test/CodeGen/X86/vec_insert-mmx.ll
index c77bffcbd37..cbd420885ac 100644
--- a/test/CodeGen/X86/vec_insert-mmx.ll
+++ b/test/CodeGen/X86/vec_insert-mmx.ll
@@ -7,7 +7,7 @@ define x86_mmx @t0(i32 %A) nounwind {
; X86-32: ## BB#0:
; X86-32: movd {{[0-9]+}}(%esp), %xmm0
; X86-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
-; X86-32-NEXT: movlpd %xmm0, (%esp)
+; X86-32-NEXT: movq %xmm0, (%esp)
; X86-32-NEXT: movq (%esp), %mm0
; X86-32-NEXT: addl $12, %esp
; X86-32-NEXT: retl
diff --git a/test/CodeGen/X86/vec_zero_cse.ll b/test/CodeGen/X86/vec_zero_cse.ll
index 66c64d32522..8ed8083a284 100644
--- a/test/CodeGen/X86/vec_zero_cse.ll
+++ b/test/CodeGen/X86/vec_zero_cse.ll
@@ -9,7 +9,7 @@
define void @test1() {
;CHECK-LABEL: @test1
-;CHECK: xorpd
+;CHECK: xorps
store <1 x i64> zeroinitializer, <1 x i64>* @M1
store <2 x i32> zeroinitializer, <2 x i32>* @M2
ret void
diff --git a/test/CodeGen/X86/vector-shuffle-mmx.ll b/test/CodeGen/X86/vector-shuffle-mmx.ll
index 094722d2680..dbccd2694b0 100644
--- a/test/CodeGen/X86/vector-shuffle-mmx.ll
+++ b/test/CodeGen/X86/vector-shuffle-mmx.ll
@@ -9,7 +9,7 @@ define void @test0(<1 x i64>* %x) {
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X32-NEXT: movlpd %xmm0, (%eax)
+; X32-NEXT: movq %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test0:
@@ -38,13 +38,13 @@ define void @test1() {
; X32-NEXT: .cfi_def_cfa_offset 24
; X32-NEXT: Ltmp2:
; X32-NEXT: .cfi_offset %edi, -8
-; X32-NEXT: xorpd %xmm0, %xmm0
-; X32-NEXT: movlpd %xmm0, (%esp)
+; X32-NEXT: xorps %xmm0, %xmm0
+; X32-NEXT: movlps %xmm0, (%esp)
; X32-NEXT: movq (%esp), %mm0
; X32-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,2,2,3,4,5,6,7]
; X32-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X32-NEXT: movlpd %xmm0, {{[0-9]+}}(%esp)
+; X32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT: movq {{[0-9]+}}(%esp), %mm1
; X32-NEXT: xorl %edi, %edi
; X32-NEXT: maskmovq %mm1, %mm0
@@ -54,8 +54,8 @@ define void @test1() {
;
; X64-LABEL: test1:
; X64: ## BB#0: ## %entry
-; X64-NEXT: pxor %xmm0, %xmm0
-; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: xorps %xmm0, %xmm0
+; X64-NEXT: movlps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %mm0
; X64-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,2,2,3,4,5,6,7]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll
index 6b7f489bf85..b0240ddb043 100644
--- a/test/CodeGen/X86/widen_cast-1.ll
+++ b/test/CodeGen/X86/widen_cast-1.ll
@@ -3,12 +3,14 @@
; CHECK: movl
; CHECK: paddw
-; CHECK: movlpd
+; CHECK: movq
+
+; FIXME - if this test cares about scheduling, why isn't it being checked?
; Scheduler can produce a different instruction order
; ATOM: movl
; ATOM: paddw
-; ATOM: movlpd
+; ATOM: movq
; bitcast a v4i16 to v2i32
diff --git a/test/CodeGen/X86/widen_cast-4.ll b/test/CodeGen/X86/widen_cast-4.ll
index 060dfb1011b..8ed2785ae73 100644
--- a/test/CodeGen/X86/widen_cast-4.ll
+++ b/test/CodeGen/X86/widen_cast-4.ll
@@ -52,7 +52,7 @@ forbody: ; preds = %forcond
; CHECK-NEXT: psraw $8
; CHECK-NEXT: psraw $2
; CHECK-NEXT: pshufb
-; CHECK-NEXT: movlpd
+; CHECK-NEXT: movq
;
; FIXME: We shouldn't require both a movd and an insert.
; CHECK-WIDE: %forbody
diff --git a/test/CodeGen/X86/widen_cast-5.ll b/test/CodeGen/X86/widen_cast-5.ll
index ccf0bd1d0b6..4e9d2dfdb5d 100644
--- a/test/CodeGen/X86/widen_cast-5.ll
+++ b/test/CodeGen/X86/widen_cast-5.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s
; CHECK: movl
-; CHECK: movlpd
+; CHECK: movq
; bitcast a i64 to v2i32
define void @convert(<2 x i32>* %dst.addr, i64 %src) nounwind {
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
index 2aa870f16eb..302805213d0 100644
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -84,7 +84,7 @@ define void @shuf5(<8 x i8>* %p) nounwind {
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [33,33,33,33,33,33,33,33]
; CHECK-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; CHECK-NEXT: movlpd %xmm0, (%eax)
+; CHECK-NEXT: movq %xmm0, (%eax)
; CHECK-NEXT: retl
%v = shufflevector <2 x i8> <i8 4, i8 33>, <2 x i8> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
store <8 x i8> %v, <8 x i8>* %p, align 8