diff options
author | Silviu Baranga <silviu.baranga@arm.com> | 2016-06-21 15:16:34 +0000 |
---|---|---|
committer | Silviu Baranga <silviu.baranga@arm.com> | 2016-06-21 15:16:34 +0000 |
commit | 7e4cf0a6557a32a62bca7901c7b20a28823f0ff7 (patch) | |
tree | fe25e4933d05b5e98cf4c2a8aaf346255c3215c0 /test/CodeGen | |
parent | b4f51c3ea5617bec17ac927eb12e00cd21891f51 (diff) |
[AArch64] Switch regression tests to test features not CPUs
Summary:
We have switched to using features for all heuristics, but
the tests for these are still using -mcpu, which means we
are not directly testing the features.
This commit converts at least some of the existing regression tests
to select the new features directly (via -mattr) instead of relying on -mcpu.
This still leaves the following features untested:
- merge-narrow-ld
- predictable-select-expensive
- alternate-sextload-cvt-f32-pattern
- disable-latency-sched-heuristic
Reviewers: mcrosier, t.p.northover, rengolin
Subscribers: MatzeB, aemerson, llvm-commits, rengolin
Differential Revision: http://reviews.llvm.org/D21288
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273271 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen')
-rw-r--r-- | test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll | 14 | ||||
-rw-r--r-- | test/CodeGen/AArch64/aarch64-gep-opt.ll | 3 | ||||
-rw-r--r-- | test/CodeGen/AArch64/merge-store-dependency.ll | 63 | ||||
-rw-r--r-- | test/CodeGen/AArch64/merge-store.ll | 77 | ||||
-rw-r--r-- | test/CodeGen/AArch64/misched-fusion.ll | 10 | ||||
-rw-r--r-- | test/CodeGen/AArch64/no-quad-ldp-stp.ll (renamed from test/CodeGen/AArch64/exynos-quad-ldp-stp.ll) | 9 | ||||
-rw-r--r-- | test/CodeGen/AArch64/remat.ll | 1 | ||||
-rw-r--r-- | test/CodeGen/AArch64/sqrt-fastmath.ll | 2 |
8 files changed, 98 insertions, 81 deletions
diff --git a/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll b/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll index c581a8bf738..29b71e04261 100644 --- a/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll +++ b/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll @@ -1,8 +1,14 @@ -; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A57 --check-prefix CHECK-EVEN -; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A57 --check-prefix CHECK-ODD +; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-BALFP --check-prefix CHECK-EVEN +; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-BALFP --check-prefix CHECK-ODD ; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-EVEN ; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-ODD +; The following tests use the balance-fp-ops feature, and should be independent of +; the target cpu. 
+ +; RUN: llc < %s -mtriple=aarch64-linux-gnueabi -mattr=+balance-fp-ops -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN --check-prefix CHECK-BALFP +; RUN: llc < %s -mtriple=aarch64-linux-gnueabi -mattr=+balance-fp-ops -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD --check-prefix CHECK-BALFP + ; Test the AArch64A57FPLoadBalancing pass. This pass relies heavily on register allocation, so ; our test strategy is to: ; * Force the pass to always perform register swapping even if the dest register is of the @@ -75,7 +81,7 @@ entry: ; CHECK: fmsub [[x]] ; CHECK: fmadd [[y]] ; CHECK: fmadd [[x]] -; CHECK-A57: stp [[x]], [[y]] +; CHECK-BALFP: stp [[x]], [[y]] ; CHECK-A53-DAG: str [[x]] ; CHECK-A53-DAG: str [[y]] @@ -170,7 +176,7 @@ declare void @g(...) 
#1 ; CHECK: fmsub [[x]] ; CHECK: fmadd [[y]] ; CHECK: fmadd [[x]] -; CHECK-A57: stp [[x]], [[y]] +; CHECK-BALFP: stp [[x]], [[y]] ; CHECK-A53-DAG: str [[x]] ; CHECK-A53-DAG: str [[y]] diff --git a/test/CodeGen/AArch64/aarch64-gep-opt.ll b/test/CodeGen/AArch64/aarch64-gep-opt.ll index 93e2ff14ac7..cae00a9b1cb 100644 --- a/test/CodeGen/AArch64/aarch64-gep-opt.ll +++ b/test/CodeGen/AArch64/aarch64-gep-opt.ll @@ -1,6 +1,9 @@ ; RUN: llc -O3 -aarch64-gep-opt=true -verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -O3 -aarch64-gep-opt=true -mattr=-use-aa -print-after=codegenprepare < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-NoAA <%t %s +; RUN: llc -O3 -aarch64-gep-opt=true -mattr=+use-aa -print-after=codegenprepare < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-UseAA <%t %s ; RUN: llc -O3 -aarch64-gep-opt=true -print-after=codegenprepare -mcpu=cyclone < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-NoAA <%t %s ; RUN: llc -O3 -aarch64-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-UseAA <%t %s + target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" target triple = "aarch64-linux-gnueabi" diff --git a/test/CodeGen/AArch64/merge-store-dependency.ll b/test/CodeGen/AArch64/merge-store-dependency.ll new file mode 100644 index 00000000000..c68cee91a3c --- /dev/null +++ b/test/CodeGen/AArch64/merge-store-dependency.ll @@ -0,0 +1,63 @@ +; RUN: llc -mcpu cortex-a53 -march aarch64 %s -o - | FileCheck %s --check-prefix=A53 + +; PR26827 - Merge stores causes wrong dependency. 
+%struct1 = type { %struct1*, %struct1*, i32, i32, i16, i16, void (i32, i32, i8*)*, i8* } +@gv0 = internal unnamed_addr global i32 0, align 4 +@gv1 = internal unnamed_addr global %struct1** null, align 8 + +define void @test(%struct1* %fde, i32 %fd, void (i32, i32, i8*)* %func, i8* %arg) { +;CHECK-LABEL: test +entry: +; A53: mov [[DATA:w[0-9]+]], w1 +; A53: str q{{[0-9]+}}, {{.*}} +; A53: str q{{[0-9]+}}, {{.*}} +; A53: str [[DATA]], {{.*}} + + %0 = bitcast %struct1* %fde to i8* + tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 40, i32 8, i1 false) + %state = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 4 + store i16 256, i16* %state, align 8 + %fd1 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 2 + store i32 %fd, i32* %fd1, align 8 + %force_eof = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 3 + store i32 0, i32* %force_eof, align 4 + %func2 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 6 + store void (i32, i32, i8*)* %func, void (i32, i32, i8*)** %func2, align 8 + %arg3 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 7 + store i8* %arg, i8** %arg3, align 8 + %call = tail call i32 (i32, i32, ...) 
@fcntl(i32 %fd, i32 4, i8* %0) #6 + %1 = load i32, i32* %fd1, align 8 + %cmp.i = icmp slt i32 %1, 0 + br i1 %cmp.i, label %if.then.i, label %while.body.i.preheader +if.then.i: + unreachable + +while.body.i.preheader: + %2 = load i32, i32* @gv0, align 4 + %3 = icmp eq i32* %fd1, @gv0 + br i1 %3, label %while.body.i.split, label %while.body.i.split.ver.us.preheader + +while.body.i.split.ver.us.preheader: + br label %while.body.i.split.ver.us + +while.body.i.split.ver.us: + %.reg2mem21.0 = phi i32 [ %mul.i.ver.us, %while.body.i.split.ver.us ], [ %2, %while.body.i.split.ver.us.preheader ] + %mul.i.ver.us = shl nsw i32 %.reg2mem21.0, 1 + %4 = icmp sgt i32 %mul.i.ver.us, %1 + br i1 %4, label %while.end.i, label %while.body.i.split.ver.us + +while.body.i.split: + br label %while.body.i.split + +while.end.i: + %call.i = tail call i8* @foo() + store i8* %call.i, i8** bitcast (%struct1*** @gv1 to i8**), align 8 + br label %exit + +exit: + ret void +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) +declare i32 @fcntl(i32, i32, ...) 
+declare noalias i8* @foo() diff --git a/test/CodeGen/AArch64/merge-store.ll b/test/CodeGen/AArch64/merge-store.ll index ba0e6bbb637..ee9d93b6110 100644 --- a/test/CodeGen/AArch64/merge-store.ll +++ b/test/CodeGen/AArch64/merge-store.ll @@ -1,6 +1,5 @@ -; RUN: llc -march aarch64 %s -o - | FileCheck %s -; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=cyclone | FileCheck %s --check-prefix=CYCLONE -; RUN: llc -mcpu cortex-a53 -march aarch64 %s -o - | FileCheck %s --check-prefix=A53 +; RUN: llc -march aarch64 %s -mcpu=cyclone -o - | FileCheck %s --check-prefix=CYCLONE --check-prefix=CHECK +; RUN: llc -march aarch64 %s -mattr=-slow-misaligned-128store -o - | FileCheck %s --check-prefix=MISALIGNED --check-prefix=CHECK @g0 = external global <3 x float>, align 16 @g1 = external global <3 x float>, align 4 @@ -39,9 +38,12 @@ define void @merge_vec_extract_stores(<4 x float> %v1, <2 x float>* %ptr) { store <2 x float> %shuffle1, <2 x float>* %idx1, align 8 ret void -; CHECK-LABEL: merge_vec_extract_stores -; CHECK: stur q0, [x0, #24] -; CHECK-NEXT: ret +; MISALIGNED-LABEL: merge_vec_extract_stores +; MISALIGNED: stur q0, [x0, #24] +; MISALIGNED-NEXT: ret + +; FIXME: Ideally we would like to use a generic target for this test, but this relies +; on suppressing store pairs. ; CYCLONE-LABEL: merge_vec_extract_stores ; CYCLONE: ext v1.16b, v0.16b, v0.16b, #8 @@ -49,66 +51,3 @@ define void @merge_vec_extract_stores(<4 x float> %v1, <2 x float>* %ptr) { ; CYCLONE-NEXT: str d1, [x0, #32] ; CYCLONE-NEXT: ret } - - -; PR26827 - Merge stores causes wrong dependency. 
-%struct1 = type { %struct1*, %struct1*, i32, i32, i16, i16, void (i32, i32, i8*)*, i8* } -@gv0 = internal unnamed_addr global i32 0, align 4 -@gv1 = internal unnamed_addr global %struct1** null, align 8 - -define void @test(%struct1* %fde, i32 %fd, void (i32, i32, i8*)* %func, i8* %arg) { -;CHECK-LABEL: test -entry: -;A53: mov [[DATA:w[0-9]+]], w1 -;A53: str q{{[0-9]+}}, {{.*}} -;A53: str q{{[0-9]+}}, {{.*}} -;A53: str [[DATA]], {{.*}} - - %0 = bitcast %struct1* %fde to i8* - tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 40, i32 8, i1 false) - %state = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 4 - store i16 256, i16* %state, align 8 - %fd1 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 2 - store i32 %fd, i32* %fd1, align 8 - %force_eof = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 3 - store i32 0, i32* %force_eof, align 4 - %func2 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 6 - store void (i32, i32, i8*)* %func, void (i32, i32, i8*)** %func2, align 8 - %arg3 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 7 - store i8* %arg, i8** %arg3, align 8 - %call = tail call i32 (i32, i32, ...) 
@fcntl(i32 %fd, i32 4, i8* %0) #6 - %1 = load i32, i32* %fd1, align 8 - %cmp.i = icmp slt i32 %1, 0 - br i1 %cmp.i, label %if.then.i, label %while.body.i.preheader -if.then.i: - unreachable - -while.body.i.preheader: - %2 = load i32, i32* @gv0, align 4 - %3 = icmp eq i32* %fd1, @gv0 - br i1 %3, label %while.body.i.split, label %while.body.i.split.ver.us.preheader - -while.body.i.split.ver.us.preheader: - br label %while.body.i.split.ver.us - -while.body.i.split.ver.us: - %.reg2mem21.0 = phi i32 [ %mul.i.ver.us, %while.body.i.split.ver.us ], [ %2, %while.body.i.split.ver.us.preheader ] - %mul.i.ver.us = shl nsw i32 %.reg2mem21.0, 1 - %4 = icmp sgt i32 %mul.i.ver.us, %1 - br i1 %4, label %while.end.i, label %while.body.i.split.ver.us - -while.body.i.split: - br label %while.body.i.split - -while.end.i: - %call.i = tail call i8* @foo() - store i8* %call.i, i8** bitcast (%struct1*** @gv1 to i8**), align 8 - br label %exit - -exit: - ret void -} - -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) -declare i32 @fcntl(i32, i32, ...) 
-declare noalias i8* @foo() diff --git a/test/CodeGen/AArch64/misched-fusion.ll b/test/CodeGen/AArch64/misched-fusion.ll index d3886932903..0f4c0ac84ce 100644 --- a/test/CodeGen/AArch64/misched-fusion.ll +++ b/test/CodeGen/AArch64/misched-fusion.ll @@ -1,4 +1,6 @@ +; RUN: llc -o - %s -mattr=+macroop-fusion,+use-postra-scheduler | FileCheck %s ; RUN: llc -o - %s -mcpu=cyclone | FileCheck %s + target triple = "arm64-apple-ios" declare void @foobar(i32 %v0, i32 %v1) @@ -8,12 +10,12 @@ declare void @foobar(i32 %v0, i32 %v1) ; CHECK: add w[[ADDRES:[0-9]+]], w1, #7 ; CHECK: sub w[[SUBRES:[0-9]+]], w0, #13 ; CHECK-NEXT: cbnz w[[SUBRES]], [[SKIPBLOCK:LBB[0-9_]+]] -; CHECK: mov x0, x[[ADDRES]] -; CHECK: mov x1, x[[SUBRES]] +; CHECK: mov [[REGTY:[x,w]]]0, [[REGTY]][[ADDRES]] +; CHECK: mov [[REGTY]]1, [[REGTY]][[SUBRES]] ; CHECK: bl _foobar ; CHECK: [[SKIPBLOCK]]: -; CHECK: mov x0, x[[SUBRES]] -; CHECK: mov x1, x[[ADDRES]] +; CHECK: mov [[REGTY]]0, [[REGTY]][[SUBRES]] +; CHECK: mov [[REGTY]]1, [[REGTY]][[ADDRES]] ; CHECK: bl _foobar define void @test_sub_cbz(i32 %a0, i32 %a1) { entry: diff --git a/test/CodeGen/AArch64/exynos-quad-ldp-stp.ll b/test/CodeGen/AArch64/no-quad-ldp-stp.ll index ca811c5c328..19d371adbdf 100644 --- a/test/CodeGen/AArch64/exynos-quad-ldp-stp.ll +++ b/test/CodeGen/AArch64/no-quad-ldp-stp.ll @@ -1,10 +1,11 @@ +; RUN: llc < %s -march=aarch64 -mattr=+no-quad-ldst-pairs -verify-machineinstrs -asm-verbose=false | FileCheck %s ; RUN: llc < %s -march=aarch64 -mcpu=exynos-m1 -verify-machineinstrs -asm-verbose=false | FileCheck %s -; CHECK-LABEL: test_exynos_nopair_st +; CHECK-LABEL: test_nopair_st ; CHECK: str ; CHECK: stur ; CHECK-NOT: stp -define void @test_exynos_nopair_st(double* %ptr, <2 x double> %v1, <2 x double> %v2) { +define void @test_nopair_st(double* %ptr, <2 x double> %v1, <2 x double> %v2) { %tmp1 = bitcast double* %ptr to <2 x double>* store <2 x double> %v2, <2 x double>* %tmp1, align 16 %add.ptr = getelementptr inbounds double, double* %ptr, 
i64 -2 @@ -13,11 +14,11 @@ define void @test_exynos_nopair_st(double* %ptr, <2 x double> %v1, <2 x double> ret void } -; CHECK-LABEL: test_exynos_nopair_ld +; CHECK-LABEL: test_nopair_ld ; CHECK: ldr ; CHECK: ldr ; CHECK-NOT: ldp -define <2 x i64> @test_exynos_nopair_ld(i64* %p) { +define <2 x i64> @test_nopair_ld(i64* %p) { %a1 = bitcast i64* %p to <2 x i64>* %tmp1 = load <2 x i64>, < 2 x i64>* %a1, align 8 %add.ptr2 = getelementptr inbounds i64, i64* %p, i64 2 diff --git a/test/CodeGen/AArch64/remat.ll b/test/CodeGen/AArch64/remat.ll index c4470d04ff7..b2ca1cca081 100644 --- a/test/CodeGen/AArch64/remat.ll +++ b/test/CodeGen/AArch64/remat.ll @@ -6,6 +6,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=exynos-m1 -o - %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=kryo -o - %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=vulcan -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnuabi -mattr=+custom-cheap-as-move -o - %s | FileCheck %s %X = type { i64, i64, i64 } declare void @f(%X*) diff --git a/test/CodeGen/AArch64/sqrt-fastmath.ll b/test/CodeGen/AArch64/sqrt-fastmath.ll index 8bc85a5628c..0d9533fd27f 100644 --- a/test/CodeGen/AArch64/sqrt-fastmath.ll +++ b/test/CodeGen/AArch64/sqrt-fastmath.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -mtriple=aarch64 -mattr=neon -recip=!sqrt,!vec-sqrt | FileCheck %s --check-prefix=FAULT ; RUN: llc < %s -mtriple=aarch64 -mattr=neon -recip=sqrt,vec-sqrt | FileCheck %s +; RUN: llc < %s -mtriple=aarch64 -mattr=neon,-use-reverse-square-root | FileCheck %s --check-prefix=FAULT +; RUN: llc < %s -mtriple=aarch64 -mattr=neon,+use-reverse-square-root | FileCheck %s declare float @llvm.sqrt.f32(float) #1 declare double @llvm.sqrt.f64(double) #1 |