summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorKevin Qin <Kevin.Qin@arm.com>2015-03-09 06:14:28 +0000
committerKevin Qin <Kevin.Qin@arm.com>2015-03-09 06:14:28 +0000
commit40e66277f7daa2f5ce6f4f62b91c1a17e50df98c (patch)
treeab3c7b7b96692253ebb08ae743c5d4a9efb2a28d /test
parent994b4c784fa3d63c90b19fd48e522fc6ff3247e7 (diff)
[AArch64] Enable partial & runtime unrolling on cortex-a57
The inner loop of a loop nest is more likely to be a hot loop, and its runtime check can be promoted out by patch 0001, so the overhead is lower. We can therefore try a doubled threshold to unroll more such loops. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@231632 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r--test/Transforms/LoopUnroll/AArch64/lit.local.cfg3
-rw-r--r--test/Transforms/LoopUnroll/AArch64/partial.ll76
-rw-r--r--test/Transforms/LoopUnroll/AArch64/runtime-loop.ll33
3 files changed, 112 insertions, 0 deletions
diff --git a/test/Transforms/LoopUnroll/AArch64/lit.local.cfg b/test/Transforms/LoopUnroll/AArch64/lit.local.cfg
new file mode 100644
index 00000000000..cec29af5bbe
--- /dev/null
+++ b/test/Transforms/LoopUnroll/AArch64/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'AArch64' in config.root.targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/LoopUnroll/AArch64/partial.ll b/test/Transforms/LoopUnroll/AArch64/partial.ll
new file mode 100644
index 00000000000..8a1ea80c9d5
--- /dev/null
+++ b/test/Transforms/LoopUnroll/AArch64/partial.ll
@@ -0,0 +1,76 @@
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s
+
+; This loop is expected to be partially unrolled 8 times.
+define void @unroll1() nounwind { ; Single counted loop with no nested loop; takes no arguments and returns void.
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %inc, %loop ] ; counter: 0 on entry, %inc on the back edge
+ %inc = add i32 %iv, 1 ; the only arithmetic instruction in the loop body
+ %exitcnd = icmp uge i32 %inc, 1024 ; unsigned >=, so the loop is left after 1024 iterations
+ br i1 %exitcnd, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; CHECK: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: icmp
+
+; The inner loop here is expected to be partially unrolled 16 times.
+define void @unroll2() nounwind { ; Two-level loop nest: %loop1 is the outer loop, %loop2 the inner one.
+entry:
+ br label %loop1
+
+loop1:
+ %iv1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ] ; outer counter
+ br label %loop2.header
+
+loop2.header:
+ br label %loop2
+
+loop2:
+ %iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ] ; inner counter, restarts at 0 on every outer iteration
+ %inc2 = add i32 %iv2, 1
+ %exitcnd2 = icmp uge i32 %inc2, 1024 ; inner loop is left after 1024 iterations
+ br i1 %exitcnd2, label %exit2, label %loop2
+
+exit2:
+ br label %loop1.latch
+
+loop1.latch:
+ %inc1 = add i32 %iv1, 1
+ %exitcnd1 = icmp uge i32 %inc1, 1024 ; outer loop is left after 1024 iterations
+ br i1 %exitcnd1, label %exit, label %loop1 ; must test the outer condition; the inner %exitcnd2 is always true here
+
+exit:
+ ret void
+}
+
+
+
+; CHECK: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: icmp
diff --git a/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll b/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
new file mode 100644
index 00000000000..d3dc081fa6f
--- /dev/null
+++ b/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s
+
+; Tests for unrolling loops with run-time trip counts
+
+; CHECK: %xtraiter = and i32 %n
+; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0
+; CHECK: br i1 %lcmp.mod, label %for.body.prol, label %for.body.preheader.split
+
+; CHECK: for.body.prol:
+; CHECK: for.body:
+
+define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly { ; Returns the sum of the first %n i32 elements at %a (0 when %n is 0).
+entry:
+ %cmp1 = icmp eq i32 %n, 0 ; skip the loop entirely when there is nothing to sum
+ br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] ; 64-bit element index, starts at 0
+ %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ] ; running sum, starts at 0
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %sum.02
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow the index so it can be compared with the i32 %n
+ %exitcond = icmp eq i32 %lftr.wideiv, %n ; %n is a function argument, so the trip count is only known at run time
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ ret i32 %sum.0.lcssa
+}
+
+