diff options
author | Kevin Qin <Kevin.Qin@arm.com> | 2015-03-09 06:14:28 +0000 |
---|---|---|
committer | Kevin Qin <Kevin.Qin@arm.com> | 2015-03-09 06:14:28 +0000 |
commit | 40e66277f7daa2f5ce6f4f62b91c1a17e50df98c (patch) | |
tree | ab3c7b7b96692253ebb08ae743c5d4a9efb2a28d /test | |
parent | 994b4c784fa3d63c90b19fd48e522fc6ff3247e7 (diff) |
[AArch64] Enable partial & runtime unrolling on cortex-a57
For inner one of nested loops, it is more likely to be a hot loop,
and the runtime check can be promoted out from patch 0001, so the
overhead is less, we can try a doubled threshold to unroll more loops.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@231632 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/Transforms/LoopUnroll/AArch64/lit.local.cfg | 3 | ||||
-rw-r--r-- | test/Transforms/LoopUnroll/AArch64/partial.ll | 76 | ||||
-rw-r--r-- | test/Transforms/LoopUnroll/AArch64/runtime-loop.ll | 33 |
3 files changed, 112 insertions, 0 deletions
diff --git a/test/Transforms/LoopUnroll/AArch64/lit.local.cfg b/test/Transforms/LoopUnroll/AArch64/lit.local.cfg new file mode 100644 index 00000000000..cec29af5bbe --- /dev/null +++ b/test/Transforms/LoopUnroll/AArch64/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'AArch64' in config.root.targets: + config.unsupported = True + diff --git a/test/Transforms/LoopUnroll/AArch64/partial.ll b/test/Transforms/LoopUnroll/AArch64/partial.ll new file mode 100644 index 00000000000..8a1ea80c9d5 --- /dev/null +++ b/test/Transforms/LoopUnroll/AArch64/partial.ll @@ -0,0 +1,76 @@ +; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s + +; Partial unroll 8 times for this loop. +define void @unroll1() nounwind { +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %inc, %loop ] + %inc = add i32 %iv, 1 + %exitcnd = icmp uge i32 %inc, 1024 + br i1 %exitcnd, label %exit, label %loop + +exit: + ret void +} + +; CHECK: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: icmp + +; Partial unroll 16 times for this loop. +define void @unroll2() nounwind { +entry: + br label %loop1 + +loop1: + %iv1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ] + br label %loop2.header + +loop2.header: + br label %loop2 + +loop2: + %iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ] + %inc2 = add i32 %iv2, 1 + %exitcnd2 = icmp uge i32 %inc2, 1024 + br i1 %exitcnd2, label %exit2, label %loop2 + +exit2: + br label %loop1.latch + +loop1.latch: + %inc1 = add i32 %iv1, 1 + %exitcnd1 = icmp uge i32 %inc1, 1024 + br i1 %exitcnd2, label %exit, label %loop1 + +exit: + ret void +} + + + +; CHECK: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: icmp diff --git a/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll b/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll new file mode 100644 index 00000000000..d3dc081fa6f --- /dev/null +++ b/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll @@ -0,0 +1,33 @@ +; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s + +; Tests for unrolling loops with run-time trip counts + +; CHECK: %xtraiter = and i32 %n +; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0 +; CHECK: br i1 %lcmp.mod, label %for.body.prol, label %for.body.preheader.split + +; CHECK: for.body.prol: +; CHECK: for.body: + +define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %0, %sum.02 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %sum.0.lcssa +} + + |