diff options
author | tstellar <tstellar@91177308-0d34-0410-b5e6-96231b3b80d8> | 2012-10-22 14:55:30 +0000 |
---|---|---|
committer | tstellar <tstellar@91177308-0d34-0410-b5e6-96231b3b80d8> | 2012-10-22 14:55:30 +0000 |
commit | 81d3998812b5977a220d62076cc446822747af21 (patch) | |
tree | b073cb1be9c65dbdd6cd312c322efd3c04abb0af /test | |
parent | 63a079bf76d139b91e170557cf16b17bdbecd58d (diff) |
Merge master branch
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/R600/@166411 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
51 files changed, 2050 insertions, 76 deletions
diff --git a/test/BugPoint/crash-narrowfunctiontest.ll b/test/BugPoint/crash-narrowfunctiontest.ll index d080d9dd4b0..9df823ab973 100644 --- a/test/BugPoint/crash-narrowfunctiontest.ll +++ b/test/BugPoint/crash-narrowfunctiontest.ll @@ -2,6 +2,7 @@ ; ; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null ; REQUIRES: loadable_module +; XFAIL: lto define i32 @foo() { ret i32 1 } diff --git a/test/BugPoint/metadata.ll b/test/BugPoint/metadata.ll index 0eda5667ba4..98c79ee03a6 100644 --- a/test/BugPoint/metadata.ll +++ b/test/BugPoint/metadata.ll @@ -1,6 +1,7 @@ ; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null ; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s ; REQUIRES: loadable_module +; XFAIL: lto ; Bugpoint should keep the call's metadata attached to the call. diff --git a/test/BugPoint/remove_arguments_test.ll b/test/BugPoint/remove_arguments_test.ll index 29a03b83107..13aa9c5a653 100644 --- a/test/BugPoint/remove_arguments_test.ll +++ b/test/BugPoint/remove_arguments_test.ll @@ -1,6 +1,7 @@ ; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes ; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s ; REQUIRES: loadable_module +; XFAIL: lto ; Test to make sure that arguments are removed from the function if they are ; unnecessary. And clean up any types that that frees up too. 
diff --git a/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll b/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll new file mode 100644 index 00000000000..fcc6a7f7e96 --- /dev/null +++ b/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple=armv7-linux-gnueabi | FileCheck %s + +%struct.s = type { [4 x i32] } +@v = constant %struct.s zeroinitializer; + +declare void @f(%struct.s* %p); + +; CHECK: t: +define void @t(i32 %a, %struct.s* byval %s) nounwind { +entry: + +; Here we need to only check proper start address of restored %s argument. +; CHECK: sub sp, sp, #16 +; CHECK: push {r11, lr} +; CHECK: add r0, sp, #12 +; CHECK: stm r0, {r1, r2, r3} +; CHECK: add r0, sp, #12 +; CHECK-NEXT: bl f + call void @f(%struct.s* %s) + ret void +} + +; CHECK: caller: +define void @caller() { + +; CHECK: ldm r0, {r1, r2, r3} + call void @t(i32 0, %struct.s* @v); + ret void +} diff --git a/test/CodeGen/ARM/trap.ll b/test/CodeGen/ARM/trap.ll index 38842a9646f..21865f8e4ae 100644 --- a/test/CodeGen/ARM/trap.ll +++ b/test/CodeGen/ARM/trap.ll @@ -14,4 +14,16 @@ entry: unreachable } +define void @t2() nounwind { +entry: +; INSTR: t2: +; INSTR: trap + +; FUNC: t2: +; FUNC: bl __trap + call void @llvm.debugtrap() + unreachable +} + declare void @llvm.trap() nounwind +declare void @llvm.debugtrap() nounwind diff --git a/test/CodeGen/MSP430/fp.ll b/test/CodeGen/MSP430/fp.ll new file mode 100644 index 00000000000..c3273eff05c --- /dev/null +++ b/test/CodeGen/MSP430/fp.ll @@ -0,0 +1,17 @@ +; RUN: llc -O0 -disable-fp-elim < %s | FileCheck %s + +target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16" +target triple = "msp430---elf" + +define void @fp() nounwind { +entry: +; CHECK: fp: +; CHECK: push.w r4 +; CHECK: mov.w r1, r4 +; CHECK: sub.w #2, r1 + %i = alloca i16, align 2 +; CHECK: mov.w #0, -2(r4) + store i16 0, i16* %i, align 2 +; CHECK: pop.w r4 + ret void +} diff --git a/test/CodeGen/Mips/brconeq.ll 
b/test/CodeGen/Mips/brconeq.ll new file mode 100644 index 00000000000..613391557ef --- /dev/null +++ b/test/CodeGen/Mips/brconeq.ll @@ -0,0 +1,38 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 5, align 4 +@j = global i32 10, align 4 +@result = global i32 0, align 4 + +define void @test() nounwind { +entry: + %0 = load i32* @i, align 4 + %1 = load i32* @j, align 4 + %cmp = icmp eq i32 %0, %1 +; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} +; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]] +; 16: $[[LABEL]]: + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + store i32 1, i32* @result, align 4 + br label %if.end + +if.end: ; preds = %entry, %if.then + ret void +} + + + + + + + + + + + + + + + diff --git a/test/CodeGen/Mips/brconeqk.ll b/test/CodeGen/Mips/brconeqk.ll new file mode 100644 index 00000000000..2c0e72dabd2 --- /dev/null +++ b/test/CodeGen/Mips/brconeqk.ll @@ -0,0 +1,22 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 5, align 4 +@result = global i32 0, align 4 + +define void @test() nounwind { +entry: + %0 = load i32* @i, align 4 + %cmp = icmp eq i32 %0, 10 + br i1 %cmp, label %if.end, label %if.then +; 16: cmpi ${{[0-9]+}}, {{[0-9]+}} +; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]] +; 16: $[[LABEL]]: +if.then: ; preds = %entry + store i32 1, i32* @result, align 4 + br label %if.end + +if.end: ; preds = %entry, %if.then + ret void +} + + diff --git a/test/CodeGen/Mips/brconeqz.ll b/test/CodeGen/Mips/brconeqz.ll new file mode 100644 index 00000000000..5586e7b976d --- /dev/null +++ b/test/CodeGen/Mips/brconeqz.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 5, align 4 +@result = global i32 0, align 4 + +define void @test() nounwind { +entry: + %0 = load i32* @i, align 4 + %cmp = icmp eq i32 %0, 0 + br i1 %cmp, label %if.end, 
label %if.then +; 16: beqz ${{[0-9]+}}, $[[LABEL:[0-9A-Ba-b_]+]] +; 16: $[[LABEL]]: +if.then: ; preds = %entry + store i32 1, i32* @result, align 4 + br label %if.end + +if.end: ; preds = %entry, %if.then + ret void +} + diff --git a/test/CodeGen/Mips/brconge.ll b/test/CodeGen/Mips/brconge.ll new file mode 100644 index 00000000000..02f0a633b31 --- /dev/null +++ b/test/CodeGen/Mips/brconge.ll @@ -0,0 +1,37 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 5, align 4 +@j = global i32 10, align 4 +@k = global i32 5, align 4 +@result1 = global i32 0, align 4 +@result2 = global i32 1, align 4 + +define void @test() nounwind { +entry: + %0 = load i32* @i, align 4 + %1 = load i32* @j, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %if.then, label %if.end + +; 16: slt ${{[0-9]+}}, ${{[0-9]+}} +; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]] +; 16: $[[LABEL]]: + +if.then: ; preds = %entry + store i32 1, i32* @result1, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %2 = load i32* @k, align 4 + %cmp1 = icmp slt i32 %0, %2 + br i1 %cmp1, label %if.then2, label %if.end3 + +if.then2: ; preds = %if.end + store i32 1, i32* @result1, align 4 + br label %if.end3 + +if.end3: ; preds = %if.then2, %if.end + ret void +} + + diff --git a/test/CodeGen/Mips/brcongt.ll b/test/CodeGen/Mips/brcongt.ll new file mode 100644 index 00000000000..767b51b21b9 --- /dev/null +++ b/test/CodeGen/Mips/brcongt.ll @@ -0,0 +1,25 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 5, align 4 +@j = global i32 10, align 4 +@k = global i32 5, align 4 +@result = global i32 0, align 4 + +define void @test() nounwind { +entry: + %0 = load i32* @i, align 4 + %1 = load i32* @j, align 4 + %cmp = icmp sgt i32 %0, %1 + br i1 %cmp, label %if.end, label %if.then +; 16: slt ${{[0-9]+}}, ${{[0-9]+}} +; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]] +; 16: $[[LABEL]]: 
+if.then: ; preds = %entry + store i32 1, i32* @result, align 4 + br label %if.end + +if.end: ; preds = %entry, %if.then + ret void +} + + diff --git a/test/CodeGen/Mips/brconle.ll b/test/CodeGen/Mips/brconle.ll new file mode 100644 index 00000000000..854b2481c6e --- /dev/null +++ b/test/CodeGen/Mips/brconle.ll @@ -0,0 +1,37 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 -5, align 4 +@j = global i32 10, align 4 +@k = global i32 -5, align 4 +@result1 = global i32 0, align 4 +@result2 = global i32 1, align 4 + +define void @test() nounwind { +entry: + %0 = load i32* @j, align 4 + %1 = load i32* @i, align 4 + %cmp = icmp sgt i32 %0, %1 + br i1 %cmp, label %if.then, label %if.end + +; 16: slt ${{[0-9]+}}, ${{[0-9]+}} +; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]] +; 16: $[[LABEL]]: + +if.then: ; preds = %entry + store i32 1, i32* @result1, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %2 = load i32* @k, align 4 + %cmp1 = icmp sgt i32 %1, %2 + br i1 %cmp1, label %if.then2, label %if.end3 + +if.then2: ; preds = %if.end + store i32 0, i32* @result1, align 4 + br label %if.end3 + +if.end3: ; preds = %if.then2, %if.end + ret void +} + + diff --git a/test/CodeGen/Mips/brconlt.ll b/test/CodeGen/Mips/brconlt.ll new file mode 100644 index 00000000000..931a3e8c7ba --- /dev/null +++ b/test/CodeGen/Mips/brconlt.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 5, align 4 +@j = global i32 10, align 4 +@k = global i32 5, align 4 +@result = global i32 0, align 4 + +define void @test() nounwind { +entry: + %0 = load i32* @j, align 4 + %1 = load i32* @i, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %if.end, label %if.then + +; 16: slt ${{[0-9]+}}, ${{[0-9]+}} +; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]] +; 16: $[[LABEL]]: + +if.then: ; preds = %entry + store i32 1, i32* @result, align 4 + br label 
%if.end + +if.end: ; preds = %entry, %if.then + ret void +} + + diff --git a/test/CodeGen/Mips/brconne.ll b/test/CodeGen/Mips/brconne.ll new file mode 100644 index 00000000000..5d5bde3fcf9 --- /dev/null +++ b/test/CodeGen/Mips/brconne.ll @@ -0,0 +1,26 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 5, align 4 +@j = global i32 5, align 4 +@result = global i32 0, align 4 + +define void @test() nounwind { +entry: + %0 = load i32* @j, align 4 + %1 = load i32* @i, align 4 + %cmp = icmp eq i32 %0, %1 + br i1 %cmp, label %if.then, label %if.end +; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} +; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]] +; 16: lw ${{[0-9]+}}, %got(result)(${{[0-9]+}}) +; 16: $[[LABEL]]: + +if.then: ; preds = %entry + store i32 1, i32* @result, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + + diff --git a/test/CodeGen/Mips/brconnek.ll b/test/CodeGen/Mips/brconnek.ll new file mode 100644 index 00000000000..6208d7c5a04 --- /dev/null +++ b/test/CodeGen/Mips/brconnek.ll @@ -0,0 +1,25 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@j = global i32 5, align 4 +@result = global i32 0, align 4 + +define void @test() nounwind { +entry: + %0 = load i32* @j, align 4 + %cmp = icmp eq i32 %0, 5 + br i1 %cmp, label %if.then, label %if.end + +; 16: cmpi ${{[0-9]+}}, {{[0-9]+}} +; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]] +; 16: lw ${{[0-9]+}}, %got(result)(${{[0-9]+}}) +; 16: $[[LABEL]]: + +if.then: ; preds = %entry + store i32 1, i32* @result, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + + diff --git a/test/CodeGen/Mips/brconnez.ll b/test/CodeGen/Mips/brconnez.ll new file mode 100644 index 00000000000..47db7901b51 --- /dev/null +++ b/test/CodeGen/Mips/brconnez.ll @@ -0,0 +1,24 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@j = global 
i32 0, align 4 +@result = global i32 0, align 4 + +define void @test() nounwind { +entry: + %0 = load i32* @j, align 4 + %cmp = icmp eq i32 %0, 0 + br i1 %cmp, label %if.then, label %if.end + +; 16: bnez ${{[0-9]+}}, $[[LABEL:[0-9A-Ba-b_]+]] +; 16: lw ${{[0-9]+}}, %got(result)(${{[0-9]+}}) +; 16: $[[LABEL]]: + +if.then: ; preds = %entry + store i32 1, i32* @result, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + + diff --git a/test/CodeGen/Mips/mips64-sret.ll b/test/CodeGen/Mips/mips64-sret.ll new file mode 100644 index 00000000000..498c5fe1747 --- /dev/null +++ b/test/CodeGen/Mips/mips64-sret.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -O0 < %s + +%struct.S = type { [8 x i32] } + +@g = common global %struct.S zeroinitializer, align 4 + +define void @f(%struct.S* noalias sret %agg.result) nounwind { +entry: + %0 = bitcast %struct.S* %agg.result to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.S* @g to i8*), i64 32, i32 4, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind diff --git a/test/CodeGen/Mips/tailcall.ll b/test/CodeGen/Mips/tailcall.ll new file mode 100644 index 00000000000..4989636a20d --- /dev/null +++ b/test/CodeGen/Mips/tailcall.ll @@ -0,0 +1,100 @@ +; RUN: llc -march=mipsel -relocation-model=pic -enable-mips-tail-calls < %s | \ +; RUN: FileCheck %s -check-prefix=PIC32 +; RUN: llc -march=mipsel -relocation-model=static \ +; RUN: -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=STATIC32 +; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=+n64 -enable-mips-tail-calls \ +; RUN: < %s | FileCheck %s -check-prefix=N64 + +@g0 = common global i32 0, align 4 +@g1 = common global i32 0, align 4 +@g2 = common global i32 0, align 4 +@g3 = common global i32 0, align 4 +@g4 = common global i32 0, align 4 +@g5 = common global i32 0, align 4 +@g6 = common global i32 0, align 4 +@g7 = common global i32 0, 
align 4 +@g8 = common global i32 0, align 4 +@g9 = common global i32 0, align 4 + +define i32 @caller1(i32 %a0) nounwind { +entry: +; PIC32-NOT: jalr +; STATIC32-NOT: jal +; N64-NOT: jalr + + %call = tail call i32 @callee1(i32 1, i32 1, i32 1, i32 %a0) nounwind + ret i32 %call +} + +declare i32 @callee1(i32, i32, i32, i32) + +define i32 @caller2(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind { +entry: +; PIC32: jalr +; STATIC32: jal +; N64-NOT: jalr + + %call = tail call i32 @callee2(i32 1, i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind + ret i32 %call +} + +declare i32 @callee2(i32, i32, i32, i32, i32) + +define i32 @caller3(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) nounwind { +entry: +; PIC32: jalr +; STATIC32: jal +; N64-NOT: jalr + + %call = tail call i32 @callee3(i32 1, i32 1, i32 1, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) nounwind + ret i32 %call +} + +declare i32 @callee3(i32, i32, i32, i32, i32, i32, i32, i32) + +define i32 @caller4(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind { +entry: +; PIC32: jalr +; STATIC32: jal +; N64: jalr + + %call = tail call i32 @callee4(i32 1, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind + ret i32 %call +} + +declare i32 @callee4(i32, i32, i32, i32, i32, i32, i32, i32, i32) + +define i32 @caller5() nounwind readonly { +entry: +; PIC32-NOT: jalr +; STATIC32-NOT: jal +; N64-NOT: jalr + + %0 = load i32* @g0, align 4 + %1 = load i32* @g1, align 4 + %2 = load i32* @g2, align 4 + %3 = load i32* @g3, align 4 + %4 = load i32* @g4, align 4 + %5 = load i32* @g5, align 4 + %6 = load i32* @g6, align 4 + %7 = load i32* @g7, align 4 + %8 = load i32* @g8, align 4 + %9 = load i32* @g9, align 4 + %call = tail call fastcc i32 @callee5(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) + ret i32 %call +} + +define internal fastcc i32 @callee5(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9) nounwind 
readnone noinline { +entry: + %add = add nsw i32 %a1, %a0 + %add1 = add nsw i32 %add, %a2 + %add2 = add nsw i32 %add1, %a3 + %add3 = add nsw i32 %add2, %a4 + %add4 = add nsw i32 %add3, %a5 + %add5 = add nsw i32 %add4, %a6 + %add6 = add nsw i32 %add5, %a7 + %add7 = add nsw i32 %add6, %a8 + %add8 = add nsw i32 %add7, %a9 + ret i32 %add8 +} + diff --git a/test/CodeGen/PowerPC/i64_fp_round.ll b/test/CodeGen/PowerPC/i64_fp_round.ll new file mode 100644 index 00000000000..5a0c072c9c5 --- /dev/null +++ b/test/CodeGen/PowerPC/i64_fp_round.ll @@ -0,0 +1,27 @@ +; RUN: llc -mcpu=pwr7 < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define float @test(i64 %x) nounwind readnone { +entry: + %conv = sitofp i64 %x to float + ret float %conv +} + +; Verify that we get the code sequence needed to avoid double-rounding. +; Note that only parts of the sequence are checked for here, to allow +; for minor code generation differences. + +; CHECK: sradi [[REGISTER:[0-9]+]], 3, 53 +; CHECK: addi [[REGISTER:[0-9]+]], [[REGISTER]], 1 +; CHECK: cmpldi 0, [[REGISTER]], 1 +; CHECK: isel [[REGISTER:[0-9]+]], {{[0-9]+}}, 3, 1 +; CHECK: std [[REGISTER]], -{{[0-9]+}}(1) + + +; Also check that with -enable-unsafe-fp-math we do not get that extra +; code sequence. Simply verify that there is no "isel" present. + +; RUN: llc -mcpu=pwr7 -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=UNSAFE +; CHECK-UNSAFE-NOT: isel + diff --git a/test/CodeGen/X86/2012-10-18-crash-dagco.ll b/test/CodeGen/X86/2012-10-18-crash-dagco.ll new file mode 100644 index 00000000000..5b98624a37b --- /dev/null +++ b/test/CodeGen/X86/2012-10-18-crash-dagco.ll @@ -0,0 +1,61 @@ +; RUN: llc -march=x86-64 -mcpu=corei7 -disable-cgp-select2branch < %s + +; We should not crash on this test. 
+ +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin9.0.0" + +@global = external constant [411 x i8], align 1 + +define void @snork() nounwind { +bb: + br i1 undef, label %bb26, label %bb27 + +bb26: ; preds = %bb48, %bb26, %bb + switch i32 undef, label %bb26 [ + i32 142771596, label %bb28 + ] + +bb27: ; preds = %bb48, %bb + switch i32 undef, label %bb49 [ + i32 142771596, label %bb28 + ] + +bb28: ; preds = %bb27, %bb26 + %tmp = load i32* null + %tmp29 = trunc i32 %tmp to i8 + store i8* undef, i8** undef + %tmp30 = load i32* null + %tmp31 = icmp eq i32 %tmp30, 0 + %tmp32 = getelementptr inbounds [411 x i8]* @global, i32 0, i32 undef + %tmp33 = load i8* %tmp32, align 1 + %tmp34 = getelementptr inbounds [411 x i8]* @global, i32 0, i32 0 + %tmp35 = load i8* %tmp34, align 1 + %tmp36 = select i1 %tmp31, i8 %tmp35, i8 %tmp33 + %tmp37 = select i1 undef, i8 %tmp29, i8 %tmp36 + %tmp38 = zext i8 %tmp37 to i32 + %tmp39 = select i1 undef, i32 0, i32 %tmp38 + %tmp40 = getelementptr inbounds i32* null, i32 %tmp39 + %tmp41 = load i32* %tmp40, align 4 + %tmp42 = load i32* undef, align 4 + %tmp43 = load i32* undef + %tmp44 = xor i32 %tmp42, %tmp43 + %tmp45 = lshr i32 %tmp44, 8 + %tmp46 = lshr i32 %tmp44, 7 + call void @spam() + unreachable + +bb47: ; No predecessors! + ret void + +bb48: ; No predecessors! 
+ br i1 undef, label %bb27, label %bb26 + +bb49: ; preds = %bb49, %bb27 + br label %bb49 + +bb50: ; preds = %bb50 + br label %bb50 +} + +declare void @spam() noreturn nounwind diff --git a/test/CodeGen/X86/buildvec-insertvec.ll b/test/CodeGen/X86/buildvec-insertvec.ll new file mode 100644 index 00000000000..3fb69a48b3c --- /dev/null +++ b/test/CodeGen/X86/buildvec-insertvec.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mcpu=corei7 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +define void @foo(<3 x float> %in, <4 x i8>* nocapture %out) nounwind { + %t0 = fptoui <3 x float> %in to <3 x i8> + %t1 = shufflevector <3 x i8> %t0, <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef> + %t2 = insertelement <4 x i8> %t1, i8 -1, i32 3 + store <4 x i8> %t2, <4 x i8>* %out, align 4 + ret void +; CHECK: foo +; CHECK: cvttps2dq +; CHECK-NOT: pextrd +; CHECK: pinsrd +; CHECK-NEXT: pshufb +; CHECK: ret +} diff --git a/test/CodeGen/X86/extract-concat.ll b/test/CodeGen/X86/extract-concat.ll new file mode 100644 index 00000000000..704309eb650 --- /dev/null +++ b/test/CodeGen/X86/extract-concat.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -mcpu=corei7 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +define void @foo(<4 x float> %in, <4 x i8>* %out) { + %t0 = fptosi <4 x float> %in to <4 x i32> + %t1 = trunc <4 x i32> %t0 to <4 x i16> + %t2 = shufflevector <4 x i16> %t1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %t3 = trunc <8 x i16> %t2 to <8 x i8> + %t4 = shufflevector <8 x i8> %t3, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %t5 = insertelement <4 x i8> %t4, i8 -1, i32 3 + store <4 x i8> %t5, <4 x i8>* %out + ret void +; CHECK: foo +; CHECK: cvttps2dq +; CHECK-NOT: pextrd +; CHECK: pshufb +; CHECK: ret +} diff --git a/test/CodeGen/X86/pr14090.ll b/test/CodeGen/X86/pr14090.ll new file mode 100644 index 00000000000..d76b912fd8e --- /dev/null +++ b/test/CodeGen/X86/pr14090.ll @@ -0,0 +1,76 @@ +; RUN: llc < %s -march=x86-64 
-print-before=stack-coloring -print-after=stack-coloring >%t 2>&1 && FileCheck <%t %s + +define void @foo(i64* %retval.i, i32 %call, i32* %.ph.i80, i32 %fourteen, i32* %out.lo, i32* %out.hi) nounwind align 2 { +entry: + %_Tmp.i39 = alloca i64, align 8 + %retval.i33 = alloca i64, align 8 + %_Tmp.i = alloca i64, align 8 + %retval.i.i = alloca i64, align 8 + %_First.i = alloca i64, align 8 + + %0 = load i64* %retval.i, align 8 + + %1 = load i64* %retval.i, align 8 + + %_Tmp.i39.0.cast73 = bitcast i64* %_Tmp.i39 to i8* + call void @llvm.lifetime.start(i64 8, i8* %_Tmp.i39.0.cast73) + store i64 %1, i64* %_Tmp.i39, align 8 + %cmp.i.i.i40 = icmp slt i32 %call, 0 + %2 = lshr i64 %1, 32 + %3 = trunc i64 %2 to i32 + %sub.i.i.i44 = sub i32 0, %call + %cmp2.i.i.i45 = icmp ult i32 %3, %sub.i.i.i44 + %or.cond.i.i.i46 = and i1 %cmp.i.i.i40, %cmp2.i.i.i45 + %add.i.i.i47 = add i32 %3, %call + %sub5.i.i.i48 = lshr i32 %add.i.i.i47, 5 + %trunc.i50 = trunc i64 %1 to i32 + %inttoptr.i51 = inttoptr i32 %trunc.i50 to i32* + %add61617.i.i.i52 = or i32 %sub5.i.i.i48, -134217728 + %add61617.i.sub5.i.i.i53 = select i1 %or.cond.i.i.i46, i32 %add61617.i.i.i52, i32 %sub5.i.i.i48 + %storemerge2.i.i54 = getelementptr inbounds i32* %inttoptr.i51, i32 %add61617.i.sub5.i.i.i53 + %_Tmp.i39.0.cast74 = bitcast i64* %_Tmp.i39 to i32** + store i32* %storemerge2.i.i54, i32** %_Tmp.i39.0.cast74, align 8 + %storemerge.i.i55 = and i32 %add.i.i.i47, 31 + %_Tmp.i39.4.raw_idx = getelementptr inbounds i8* %_Tmp.i39.0.cast73, i32 4 + %_Tmp.i39.4.cast = bitcast i8* %_Tmp.i39.4.raw_idx to i32* + store i32 %storemerge.i.i55, i32* %_Tmp.i39.4.cast, align 4 + %srcval.i56 = load i64* %_Tmp.i39, align 8 + call void @llvm.lifetime.end(i64 8, i8* %_Tmp.i39.0.cast73) + +; CHECK: Before Merge disjoint stack slots +; CHECK: [[PREFIX15:MOV64mr.*<fi#]]{{[0-9]}}[[SUFFIX15:.*;]] mem:ST8[%fifteen] +; CHECK: [[PREFIX87:MOV32mr.*;]] mem:ST4[%sunkaddr87] + +; CHECK: After Merge disjoint stack slots +; CHECK: 
[[PREFIX15]]{{[0-9]}}[[SUFFIX15]] mem:ST8[%_Tmp.i39] +; CHECK: [[PREFIX87]] mem:ST4[<unknown>] + + %fifteen = bitcast i64* %retval.i.i to i32** + %sixteen = bitcast i64* %retval.i.i to i8* + call void @llvm.lifetime.start(i64 8, i8* %sixteen) + store i32* %.ph.i80, i32** %fifteen, align 8, !tbaa !0 + %sunkaddr = ptrtoint i64* %retval.i.i to i32 + %sunkaddr86 = add i32 %sunkaddr, 4 + %sunkaddr87 = inttoptr i32 %sunkaddr86 to i32* + store i32 %fourteen, i32* %sunkaddr87, align 4, !tbaa !3 + %seventeen = load i64* %retval.i.i, align 8 + call void @llvm.lifetime.end(i64 8, i8* %sixteen) + %eighteen = lshr i64 %seventeen, 32 + %nineteen = trunc i64 %eighteen to i32 + %shl.i.i.i = shl i32 1, %nineteen + + store i32 %shl.i.i.i, i32* %out.lo, align 8 + store i32 %nineteen, i32* %out.hi, align 8 + + ret void +} + +declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind + +declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +!3 = metadata !{metadata !"any pointer", metadata !1} +!4 = metadata !{metadata !"vtable pointer", metadata !2} diff --git a/test/CodeGen/X86/pr14098.ll b/test/CodeGen/X86/pr14098.ll new file mode 100644 index 00000000000..6ce2449ab6a --- /dev/null +++ b/test/CodeGen/X86/pr14098.ll @@ -0,0 +1,23 @@ +; RUN: llc -mtriple i386-unknown-linux-gnu -relocation-model=pic -verify-machineinstrs < %s +; We used to crash on this. 
+ +declare void @foo() +declare void @foo3(i1 %x) +define void @bar(i1 %a1, i16 %a2) nounwind align 2 { +bb0: + %a3 = trunc i16 %a2 to i8 + %a4 = lshr i16 %a2, 8 + %a5 = trunc i16 %a4 to i8 + br i1 %a1, label %bb1, label %bb2 +bb1: + br label %bb2 +bb2: + %a6 = phi i8 [ 3, %bb0 ], [ %a5, %bb1 ] + %a7 = phi i8 [ 9, %bb0 ], [ %a3, %bb1 ] + %a8 = icmp eq i8 %a6, 1 + call void @foo() + %a9 = icmp eq i8 %a7, 0 + call void @foo3(i1 %a9) + call void @foo3(i1 %a8) + ret void +} diff --git a/test/CodeGen/X86/sjlj.ll b/test/CodeGen/X86/sjlj.ll index d594e982994..681db009438 100644 --- a/test/CodeGen/X86/sjlj.ll +++ b/test/CodeGen/X86/sjlj.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -mtriple=i386-pc-linux -mcpu=corei7 -relocation-model=static | FileCheck --check-prefix=X86 %s -; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7 | FileCheck --check-prefix=X64 %s +; RUN: llc < %s -mtriple=i386-pc-linux -mcpu=corei7 -relocation-model=pic | FileCheck --check-prefix=PIC86 %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7 -relocation-model=static | FileCheck --check-prefix=X64 %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7 -relocation-model=pic | FileCheck --check-prefix=PIC64 %s @buf = internal global [5 x i8*] zeroinitializer @@ -20,14 +22,26 @@ define i32 @sj0() nounwind { ret i32 %r ; X86: sj0 ; x86: movl %ebp, buf -; x86: movl ${{.*LBB.*}}, buf+4 ; X86: movl %esp, buf+8 +; x86: movl ${{.*LBB.*}}, buf+4 ; X86: ret +; PIC86: sj0 +; PIC86: movl %ebp, buf@GOTOFF(%[[GOT:.*]]) +; PIC86: movl %esp, buf@GOTOFF+8(%[[GOT]]) +; PIC86: leal {{.*LBB.*}}@GOTOFF(%[[GOT]]), %[[LREG:.*]] +; PIC86: movl %[[LREG]], buf@GOTOFF+4 +; PIC86: ret ; X64: sj0 ; x64: movq %rbp, buf(%rip) ; x64: movq ${{.*LBB.*}}, buf+8(%rip) ; X64: movq %rsp, buf+16(%rip) ; X64: ret +; PIC64: sj0 +; PIC64: movq %rbp, buf(%rip) +; PIC64: movq %rsp, buf+16(%rip) +; PIC64: leaq {{.*LBB.*}}(%rip), %[[LREG:.*]] +; PIC64: movq %[[LREG]], buf+8(%rip) +; PIC64: ret } define void @lj0() nounwind { diff --git 
a/test/Instrumentation/AddressSanitizer/basic.ll b/test/Instrumentation/AddressSanitizer/basic.ll index d1900018706..655f69c16fd 100644 --- a/test/Instrumentation/AddressSanitizer/basic.ll +++ b/test/Instrumentation/AddressSanitizer/basic.ll @@ -69,3 +69,23 @@ entry: store i32 42, i32* %a ret void } + +; Check that asan leaves just one alloca. + +declare void @alloca_test_use([10 x i8]*) +define void @alloca_test() address_safety { +entry: + %x = alloca [10 x i8], align 1 + %y = alloca [10 x i8], align 1 + %z = alloca [10 x i8], align 1 + call void @alloca_test_use([10 x i8]* %x) + call void @alloca_test_use([10 x i8]* %y) + call void @alloca_test_use([10 x i8]* %z) + ret void +} + +; CHECK: define void @alloca_test() +; CHECK: = alloca +; CHECK-NOT: = alloca +; CHECK: ret void + diff --git a/test/Makefile b/test/Makefile index 3c6b5b68a4f..810fdded465 100644 --- a/test/Makefile +++ b/test/Makefile @@ -117,6 +117,16 @@ else ENABLE_ASSERTIONS=1 endif +# Derive whether or not LTO is enabled by checking the extra options. +LTO_IS_ENABLED := 0 +ifneq ($(findstring -flto,$(CompileCommonOpts)),) +LTO_IS_ENABLED := 1 +else +ifneq ($(findstring -O4,$(CompileCommonOpts)),) +LTO_IS_ENABLED := 1 +endif +endif + lit.site.cfg: FORCE @echo "Making LLVM 'lit.site.cfg' file..." 
@$(ECHOPATH) s=@TARGET_TRIPLE@=$(TARGET_TRIPLE)=g > lit.tmp @@ -129,6 +139,7 @@ lit.site.cfg: FORCE @$(ECHOPATH) s=@OCAMLOPT@=$(OCAMLOPT) -cc $(subst *,'\\\"',*$(subst =,"\\=",$(CXX_FOR_OCAMLOPT))*) -I $(LibDir)/ocaml=g >> lit.tmp @$(ECHOPATH) s=@ENABLE_SHARED@=$(ENABLE_SHARED)=g >> lit.tmp @$(ECHOPATH) s=@ENABLE_ASSERTIONS@=$(ENABLE_ASSERTIONS)=g >> lit.tmp + @$(ECHOPATH) s=@LTO_IS_ENABLED@=$(LTO_IS_ENABLED)=g >> lit.tmp @$(ECHOPATH) s=@TARGETS_TO_BUILD@=$(TARGETS_TO_BUILD)=g >> lit.tmp @$(ECHOPATH) s=@LLVM_BINDINGS@=$(BINDINGS_TO_BUILD)=g >> lit.tmp @$(ECHOPATH) s=@HOST_OS@=$(HOST_OS)=g >> lit.tmp diff --git a/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll b/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll index f992d415477..6294543cd81 100644 --- a/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll +++ b/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll @@ -79,3 +79,53 @@ entry: ; CHECK-AO-NOT: <2 x } +; Simple 3-pair chain with loads and stores (using ptrs and gep) +; using pointer vectors. 
+define void @test3(<2 x i64*>* %a, <2 x i64*>* %b, <2 x i64*>* %c) nounwind uwtable readonly { +entry: + %i0 = load <2 x i64*>* %a, align 8 + %i1 = load <2 x i64*>* %b, align 8 + %arrayidx3 = getelementptr inbounds <2 x i64*>* %a, i64 1 + %i3 = load <2 x i64*>* %arrayidx3, align 8 + %arrayidx4 = getelementptr inbounds <2 x i64*>* %b, i64 1 + %i4 = load <2 x i64*>* %arrayidx4, align 8 + %j1 = extractelement <2 x i64*> %i1, i32 0 + %j4 = extractelement <2 x i64*> %i4, i32 0 + %o1 = load i64* %j1, align 8 + %o4 = load i64* %j4, align 8 + %j0 = extractelement <2 x i64*> %i0, i32 0 + %j3 = extractelement <2 x i64*> %i3, i32 0 + %ptr0 = getelementptr inbounds i64* %j0, i64 %o1 + %ptr3 = getelementptr inbounds i64* %j3, i64 %o4 + %qtr0 = insertelement <2 x i64*> undef, i64* %ptr0, i32 0 + %rtr0 = insertelement <2 x i64*> %qtr0, i64* %ptr0, i32 1 + %qtr3 = insertelement <2 x i64*> undef, i64* %ptr3, i32 0 + %rtr3 = insertelement <2 x i64*> %qtr3, i64* %ptr3, i32 1 + store <2 x i64*> %rtr0, <2 x i64*>* %c, align 8 + %arrayidx5 = getelementptr inbounds <2 x i64*>* %c, i64 1 + store <2 x i64*> %rtr3, <2 x i64*>* %arrayidx5, align 8 + ret void +; CHECK: @test3 +; CHECK: %i0.v.i0 = bitcast <2 x i64*>* %a to <4 x i64*>* +; CHECK: %i1 = load <2 x i64*>* %b, align 8 +; CHECK: %i0 = load <4 x i64*>* %i0.v.i0, align 8 +; CHECK: %arrayidx4 = getelementptr inbounds <2 x i64*>* %b, i64 1 +; CHECK: %i4 = load <2 x i64*>* %arrayidx4, align 8 +; CHECK: %j1 = extractelement <2 x i64*> %i1, i32 0 +; CHECK: %j4 = extractelement <2 x i64*> %i4, i32 0 +; CHECK: %o1 = load i64* %j1, align 8 +; CHECK: %o4 = load i64* %j4, align 8 +; CHECK: %ptr0.v.i1.1 = insertelement <2 x i64> undef, i64 %o1, i32 0 +; CHECK: %ptr0.v.i1.2 = insertelement <2 x i64> %ptr0.v.i1.1, i64 %o4, i32 1 +; CHECK: %ptr0.v.i0 = shufflevector <4 x i64*> %i0, <4 x i64*> undef, <2 x i32> <i32 0, i32 2> +; CHECK: %ptr0 = getelementptr inbounds <2 x i64*> %ptr0.v.i0, <2 x i64> %ptr0.v.i1.2 +; CHECK: %rtr0 = shufflevector <2 x 
i64*> %ptr0, <2 x i64*> undef, <2 x i32> zeroinitializer +; CHECK: %rtr3 = shufflevector <2 x i64*> %ptr0, <2 x i64*> undef, <2 x i32> <i32 1, i32 1> +; CHECK: %0 = bitcast <2 x i64*>* %c to <4 x i64*>* +; CHECK: %1 = shufflevector <2 x i64*> %rtr0, <2 x i64*> %rtr3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK: store <4 x i64*> %1, <4 x i64*>* %0, align 8 +; CHECK: ret void +; CHECK-AO: @test3 +; CHECK-AO-NOT: <4 x +} + diff --git a/test/Transforms/IndVarSimplify/2012-10-19-congruent-constant.ll b/test/Transforms/IndVarSimplify/2012-10-19-congruent-constant.ll new file mode 100644 index 00000000000..5c478669d29 --- /dev/null +++ b/test/Transforms/IndVarSimplify/2012-10-19-congruent-constant.ll @@ -0,0 +1,27 @@ +; RUN: opt -S -indvars < %s | FileCheck %s + +; PR12627 +define void @test1(i32 %x) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %phi1 = phi i1 [ false, %entry ], [ %cmpa, %for.body ] + %phi2 = phi i1 [ false, %entry ], [ %cmpb, %for.body ] + %i.07 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + tail call void @aux(i1 %phi1, i1 %phi2) nounwind + %cmpa = icmp sgt i32 %i.07, 200 + %cmpb = icmp sgt i32 %i.07, 100 + %inc = add nsw i32 %i.07, 1 + %exitcond = icmp eq i32 %inc, 100 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void + +; CHECK: @test1 +; CHECK-NOT: phi i1 +; CHECK: call void @aux(i1 false, i1 false) +} + +declare void @aux(i1, i1) diff --git a/test/Transforms/IndVarSimplify/crash.ll b/test/Transforms/IndVarSimplify/crash.ll index 62af42b9d68..1b702a3b1a3 100644 --- a/test/Transforms/IndVarSimplify/crash.ll +++ b/test/Transforms/IndVarSimplify/crash.ll @@ -113,3 +113,21 @@ bb9: ret void } +; PR12536 +define void @fn1() noreturn nounwind { +entry: + br label %for.cond + +for.cond: ; preds = %for.end, %entry + %b.0 = phi i32 [ undef, %entry ], [ %conv, %for.end ] + br label %for.cond1 + +for.cond1: ; preds = %for.cond1, %for.cond + %c.0 = phi i32 [ %b.0, 
%for.cond1 ], [ 0, %for.cond ] + br i1 undef, label %for.cond1, label %for.end + +for.end: ; preds = %for.cond1 + %cmp2 = icmp slt i32 %c.0, 1 + %conv = zext i1 %cmp2 to i32 + br label %for.cond +} diff --git a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll index bfdd000e38e..507f695e67c 100644 --- a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll +++ b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll @@ -199,7 +199,6 @@ entry: ; back to the loop iv. ; ; CHECK: loop: -; CHECK: phi i32 ; CHECK-NOT: phi ; CHECK: exit: loop: diff --git a/test/Transforms/InstCombine/obfuscated_splat.ll b/test/Transforms/InstCombine/obfuscated_splat.ll new file mode 100644 index 00000000000..c25dade168a --- /dev/null +++ b/test/Transforms/InstCombine/obfuscated_splat.ll @@ -0,0 +1,11 @@ +; RUN: opt -instcombine -S %s | FileCheck %s + +define void @test(<4 x float> *%in_ptr, <4 x float> *%out_ptr) { + %A = load <4 x float>* %in_ptr, align 16 + %B = shufflevector <4 x float> %A, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef> + %C = shufflevector <4 x float> %B, <4 x float> %A, <4 x i32> <i32 0, i32 1, i32 4, i32 undef> + %D = shufflevector <4 x float> %C, <4 x float> %A, <4 x i32> <i32 0, i32 1, i32 2, i32 4> +; CHECK: %D = shufflevector <4 x float> %A, <4 x float> undef, <4 x i32> zeroinitializer + store <4 x float> %D, <4 x float> *%out_ptr + ret void +} diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll index 4baae2618dd..cc3aacdce3c 100644 --- a/test/Transforms/InstCombine/select.ll +++ b/test/Transforms/InstCombine/select.ll @@ -829,3 +829,37 @@ define i1 @test63(i1 %A, i1 %B) { ; CHECK: %C = or i1 %B, %not ; CHECK: ret i1 %C } + +; PR14131 +define void @test64(i32 %p, i16 %b) noreturn nounwind { +entry: + %p.addr.0.insert.mask = and i32 %p, -65536 + %conv2 = and i32 %p, 65535 + br i1 undef, label %lor.rhs, label %lor.end + +lor.rhs: + %p.addr.0.extract.trunc = trunc i32 
%p.addr.0.insert.mask to i16 + %phitmp = zext i16 %p.addr.0.extract.trunc to i32 + br label %lor.end + +lor.end: + %t.1 = phi i32 [ 0, %entry ], [ %phitmp, %lor.rhs ] + %conv6 = zext i16 %b to i32 + %div = udiv i32 %conv6, %t.1 + %tobool8 = icmp eq i32 %div, 0 + %cmp = icmp eq i32 %t.1, 0 + %cmp12 = icmp ult i32 %conv2, 2 + %cmp.sink = select i1 %tobool8, i1 %cmp12, i1 %cmp + br i1 %cmp.sink, label %cond.end17, label %cond.false16 + +cond.false16: + br label %cond.end17 + +cond.end17: + br label %while.body + +while.body: + br label %while.body +; CHECK: @test64 +; CHECK-NOT: select +} diff --git a/test/Transforms/InstCombine/strcpy-1.ll b/test/Transforms/InstCombine/strcpy-1.ll new file mode 100644 index 00000000000..b6cf048b2a8 --- /dev/null +++ b/test/Transforms/InstCombine/strcpy-1.ll @@ -0,0 +1,45 @@ +; Test that the strcpy library call simplifier works correctly. +; rdar://6839935 +; RUN: opt < %s -instcombine -S | FileCheck %s +; +; This transformation requires the pointer size, as it assumes that size_t is +; the size of a pointer. 
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" + +@hello = constant [6 x i8] c"hello\00" +@a = common global [32 x i8] zeroinitializer, align 1 +@b = common global [32 x i8] zeroinitializer, align 1 + +declare i8* @strcpy(i8*, i8*) + +define void @test_simplify1() { +; CHECK: @test_simplify1 + + %dst = getelementptr [32 x i8]* @a, i32 0, i32 0 + %src = getelementptr [6 x i8]* @hello, i32 0, i32 0 + + call i8* @strcpy(i8* %dst, i8* %src) +; CHECK: @llvm.memcpy.p0i8.p0i8.i32 + ret void +} + +define i8* @test_simplify2() { +; CHECK: @test_simplify2 + + %dst = getelementptr [32 x i8]* @a, i32 0, i32 0 + + %ret = call i8* @strcpy(i8* %dst, i8* %dst) +; CHECK: ret i8* getelementptr inbounds ([32 x i8]* @a, i32 0, i32 0) + ret i8* %ret +} + +define i8* @test_no_simplify1() { +; CHECK: @test_no_simplify1 + + %dst = getelementptr [32 x i8]* @a, i32 0, i32 0 + %src = getelementptr [32 x i8]* @b, i32 0, i32 0 + + %ret = call i8* @strcpy(i8* %dst, i8* %src) +; CHECK: call i8* @strcpy + ret i8* %ret +} diff --git a/test/Transforms/InstCombine/strcpy-2.ll b/test/Transforms/InstCombine/strcpy-2.ll new file mode 100644 index 00000000000..779e9fdd959 --- /dev/null +++ b/test/Transforms/InstCombine/strcpy-2.ll @@ -0,0 +1,22 @@ +; Test that the strcpy library call simplifier works correctly. +; RUN: opt < %s -instcombine -S | FileCheck %s +; +; This transformation requires the pointer size, as it assumes that size_t is +; the size of a pointer. 
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" + +@hello = constant [6 x i8] c"hello\00" +@a = common global [32 x i8] zeroinitializer, align 1 + +declare i16* @strcpy(i8*, i8*) + +define void @test_no_simplify1() { +; CHECK: @test_no_simplify1 + + %dst = getelementptr [32 x i8]* @a, i32 0, i32 0 + %src = getelementptr [6 x i8]* @hello, i32 0, i32 0 + + call i16* @strcpy(i8* %dst, i8* %src) +; CHECK: call i16* @strcpy + ret void +} diff --git a/test/Transforms/InstCombine/strcpy_chk-1.ll b/test/Transforms/InstCombine/strcpy_chk-1.ll index c03e8a348b8..3e48f4fd305 100644 --- a/test/Transforms/InstCombine/strcpy_chk-1.ll +++ b/test/Transforms/InstCombine/strcpy_chk-1.ll @@ -7,16 +7,16 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 @a = common global [60 x i8] zeroinitializer, align 1 @b = common global [60 x i8] zeroinitializer, align 1 -@.str = private constant [8 x i8] c"abcdefg\00" +@.str = private constant [12 x i8] c"abcdefghijk\00" ; Check cases where slen >= strlen (src). 
define void @test_simplify1() { ; CHECK: @test_simplify1 %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0 + %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 -; CHECK-NEXT: call i8* @strcpy +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32 call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 60) ret void } @@ -24,19 +24,19 @@ define void @test_simplify1() { define void @test_simplify2() { ; CHECK: @test_simplify2 %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0 + %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 -; CHECK-NEXT: call i8* @strcpy - call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 8) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32 + call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 12) ret void } define void @test_simplify3() { ; CHECK: @test_simplify3 %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0 + %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 -; CHECK-NEXT: call i8* @strcpy +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32 call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1) ret void } @@ -53,36 +53,42 @@ define void @test_simplify4() { ret void } -define void @test_no_simplify1() { -; CHECK: @test_no_simplify1 +; Check case where the string length is not constant. + +define void @test_simplify5() { +; CHECK: @test_simplify5 %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0 + %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 -; CHECK-NEXT: call i8* @__strcpy_chk - call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 8) +; CHECK: @__memcpy_chk + %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false) + call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 %len) ret void } -; Check case were slen < strlen (src). 
+; Check case where the source and destination are the same. -define void @test_no_simplify2() { -; CHECK: @test_no_simplify2 +define i8* @test_simplify6() { +; CHECK: @test_simplify6 %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0 -; CHECK-NEXT: call i8* @__strcpy_chk - call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 3) - ret void +; CHECK: getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0) + %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false) + %ret = call i8* @__strcpy_chk(i8* %dst, i8* %dst, i32 %len) + ret i8* %ret } -define void @test_no_simplify3() { -; CHECK: @test_no_simplify3 +; Check case where slen < strlen (src). + +define void @test_no_simplify1() { +; CHECK: @test_no_simplify1 %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0 ; CHECK-NEXT: call i8* @__strcpy_chk - call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 0) + call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 8) ret void } declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind +declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly diff --git a/test/Transforms/LoopVectorize/2012-10-20-infloop.ll b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll new file mode 100644 index 00000000000..5caaffc8dde --- /dev/null +++ b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll @@ -0,0 +1,27 @@ +; RUN: opt < %s -loop-vectorize -dce + +; Check that we don't fall into an infinite loop. 
+define void @test() nounwind { +entry: + br label %for.body + +for.body: + %0 = phi i32 [ 1, %entry ], [ 0, %for.body ] + br label %for.body +} + + + +define void @test2() nounwind { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv47 = phi i64 [ 0, %entry ], [ %indvars.iv.next48, %for.body ] + %0 = phi i32 [ 1, %entry ], [ 0, %for.body ] + %indvars.iv.next48 = add i64 %indvars.iv47, 1 + br i1 undef, label %for.end, label %for.body + +for.end: ; preds = %for.body + unreachable +} diff --git a/test/Transforms/LoopVectorize/gcc-examples.ll b/test/Transforms/LoopVectorize/gcc-examples.ll new file mode 100644 index 00000000000..6fb1792b2c8 --- /dev/null +++ b/test/Transforms/LoopVectorize/gcc-examples.ll @@ -0,0 +1,648 @@ +; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@b = common global [2048 x i32] zeroinitializer, align 16 +@c = common global [2048 x i32] zeroinitializer, align 16 +@a = common global [2048 x i32] zeroinitializer, align 16 +@G = common global [32 x [1024 x i32]] zeroinitializer, align 16 +@ub = common global [1024 x i32] zeroinitializer, align 16 +@uc = common global [1024 x i32] zeroinitializer, align 16 +@d = common global [2048 x i32] zeroinitializer, align 16 +@fa = common global [1024 x float] zeroinitializer, align 16 +@fb = common global [1024 x float] zeroinitializer, align 16 +@ic = common global [1024 x i32] zeroinitializer, align 16 +@da = common global [1024 x float] zeroinitializer, align 16 +@db = common global [1024 x float] zeroinitializer, align 16 +@dc = common global [1024 x float] zeroinitializer, align 16 +@dd = common global [1024 x float] zeroinitializer, align 16 +@dj = common global [1024 x i32] zeroinitializer, align 16 + +;CHECK: @example1 +;CHECK: 
load <4 x i32> +;CHECK: add <4 x i32> +;CHECK: store <4 x i32> +;CHECK: ret void +define void @example1() nounwind uwtable ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv + %5 = load i32* %4, align 4 + %6 = add nsw i32 %5, %3 + %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + store i32 %6, i32* %7, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 256 + br i1 %exitcond, label %8, label %1 + +; <label>:8 ; preds = %1 + ret void +} + +;CHECK: @example2 +;CHECK: store <4 x i32> +;CHECK: ret void +define void @example2(i32 %n, i32 %x) nounwind uwtable ssp { + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph5, label %.preheader + +..preheader_crit_edge: ; preds = %.lr.ph5 + %phitmp = sext i32 %n to i64 + br label %.preheader + +.preheader: ; preds = %..preheader_crit_edge, %0 + %i.0.lcssa = phi i64 [ %phitmp, %..preheader_crit_edge ], [ 0, %0 ] + %2 = icmp eq i32 %n, 0 + br i1 %2, label %._crit_edge, label %.lr.ph + +.lr.ph5: ; preds = %0, %.lr.ph5 + %indvars.iv6 = phi i64 [ %indvars.iv.next7, %.lr.ph5 ], [ 0, %0 ] + %3 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv6 + store i32 %x, i32* %3, align 4 + %indvars.iv.next7 = add i64 %indvars.iv6, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next7 to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %..preheader_crit_edge, label %.lr.ph5 + +.lr.ph: ; preds = %.preheader, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ %i.0.lcssa, %.preheader ] + %.02 = phi i32 [ %4, %.lr.ph ], [ %n, %.preheader ] + %4 = add nsw i32 %.02, -1 + %5 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv + %6 = load i32* %5, align 4 + %7 
= getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv + %8 = load i32* %7, align 4 + %9 = and i32 %8, %6 + %10 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + store i32 %9, i32* %10, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %11 = icmp eq i32 %4, 0 + br i1 %11, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %.preheader + ret void +} + +; We can't vectorize this loop because it has non constant loop bounds. +;CHECK: @example3 +;CHECK-NOT: <4 x i32> +;CHECK: ret void +define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp { + %1 = icmp eq i32 %n, 0 + br i1 %1, label %._crit_edge, label %.lr.ph + +.lr.ph: ; preds = %0, %.lr.ph + %.05 = phi i32 [ %2, %.lr.ph ], [ %n, %0 ] + %.014 = phi i32* [ %5, %.lr.ph ], [ %p, %0 ] + %.023 = phi i32* [ %3, %.lr.ph ], [ %q, %0 ] + %2 = add nsw i32 %.05, -1 + %3 = getelementptr inbounds i32* %.023, i64 1 + %4 = load i32* %.023, align 16 + %5 = getelementptr inbounds i32* %.014, i64 1 + store i32 %4, i32* %.014, align 16 + %6 = icmp eq i32 %2, 0 + br i1 %6, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + ret void +} + +;CHECK: @example4 +;CHECK: load <4 x i32> +;CHECK: ret void +define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp { + %1 = add nsw i32 %n, -1 + %2 = icmp eq i32 %n, 0 + br i1 %2, label %.preheader4, label %.lr.ph10 + +.preheader4: ; preds = %0 + %3 = icmp sgt i32 %1, 0 + br i1 %3, label %.lr.ph6, label %._crit_edge + +.lr.ph10: ; preds = %0, %.lr.ph10 + %4 = phi i32 [ %9, %.lr.ph10 ], [ %1, %0 ] + %.018 = phi i32* [ %8, %.lr.ph10 ], [ %p, %0 ] + %.027 = phi i32* [ %5, %.lr.ph10 ], [ %q, %0 ] + %5 = getelementptr inbounds i32* %.027, i64 1 + %6 = load i32* %.027, align 16 + %7 = add nsw i32 %6, 5 + %8 = getelementptr inbounds i32* %.018, i64 1 + store i32 %7, i32* %.018, align 16 + %9 = add nsw i32 %4, -1 + %10 = icmp eq i32 %4, 0 
+ br i1 %10, label %._crit_edge, label %.lr.ph10 + +.preheader: ; preds = %.lr.ph6 + br i1 %3, label %.lr.ph, label %._crit_edge + +.lr.ph6: ; preds = %.preheader4, %.lr.ph6 + %indvars.iv11 = phi i64 [ %indvars.iv.next12, %.lr.ph6 ], [ 0, %.preheader4 ] + %indvars.iv.next12 = add i64 %indvars.iv11, 1 + %11 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv.next12 + %12 = load i32* %11, align 4 + %13 = add nsw i64 %indvars.iv11, 3 + %14 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %13 + %15 = load i32* %14, align 4 + %16 = add nsw i32 %15, %12 + %17 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv11 + store i32 %16, i32* %17, align 4 + %lftr.wideiv13 = trunc i64 %indvars.iv.next12 to i32 + %exitcond14 = icmp eq i32 %lftr.wideiv13, %1 + br i1 %exitcond14, label %.preheader, label %.lr.ph6 + +.lr.ph: ; preds = %.preheader, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.preheader ] + %18 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + %19 = load i32* %18, align 4 + %20 = icmp sgt i32 %19, 4 + %21 = select i1 %20, i32 4, i32 0 + %22 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv + store i32 %21, i32* %22, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %1 + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph10, %.preheader4, %.lr.ph, %.preheader + ret void +} + +;CHECK: @example8 +;CHECK: store <4 x i32> +;CHECK: ret void +define void @example8(i32 %x) nounwind uwtable ssp { + br label %.preheader + +.preheader: ; preds = %3, %0 + %indvars.iv3 = phi i64 [ 0, %0 ], [ %indvars.iv.next4, %3 ] + br label %1 + +; <label>:1 ; preds = %1, %.preheader + %indvars.iv = phi i64 [ 0, %.preheader ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds [32 x [1024 x i32]]* @G, i64 0, i64 %indvars.iv3, i64 %indvars.iv + store i32 %x, i32* %2, align 4 + 
%indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %3, label %1 + +; <label>:3 ; preds = %1 + %indvars.iv.next4 = add i64 %indvars.iv3, 1 + %lftr.wideiv5 = trunc i64 %indvars.iv.next4 to i32 + %exitcond6 = icmp eq i32 %lftr.wideiv5, 32 + br i1 %exitcond6, label %4, label %.preheader + +; <label>:4 ; preds = %3 + ret void +} + +;CHECK: @example9 +;CHECK: phi <4 x i32> +;CHECK: ret i32 +define i32 @example9() nounwind uwtable readonly ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %diff.01 = phi i32 [ 0, %0 ], [ %7, %1 ] + %2 = getelementptr inbounds [1024 x i32]* @ub, i64 0, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = getelementptr inbounds [1024 x i32]* @uc, i64 0, i64 %indvars.iv + %5 = load i32* %4, align 4 + %6 = add i32 %3, %diff.01 + %7 = sub i32 %6, %5 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %8, label %1 + +; <label>:8 ; preds = %1 + ret i32 %7 +} + +;CHECK: @example10a +;CHECK: load <4 x i16> +;CHECK: add <4 x i16> +;CHECK: store <4 x i16> +;CHECK: ret void +define void @example10a(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds i32* %ib, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = getelementptr inbounds i32* %ic, i64 %indvars.iv + %5 = load i32* %4, align 4 + %6 = add nsw i32 %5, %3 + %7 = getelementptr inbounds i32* %ia, i64 %indvars.iv + store i32 %6, i32* %7, align 4 + %8 = getelementptr inbounds i16* %sb, i64 %indvars.iv + %9 = load i16* %8, align 2 + %10 = 
getelementptr inbounds i16* %sc, i64 %indvars.iv + %11 = load i16* %10, align 2 + %12 = add i16 %11, %9 + %13 = getelementptr inbounds i16* %sa, i64 %indvars.iv + store i16 %12, i16* %13, align 2 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %14, label %1 + +; <label>:14 ; preds = %1 + ret void +} + +;CHECK: @example10b +;CHECK: load <4 x i16> +;CHECK: sext <4 x i16> +;CHECK: store <4 x i32> +;CHECK: ret void +define void @example10b(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds i16* %sb, i64 %indvars.iv + %3 = load i16* %2, align 2 + %4 = sext i16 %3 to i32 + %5 = getelementptr inbounds i32* %ia, i64 %indvars.iv + store i32 %4, i32* %5, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %6, label %1 + +; <label>:6 ; preds = %1 + ret void +} + +;CHECK: @example11 +;CHECK: load i32 +;CHECK: load i32 +;CHECK: load i32 +;CHECK: load i32 +;CHECK: insertelement +;CHECK: insertelement +;CHECK: insertelement +;CHECK: insertelement +;CHECK: ret void +define void @example11() nounwind uwtable ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = shl nsw i64 %indvars.iv, 1 + %3 = or i64 %2, 1 + %4 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %3 + %5 = load i32* %4, align 4 + %6 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %3 + %7 = load i32* %6, align 4 + %8 = mul nsw i32 %7, %5 + %9 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %2 + %10 = load i32* %9, align 8 + %11 = 
getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %2 + %12 = load i32* %11, align 8 + %13 = mul nsw i32 %12, %10 + %14 = sub nsw i32 %8, %13 + %15 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + store i32 %14, i32* %15, align 4 + %16 = mul nsw i32 %7, %10 + %17 = mul nsw i32 %12, %5 + %18 = add nsw i32 %17, %16 + %19 = getelementptr inbounds [2048 x i32]* @d, i64 0, i64 %indvars.iv + store i32 %18, i32* %19, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 512 + br i1 %exitcond, label %20, label %1 + +; <label>:20 ; preds = %1 + ret void +} + +;CHECK: @example12 +;CHECK: trunc <4 x i64> +;CHECK: store <4 x i32> +;CHECK: ret void +define void @example12() nounwind uwtable ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + %3 = trunc i64 %indvars.iv to i32 + store i32 %3, i32* %2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %4, label %1 + +; <label>:4 ; preds = %1 + ret void +} + +; Can't vectorize because of reductions. 
+;CHECK: @example13 +;CHECK-NOT: <4 x i32> +;CHECK: ret void +define void @example13(i32** nocapture %A, i32** nocapture %B, i32* nocapture %out) nounwind uwtable ssp { + br label %.preheader + +.preheader: ; preds = %14, %0 + %indvars.iv4 = phi i64 [ 0, %0 ], [ %indvars.iv.next5, %14 ] + %1 = getelementptr inbounds i32** %A, i64 %indvars.iv4 + %2 = load i32** %1, align 8 + %3 = getelementptr inbounds i32** %B, i64 %indvars.iv4 + %4 = load i32** %3, align 8 + br label %5 + +; <label>:5 ; preds = %.preheader, %5 + %indvars.iv = phi i64 [ 0, %.preheader ], [ %indvars.iv.next, %5 ] + %diff.02 = phi i32 [ 0, %.preheader ], [ %11, %5 ] + %6 = getelementptr inbounds i32* %2, i64 %indvars.iv + %7 = load i32* %6, align 4 + %8 = getelementptr inbounds i32* %4, i64 %indvars.iv + %9 = load i32* %8, align 4 + %10 = add i32 %7, %diff.02 + %11 = sub i32 %10, %9 + %indvars.iv.next = add i64 %indvars.iv, 8 + %12 = trunc i64 %indvars.iv.next to i32 + %13 = icmp slt i32 %12, 1024 + br i1 %13, label %5, label %14 + +; <label>:14 ; preds = %5 + %15 = getelementptr inbounds i32* %out, i64 %indvars.iv4 + store i32 %11, i32* %15, align 4 + %indvars.iv.next5 = add i64 %indvars.iv4, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next5 to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 32 + br i1 %exitcond, label %16, label %.preheader + +; <label>:16 ; preds = %14 + ret void +} + +; Can't vectorize because of reductions. 
+;CHECK: @example14 +;CHECK-NOT: <4 x i32> +;CHECK: ret void +define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocapture %out) nounwind uwtable ssp { +.preheader3: + br label %.preheader + +.preheader: ; preds = %11, %.preheader3 + %indvars.iv7 = phi i64 [ 0, %.preheader3 ], [ %indvars.iv.next8, %11 ] + %sum.05 = phi i32 [ 0, %.preheader3 ], [ %10, %11 ] + br label %0 + +; <label>:0 ; preds = %0, %.preheader + %indvars.iv = phi i64 [ 0, %.preheader ], [ %indvars.iv.next, %0 ] + %sum.12 = phi i32 [ %sum.05, %.preheader ], [ %10, %0 ] + %1 = getelementptr inbounds i32** %in, i64 %indvars.iv + %2 = load i32** %1, align 8 + %3 = getelementptr inbounds i32* %2, i64 %indvars.iv7 + %4 = load i32* %3, align 4 + %5 = getelementptr inbounds i32** %coeff, i64 %indvars.iv + %6 = load i32** %5, align 8 + %7 = getelementptr inbounds i32* %6, i64 %indvars.iv7 + %8 = load i32* %7, align 4 + %9 = mul nsw i32 %8, %4 + %10 = add nsw i32 %9, %sum.12 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %11, label %0 + +; <label>:11 ; preds = %0 + %indvars.iv.next8 = add i64 %indvars.iv7, 1 + %lftr.wideiv9 = trunc i64 %indvars.iv.next8 to i32 + %exitcond10 = icmp eq i32 %lftr.wideiv9, 32 + br i1 %exitcond10, label %.preheader3.1, label %.preheader + +.preheader3.1: ; preds = %11 + store i32 %10, i32* %out, align 4 + br label %.preheader.1 + +.preheader.1: ; preds = %24, %.preheader3.1 + %indvars.iv7.1 = phi i64 [ 0, %.preheader3.1 ], [ %indvars.iv.next8.1, %24 ] + %sum.05.1 = phi i32 [ 0, %.preheader3.1 ], [ %23, %24 ] + br label %12 + +; <label>:12 ; preds = %12, %.preheader.1 + %indvars.iv.1 = phi i64 [ 0, %.preheader.1 ], [ %13, %12 ] + %sum.12.1 = phi i32 [ %sum.05.1, %.preheader.1 ], [ %23, %12 ] + %13 = add nsw i64 %indvars.iv.1, 1 + %14 = getelementptr inbounds i32** %in, i64 %13 + %15 = load i32** %14, align 8 + %16 = getelementptr inbounds i32* 
%15, i64 %indvars.iv7.1 + %17 = load i32* %16, align 4 + %18 = getelementptr inbounds i32** %coeff, i64 %indvars.iv.1 + %19 = load i32** %18, align 8 + %20 = getelementptr inbounds i32* %19, i64 %indvars.iv7.1 + %21 = load i32* %20, align 4 + %22 = mul nsw i32 %21, %17 + %23 = add nsw i32 %22, %sum.12.1 + %lftr.wideiv.1 = trunc i64 %13 to i32 + %exitcond.1 = icmp eq i32 %lftr.wideiv.1, 1024 + br i1 %exitcond.1, label %24, label %12 + +; <label>:24 ; preds = %12 + %indvars.iv.next8.1 = add i64 %indvars.iv7.1, 1 + %lftr.wideiv9.1 = trunc i64 %indvars.iv.next8.1 to i32 + %exitcond10.1 = icmp eq i32 %lftr.wideiv9.1, 32 + br i1 %exitcond10.1, label %.preheader3.2, label %.preheader.1 + +.preheader3.2: ; preds = %24 + %25 = getelementptr inbounds i32* %out, i64 1 + store i32 %23, i32* %25, align 4 + br label %.preheader.2 + +.preheader.2: ; preds = %38, %.preheader3.2 + %indvars.iv7.2 = phi i64 [ 0, %.preheader3.2 ], [ %indvars.iv.next8.2, %38 ] + %sum.05.2 = phi i32 [ 0, %.preheader3.2 ], [ %37, %38 ] + br label %26 + +; <label>:26 ; preds = %26, %.preheader.2 + %indvars.iv.2 = phi i64 [ 0, %.preheader.2 ], [ %indvars.iv.next.2, %26 ] + %sum.12.2 = phi i32 [ %sum.05.2, %.preheader.2 ], [ %37, %26 ] + %27 = add nsw i64 %indvars.iv.2, 2 + %28 = getelementptr inbounds i32** %in, i64 %27 + %29 = load i32** %28, align 8 + %30 = getelementptr inbounds i32* %29, i64 %indvars.iv7.2 + %31 = load i32* %30, align 4 + %32 = getelementptr inbounds i32** %coeff, i64 %indvars.iv.2 + %33 = load i32** %32, align 8 + %34 = getelementptr inbounds i32* %33, i64 %indvars.iv7.2 + %35 = load i32* %34, align 4 + %36 = mul nsw i32 %35, %31 + %37 = add nsw i32 %36, %sum.12.2 + %indvars.iv.next.2 = add i64 %indvars.iv.2, 1 + %lftr.wideiv.2 = trunc i64 %indvars.iv.next.2 to i32 + %exitcond.2 = icmp eq i32 %lftr.wideiv.2, 1024 + br i1 %exitcond.2, label %38, label %26 + +; <label>:38 ; preds = %26 + %indvars.iv.next8.2 = add i64 %indvars.iv7.2, 1 + %lftr.wideiv9.2 = trunc i64 %indvars.iv.next8.2 to 
i32 + %exitcond10.2 = icmp eq i32 %lftr.wideiv9.2, 32 + br i1 %exitcond10.2, label %.preheader3.3, label %.preheader.2 + +.preheader3.3: ; preds = %38 + %39 = getelementptr inbounds i32* %out, i64 2 + store i32 %37, i32* %39, align 4 + br label %.preheader.3 + +.preheader.3: ; preds = %52, %.preheader3.3 + %indvars.iv7.3 = phi i64 [ 0, %.preheader3.3 ], [ %indvars.iv.next8.3, %52 ] + %sum.05.3 = phi i32 [ 0, %.preheader3.3 ], [ %51, %52 ] + br label %40 + +; <label>:40 ; preds = %40, %.preheader.3 + %indvars.iv.3 = phi i64 [ 0, %.preheader.3 ], [ %indvars.iv.next.3, %40 ] + %sum.12.3 = phi i32 [ %sum.05.3, %.preheader.3 ], [ %51, %40 ] + %41 = add nsw i64 %indvars.iv.3, 3 + %42 = getelementptr inbounds i32** %in, i64 %41 + %43 = load i32** %42, align 8 + %44 = getelementptr inbounds i32* %43, i64 %indvars.iv7.3 + %45 = load i32* %44, align 4 + %46 = getelementptr inbounds i32** %coeff, i64 %indvars.iv.3 + %47 = load i32** %46, align 8 + %48 = getelementptr inbounds i32* %47, i64 %indvars.iv7.3 + %49 = load i32* %48, align 4 + %50 = mul nsw i32 %49, %45 + %51 = add nsw i32 %50, %sum.12.3 + %indvars.iv.next.3 = add i64 %indvars.iv.3, 1 + %lftr.wideiv.3 = trunc i64 %indvars.iv.next.3 to i32 + %exitcond.3 = icmp eq i32 %lftr.wideiv.3, 1024 + br i1 %exitcond.3, label %52, label %40 + +; <label>:52 ; preds = %40 + %indvars.iv.next8.3 = add i64 %indvars.iv7.3, 1 + %lftr.wideiv9.3 = trunc i64 %indvars.iv.next8.3 to i32 + %exitcond10.3 = icmp eq i32 %lftr.wideiv9.3, 32 + br i1 %exitcond10.3, label %53, label %.preheader.3 + +; <label>:53 ; preds = %52 + %54 = getelementptr inbounds i32* %out, i64 3 + store i32 %51, i32* %54, align 4 + ret void +} + +; Can't vectorize because the src and dst pointers are not disjoint. 
+;CHECK: @example21 +;CHECK-NOT: <4 x i32> +;CHECK: ret i32 +define i32 @example21(i32* nocapture %b, i32 %n) nounwind uwtable readonly ssp { + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0 + %2 = sext i32 %n to i64 + br label %3 + +; <label>:3 ; preds = %.lr.ph, %3 + %indvars.iv = phi i64 [ %2, %.lr.ph ], [ %indvars.iv.next, %3 ] + %a.02 = phi i32 [ 0, %.lr.ph ], [ %6, %3 ] + %indvars.iv.next = add i64 %indvars.iv, -1 + %4 = getelementptr inbounds i32* %b, i64 %indvars.iv.next + %5 = load i32* %4, align 4 + %6 = add nsw i32 %5, %a.02 + %7 = trunc i64 %indvars.iv.next to i32 + %8 = icmp sgt i32 %7, 0 + br i1 %8, label %3, label %._crit_edge + +._crit_edge: ; preds = %3, %0 + %a.0.lcssa = phi i32 [ 0, %0 ], [ %6, %3 ] + ret i32 %a.0.lcssa +} + +; Can't vectorize because there are multiple PHIs. +;CHECK: @example23 +;CHECK-NOT: <4 x i32> +;CHECK: ret void +define void @example23(i16* nocapture %src, i32* nocapture %dst) nounwind uwtable ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %.04 = phi i16* [ %src, %0 ], [ %2, %1 ] + %.013 = phi i32* [ %dst, %0 ], [ %6, %1 ] + %i.02 = phi i32 [ 0, %0 ], [ %7, %1 ] + %2 = getelementptr inbounds i16* %.04, i64 1 + %3 = load i16* %.04, align 2 + %4 = zext i16 %3 to i32 + %5 = shl nuw nsw i32 %4, 7 + %6 = getelementptr inbounds i32* %.013, i64 1 + store i32 %5, i32* %.013, align 4 + %7 = add nsw i32 %i.02, 1 + %exitcond = icmp eq i32 %7, 256 + br i1 %exitcond, label %8, label %1 + +; <label>:8 ; preds = %1 + ret void +} + +;CHECK: @example24 +;CHECK: shufflevector <4 x i16> +;CHECK: ret void +define void @example24(i16 signext %x, i16 signext %y) nounwind uwtable ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds [1024 x float]* @fa, i64 0, i64 %indvars.iv + %3 = load float* %2, align 4 + %4 = getelementptr inbounds [1024 x float]* @fb, i64 0, i64 %indvars.iv + %5 = load float* %4, 
align 4 + %6 = fcmp olt float %3, %5 + %x.y = select i1 %6, i16 %x, i16 %y + %7 = sext i16 %x.y to i32 + %8 = getelementptr inbounds [1024 x i32]* @ic, i64 0, i64 %indvars.iv + store i32 %7, i32* %8, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %9, label %1 + +; <label>:9 ; preds = %1 + ret void +} + +;CHECK: @example25 +;CHECK: and <4 x i1> +;CHECK: zext <4 x i1> +;CHECK: ret void +define void @example25() nounwind uwtable ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds [1024 x float]* @da, i64 0, i64 %indvars.iv + %3 = load float* %2, align 4 + %4 = getelementptr inbounds [1024 x float]* @db, i64 0, i64 %indvars.iv + %5 = load float* %4, align 4 + %6 = fcmp olt float %3, %5 + %7 = getelementptr inbounds [1024 x float]* @dc, i64 0, i64 %indvars.iv + %8 = load float* %7, align 4 + %9 = getelementptr inbounds [1024 x float]* @dd, i64 0, i64 %indvars.iv + %10 = load float* %9, align 4 + %11 = fcmp olt float %8, %10 + %12 = and i1 %6, %11 + %13 = zext i1 %12 to i32 + %14 = getelementptr inbounds [1024 x i32]* @dj, i64 0, i64 %indvars.iv + store i32 %13, i32* %14, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %15, label %1 + +; <label>:15 ; preds = %1 + ret void +} + diff --git a/test/Transforms/LoopVectorize/increment.ll b/test/Transforms/LoopVectorize/increment.ll new file mode 100644 index 00000000000..e818d685626 --- /dev/null +++ b/test/Transforms/LoopVectorize/increment.ll @@ -0,0 +1,66 @@ +; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s + +target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@a = common global [2048 x i32] zeroinitializer, align 16 + +; This is the loop. +; for (i=0; i<n; i++){ +; a[i] += i; +; } +;CHECK: @inc +;CHECK: load <4 x i32> +;CHECK: add <4 x i32> +;CHECK: store <4 x i32> +;CHECK: ret void +define void @inc(i32 %n) nounwind uwtable noinline ssp { + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] + %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = trunc i64 %indvars.iv to i32 + %5 = add nsw i32 %3, %4 + store i32 %5, i32* %2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + ret void +} + +; Can't vectorize this loop because the access to A[X] is non linear. 
+; +; for (i = 0; i < n; ++i) { +; A[B[i]]++; +; } +;CHECK: @histogram +;CHECK-NOT: <4 x i32> +;CHECK: ret i32 +define i32 @histogram(i32* nocapture noalias %A, i32* nocapture noalias %B, i32 %n) nounwind uwtable ssp { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %B, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %idxprom1 = sext i32 %0 to i64 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1 + %1 = load i32* %arrayidx2, align 4 + %inc = add nsw i32 %1, 1 + store i32 %inc, i32* %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret i32 0 +} diff --git a/test/Transforms/LoopVectorize/induction_plus.ll b/test/Transforms/LoopVectorize/induction_plus.ll new file mode 100644 index 00000000000..bd90113e523 --- /dev/null +++ b/test/Transforms/LoopVectorize/induction_plus.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -loop-vectorize -instcombine -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@array = common global [1024 x i32] zeroinitializer, align 16 + +;CHECK: @array_at_plus_one +;CHECK: add <4 x i64> +;CHECK: trunc <4 x i64> +;CHECK: add i64 %index, 12 +;CHECK: ret i32 +define i32 @array_at_plus_one(i32 %n) nounwind uwtable ssp { + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] + %2 = add nsw i64 %indvars.iv, 12 + %3 = getelementptr inbounds [1024 x i32]* @array, i64 0, 
i64 %2 + %4 = trunc i64 %indvars.iv to i32 + store i32 %4, i32* %3, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + ret i32 undef +} diff --git a/test/Transforms/LoopVectorize/lit.local.cfg b/test/Transforms/LoopVectorize/lit.local.cfg new file mode 100644 index 00000000000..19eebc0ac7a --- /dev/null +++ b/test/Transforms/LoopVectorize/lit.local.cfg @@ -0,0 +1 @@ +config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/LoopVectorize/non-const-n.ll b/test/Transforms/LoopVectorize/non-const-n.ll new file mode 100644 index 00000000000..04c5c84a4f4 --- /dev/null +++ b/test/Transforms/LoopVectorize/non-const-n.ll @@ -0,0 +1,38 @@ +; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@b = common global [2048 x i32] zeroinitializer, align 16 +@c = common global [2048 x i32] zeroinitializer, align 16 +@a = common global [2048 x i32] zeroinitializer, align 16 + +;CHECK: @example1 +;CHECK: shl i32 +;CHECK: zext i32 +;CHECK: load <4 x i32> +;CHECK: add <4 x i32> +;CHECK: store <4 x i32> +;CHECK: ret void +define void @example1(i32 %n) nounwind uwtable ssp { + %n4 = shl i32 %n, 2 + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv + %5 = load i32* %4, align 4 + %6 = add nsw i32 %5, %3 + %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + store i32 %6, i32* %7, align 4 + %indvars.iv.next = add i64 
%indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n4 + br i1 %exitcond, label %8, label %1 + +; <label>:8 ; preds = %1 + ret void +} + diff --git a/test/Transforms/LoopVectorize/read-only.ll b/test/Transforms/LoopVectorize/read-only.ll new file mode 100644 index 00000000000..4095ea68ef7 --- /dev/null +++ b/test/Transforms/LoopVectorize/read-only.ll @@ -0,0 +1,32 @@ +; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +;CHECK: @read_only_func +;CHECK: load <4 x i32> +;CHECK: ret i32 +define i32 @read_only_func(i32* nocapture %A, i32* nocapture %B, i32 %n) nounwind uwtable readonly ssp { + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] + %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ] + %2 = getelementptr inbounds i32* %A, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = add nsw i64 %indvars.iv, 13 + %5 = getelementptr inbounds i32* %B, i64 %4 + %6 = load i32* %5, align 4 + %7 = shl i32 %6, 1 + %8 = add i32 %3, %sum.02 + %9 = add i32 %8, %7 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ] + ret i32 %sum.0.lcssa +} diff --git a/test/Transforms/LoopVectorize/reduction.ll b/test/Transforms/LoopVectorize/reduction.ll new file mode 100644 index 00000000000..3e871b229b5 --- /dev/null +++ b/test/Transforms/LoopVectorize/reduction.ll @@ -0,0 +1,152 @@ +; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s + +target 
datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +;CHECK: @reduction_sum +;CHECK: phi <4 x i32> +;CHECK: load <4 x i32> +;CHECK: add <4 x i32> +;CHECK: ret i32 +define i32 @reduction_sum(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp { + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] + %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ] + %2 = getelementptr inbounds i32* %A, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = getelementptr inbounds i32* %B, i64 %indvars.iv + %5 = load i32* %4, align 4 + %6 = trunc i64 %indvars.iv to i32 + %7 = add i32 %sum.02, %6 + %8 = add i32 %7, %3 + %9 = add i32 %8, %5 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ] + ret i32 %sum.0.lcssa +} + +;CHECK: @reduction_prod +;CHECK: phi <4 x i32> +;CHECK: load <4 x i32> +;CHECK: mul <4 x i32> +;CHECK: ret i32 +define i32 @reduction_prod(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp { + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] + %prod.02 = phi i32 [ %9, %.lr.ph ], [ 1, %0 ] + %2 = getelementptr inbounds i32* %A, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = getelementptr inbounds i32* %B, i64 %indvars.iv + %5 = load i32* %4, align 4 + %6 = trunc i64 %indvars.iv to i32 + %7 = mul i32 %prod.02, %6 + %8 = mul i32 %7, %3 + %9 = mul i32 %8, %5 + 
%indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + %prod.0.lcssa = phi i32 [ 1, %0 ], [ %9, %.lr.ph ] + ret i32 %prod.0.lcssa +} + +;CHECK: @reduction_mix +;CHECK: phi <4 x i32> +;CHECK: load <4 x i32> +;CHECK: mul <4 x i32> +;CHECK: ret i32 +define i32 @reduction_mix(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp { + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] + %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ] + %2 = getelementptr inbounds i32* %A, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = getelementptr inbounds i32* %B, i64 %indvars.iv + %5 = load i32* %4, align 4 + %6 = mul nsw i32 %5, %3 + %7 = trunc i64 %indvars.iv to i32 + %8 = add i32 %sum.02, %7 + %9 = add i32 %8, %6 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ] + ret i32 %sum.0.lcssa +} + +;CHECK: @reduction_mul +;CHECK: mul <4 x i32> +;CHECK: ret i32 +define i32 @reduction_mul(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp { + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] + %sum.02 = phi i32 [ %9, %.lr.ph ], [ 19, %0 ] + %2 = getelementptr inbounds i32* %A, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = getelementptr inbounds i32* %B, i64 %indvars.iv + %5 = load i32* %4, align 4 + %6 = trunc i64 %indvars.iv to i32 + %7 = add i32 %3, %6 + %8 = 
add i32 %7, %5 + %9 = mul i32 %8, %sum.02 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ] + ret i32 %sum.0.lcssa +} + +;CHECK: @start_at_non_zero +;CHECK: phi <4 x i32> +;CHECK: <i32 120, i32 0, i32 0, i32 0> +;CHECK: ret i32 +define i32 @start_at_non_zero(i32* nocapture %in, i32* nocapture %coeff, i32* nocapture %out, i32 %n) nounwind uwtable readonly ssp { +entry: + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %sum.09 = phi i32 [ %add, %for.body ], [ 120, %entry ] + %arrayidx = getelementptr inbounds i32* %in, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32* %coeff, i64 %indvars.iv + %1 = load i32* %arrayidx2, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, %sum.09 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %sum.0.lcssa = phi i32 [ 120, %entry ], [ %add, %for.body ] + ret i32 %sum.0.lcssa +} + + diff --git a/test/Transforms/LoopVectorize/scalar-select.ll b/test/Transforms/LoopVectorize/scalar-select.ll new file mode 100644 index 00000000000..8d5b6fd8aff --- /dev/null +++ b/test/Transforms/LoopVectorize/scalar-select.ll @@ -0,0 +1,37 @@ +; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@a = common global [2048 x 
i32] zeroinitializer, align 16 +@b = common global [2048 x i32] zeroinitializer, align 16 +@c = common global [2048 x i32] zeroinitializer, align 16 + +;CHECK: @example1 +;CHECK: load <4 x i32> +; make sure that we have a scalar condition and a vector operand. +;CHECK: select i1 %cond, <4 x i32> +;CHECK: store <4 x i32> +;CHECK: ret void +define void @example1(i1 %cond) nounwind uwtable ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv + %5 = load i32* %4, align 4 + %6 = add nsw i32 %5, %3 + %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + %sel = select i1 %cond, i32 %6, i32 zeroinitializer + store i32 %sel, i32* %7, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 256 + br i1 %exitcond, label %8, label %1 + +; <label>:8 ; preds = %1 + ret void +} + diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll index 644fda167d4..03120f7a327 100644 --- a/test/Transforms/SROA/basictest.ll +++ b/test/Transforms/SROA/basictest.ll @@ -1063,3 +1063,23 @@ entry: call void @llvm.lifetime.end(i64 -1, i8* %0) ret void } + +define void @PR14105({ [16 x i8] }* %ptr) { +; Ensure that when rewriting the GEP index '-1' for this alloca we preserve its +; sign as negative. We use a volatile memcpy to ensure promotion never actually +; occurs. 
+; CHECK: @PR14105 + +entry: + %a = alloca { [16 x i8] }, align 8 +; CHECK: alloca [16 x i8], align 8 + + %gep = getelementptr inbounds { [16 x i8] }* %ptr, i64 -1 +; CHECK-NEXT: getelementptr inbounds { [16 x i8] }* %ptr, i64 -1, i32 0, i64 0 + + %cast1 = bitcast { [16 x i8 ] }* %gep to i8* + %cast2 = bitcast { [16 x i8 ] }* %a to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast1, i8* %cast2, i32 16, i32 8, i1 true) + ret void +; CHECK: ret +} diff --git a/test/Transforms/SimplifyLibCalls/FFS.ll b/test/Transforms/SimplifyLibCalls/FFS.ll index e38d78349d4..6aecbeacd7e 100644 --- a/test/Transforms/SimplifyLibCalls/FFS.ll +++ b/test/Transforms/SimplifyLibCalls/FFS.ll @@ -1,6 +1,7 @@ -; Test that the ToAsciiOptimizer works correctly -; RUN: opt < %s -simplify-libcalls -S | \ -; RUN: not grep "call.*@ffs" +; Test that FFSOpt works correctly +; RUN: opt < %s -simplify-libcalls -S | FileCheck %s + +; CHECK-NOT: call{{.*}}@ffs @non_const = external global i32 ; <i32*> [#uses=1] @@ -34,3 +35,11 @@ define i32 @a(i64) nounwind { %2 = call i32 @ffsll(i64 %0) ; <i32> [#uses=1] ret i32 %2 } + +; PR13028 +define i32 @b() nounwind { + %ffs = call i32 @ffsll(i64 0) + ret i32 %ffs +; CHECK: @b +; CHECK-NEXT: ret i32 0 +} diff --git a/test/Transforms/SimplifyLibCalls/StrCpy.ll b/test/Transforms/SimplifyLibCalls/StrCpy.ll deleted file mode 100644 index 83406ff8f86..00000000000 --- a/test/Transforms/SimplifyLibCalls/StrCpy.ll +++ /dev/null @@ -1,37 +0,0 @@ -; Test that the StrCpyOptimizer works correctly -; RUN: opt < %s -simplify-libcalls -S | FileCheck %s - -; This transformation requires the pointer size, as it assumes that size_t is -; the size of a pointer. 
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" - -@hello = constant [6 x i8] c"hello\00" - -declare i8* @strcpy(i8*, i8*) - -declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind - -declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly - -; rdar://6839935 - -define i32 @t1() { -; CHECK: @t1 - %target = alloca [1024 x i8] - %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0 - %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0 - %rslt1 = call i8* @strcpy( i8* %arg1, i8* %arg2 ) -; CHECK: @llvm.memcpy.p0i8.p0i8.i32 - ret i32 0 -} - -define i32 @t2() { -; CHECK: @t2 - %target = alloca [1024 x i8] - %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0 - %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0 - %tmp1 = call i32 @llvm.objectsize.i32(i8* %arg1, i1 false) - %rslt1 = call i8* @__strcpy_chk(i8* %arg1, i8* %arg2, i32 %tmp1) -; CHECK: @__memcpy_chk - ret i32 0 -} diff --git a/test/Transforms/TailCallElim/nocapture.ll b/test/Transforms/TailCallElim/nocapture.ll index 87cb9dd427b..5a1a9a6e7ce 100644 --- a/test/Transforms/TailCallElim/nocapture.ll +++ b/test/Transforms/TailCallElim/nocapture.ll @@ -1,9 +1,9 @@ ; RUN: opt %s -tailcallelim -S | FileCheck %s -; XFAIL: * declare void @use(i8* nocapture, i8* nocapture) +declare void @boring() -define i8* @foo(i8* nocapture %A, i1 %cond) { +define i8* @test1(i8* nocapture %A, i1 %cond) { ; CHECK: tailrecurse: ; CHECK: %A.tr = phi i8* [ %A, %0 ], [ %B, %cond_true ] ; CHECK: %cond.tr = phi i1 [ %cond, %0 ], [ false, %cond_true ] @@ -14,12 +14,27 @@ define i8* @foo(i8* nocapture %A, i1 %cond) { cond_true: ; CHECK: cond_true: ; CHECK: br label %tailrecurse - call i8* @foo(i8* %B, i1 false) + call i8* @test1(i8* %B, i1 false) ret i8* null cond_false: ; CHECK: cond_false call void @use(i8* %A, i8* %B) -; CHECK: tail call void @use(i8* %A.tr, i8* %B) +; CHECK: call void @use(i8* %A.tr, i8* %B) + call void 
@boring() +; CHECK: tail call void @boring() ret i8* null ; CHECK: ret i8* null } + +; PR14143 +define void @test2(i8* %a, i8* %b) { +; CHECK: @test2 +; CHECK-NOT: tail call +; CHECK: ret void + %c = alloca [100 x i8], align 16 + %tmp = bitcast [100 x i8]* %c to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %tmp, i64 100, i32 1, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) diff --git a/test/lit.cfg b/test/lit.cfg index dc37317ba95..7e6760e95ae 100644 --- a/test/lit.cfg +++ b/test/lit.cfg @@ -146,13 +146,12 @@ if re.search(r'cygwin|mingw32|win32', mcjit_triple): mcjit_triple += "-elf" config.substitutions.append( ('%mcjit_triple', mcjit_triple) ) -# When running under valgrind, we mangle '-vg' or '-vg_leak' onto the end of the -# triple so we can check it with XFAIL and XTARGET. -config.target_triple += lit.valgrindTriple - # Provide a substition for those tests that need to run the jit to obtain data # but simply want use the currently considered most reliable jit for platform -defaultIsMCJIT='true' if 'arm' in config.target_triple else 'false' +if 'arm' in config.target_triple: + defaultIsMCJIT = 'true' +else: + defaultIsMCJIT = 'false' config.substitutions.append( ('%defaultjit', '-use-mcjit='+defaultIsMCJIT) ) # Process jit implementation option @@ -242,6 +241,10 @@ else: if loadable_module: config.available_features.add('loadable_module') +# LTO +if config.lto_is_enabled == "1": + config.available_features.add('lto') + # llc knows whether he is compiled with -DNDEBUG. 
import subprocess try: diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in index 178b22f10f3..2bbe63e6348 100644 --- a/test/lit.site.cfg.in +++ b/test/lit.site.cfg.in @@ -11,6 +11,7 @@ config.python_executable = "@PYTHON_EXECUTABLE@" config.ocamlopt_executable = "@OCAMLOPT@" config.enable_shared = @ENABLE_SHARED@ config.enable_assertions = @ENABLE_ASSERTIONS@ +config.lto_is_enabled = "@LTO_IS_ENABLED@" config.targets_to_build = "@TARGETS_TO_BUILD@" config.llvm_bindings = "@LLVM_BINDINGS@" config.host_os = "@HOST_OS@" |