summaryrefslogtreecommitdiff
path: root/test/Transforms/SROA/vector-promotion.ll
diff options
context:
space:
mode:
Diffstat (limited to 'test/Transforms/SROA/vector-promotion.ll')
-rw-r--r--test/Transforms/SROA/vector-promotion.ll191
1 files changed, 191 insertions, 0 deletions
diff --git a/test/Transforms/SROA/vector-promotion.ll b/test/Transforms/SROA/vector-promotion.ll
new file mode 100644
index 00000000000..9cbab385c21
--- /dev/null
+++ b/test/Transforms/SROA/vector-promotion.ll
@@ -0,0 +1,191 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+%S1 = type { i64, [42 x float] }
+
+define i32 @test1(<4 x i32> %x, <4 x i32> %y) {
+; CHECK: @test1
+entry:
+ %a = alloca [2 x <4 x i32>]
+; CHECK-NOT: alloca
+
+ %a.x = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0
+ store <4 x i32> %x, <4 x i32>* %a.x
+ %a.y = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1
+ store <4 x i32> %y, <4 x i32>* %a.y
+; CHECK-NOT: store
+
+ %a.tmp1 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
+ %tmp1 = load i32* %a.tmp1
+ %a.tmp2 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
+ %tmp2 = load i32* %a.tmp2
+ %a.tmp3 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
+ %tmp3 = load i32* %a.tmp3
+; CHECK-NOT: load
+; CHECK: extractelement <4 x i32> %x, i32 2
+; CHECK-NEXT: extractelement <4 x i32> %y, i32 3
+; CHECK-NEXT: extractelement <4 x i32> %y, i32 0
+
+ %tmp4 = add i32 %tmp1, %tmp2
+ %tmp5 = add i32 %tmp3, %tmp4
+ ret i32 %tmp5
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
+
+define i32 @test2(<4 x i32> %x, <4 x i32> %y) {
+; CHECK: @test2
+; FIXME: This should be handled!
+entry:
+ %a = alloca [2 x <4 x i32>]
+; CHECK: alloca <4 x i32>
+
+ %a.x = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0
+ store <4 x i32> %x, <4 x i32>* %a.x
+ %a.y = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1
+ store <4 x i32> %y, <4 x i32>* %a.y
+
+ %a.tmp1 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
+ %tmp1 = load i32* %a.tmp1
+ %a.tmp2 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
+ %tmp2 = load i32* %a.tmp2
+ %a.tmp3 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
+ %a.tmp3.cast = bitcast i32* %a.tmp3 to <2 x i32>*
+ %tmp3.vec = load <2 x i32>* %a.tmp3.cast
+ %tmp3 = extractelement <2 x i32> %tmp3.vec, i32 0
+
+ %tmp4 = add i32 %tmp1, %tmp2
+ %tmp5 = add i32 %tmp3, %tmp4
+ ret i32 %tmp5
+}
+
+define i32 @test3(<4 x i32> %x, <4 x i32> %y) {
+; CHECK: @test3
+entry:
+ %a = alloca [2 x <4 x i32>]
+; CHECK-NOT: alloca
+
+ %a.x = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0
+ store <4 x i32> %x, <4 x i32>* %a.x
+ %a.y = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1
+ store <4 x i32> %y, <4 x i32>* %a.y
+; CHECK-NOT: store
+
+ %a.y.cast = bitcast <4 x i32>* %a.y to i8*
+ call void @llvm.memset.p0i8.i32(i8* %a.y.cast, i8 0, i32 16, i32 1, i1 false)
+; CHECK-NOT: memset
+
+ %a.tmp1 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
+ %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
+ call void @llvm.memset.p0i8.i32(i8* %a.tmp1.cast, i8 -1, i32 4, i32 1, i1 false)
+ %tmp1 = load i32* %a.tmp1
+ %a.tmp2 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
+ %tmp2 = load i32* %a.tmp2
+ %a.tmp3 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
+ %tmp3 = load i32* %a.tmp3
+; CHECK-NOT: load
+; CHECK: %[[insert:.*]] = insertelement <4 x i32> %x, i32 -1, i32 2
+; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2
+; CHECK-NEXT: extractelement <4 x i32> zeroinitializer, i32 3
+; CHECK-NEXT: extractelement <4 x i32> zeroinitializer, i32 0
+
+ %tmp4 = add i32 %tmp1, %tmp2
+ %tmp5 = add i32 %tmp3, %tmp4
+ ret i32 %tmp5
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
+
+define i32 @test4(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %z) {
+; CHECK: @test4
+entry:
+ %a = alloca [2 x <4 x i32>]
+; CHECK-NOT: alloca
+
+ %a.x = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0
+ store <4 x i32> %x, <4 x i32>* %a.x
+ %a.y = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1
+ store <4 x i32> %y, <4 x i32>* %a.y
+; CHECK-NOT: store
+
+ %a.y.cast = bitcast <4 x i32>* %a.y to i8*
+ %z.cast = bitcast <4 x i32>* %z to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.y.cast, i8* %z.cast, i32 16, i32 1, i1 false)
+; CHECK-NOT: memcpy
+
+ %a.tmp1 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
+ %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
+ %z.tmp1 = getelementptr inbounds <4 x i32>* %z, i64 0, i64 2
+ %z.tmp1.cast = bitcast i32* %z.tmp1 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.tmp1.cast, i8* %z.tmp1.cast, i32 4, i32 1, i1 false)
+ %tmp1 = load i32* %a.tmp1
+ %a.tmp2 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
+ %tmp2 = load i32* %a.tmp2
+ %a.tmp3 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
+ %tmp3 = load i32* %a.tmp3
+; CHECK-NOT: memcpy
+; CHECK: %[[load:.*]] = load <4 x i32>* %z
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds <4 x i32>* %z, i64 0, i64 2
+; CHECK-NEXT: %[[element_load:.*]] = load i32* %[[gep]]
+; CHECK-NEXT: %[[insert:.*]] = insertelement <4 x i32> %x, i32 %[[element_load]], i32 2
+; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2
+; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 3
+; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 0
+
+ %tmp4 = add i32 %tmp1, %tmp2
+ %tmp5 = add i32 %tmp3, %tmp4
+ ret i32 %tmp5
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
+
+define i32 @test5(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %z) {
+; CHECK: @test5
+; The same as the above, but with reversed source and destination for the
+; element memcpy, and a self copy.
+entry:
+ %a = alloca [2 x <4 x i32>]
+; CHECK-NOT: alloca
+
+ %a.x = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0
+ store <4 x i32> %x, <4 x i32>* %a.x
+ %a.y = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1
+ store <4 x i32> %y, <4 x i32>* %a.y
+; CHECK-NOT: store
+
+ %a.y.cast = bitcast <4 x i32>* %a.y to i8*
+ %a.x.cast = bitcast <4 x i32>* %a.x to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.x.cast, i8* %a.y.cast, i32 16, i32 1, i1 false)
+; CHECK-NOT: memcpy
+
+ %a.tmp1 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
+ %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
+ %z.tmp1 = getelementptr inbounds <4 x i32>* %z, i64 0, i64 2
+ %z.tmp1.cast = bitcast i32* %z.tmp1 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %z.tmp1.cast, i8* %a.tmp1.cast, i32 4, i32 1, i1 false)
+ %tmp1 = load i32* %a.tmp1
+ %a.tmp2 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
+ %tmp2 = load i32* %a.tmp2
+ %a.tmp3 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
+ %tmp3 = load i32* %a.tmp3
+; CHECK-NOT: memcpy
+; CHECK: %[[gep:.*]] = getelementptr inbounds <4 x i32>* %z, i64 0, i64 2
+; CHECK-NEXT: %[[extract:.*]] = extractelement <4 x i32> %y, i32 2
+; CHECK-NEXT: store i32 %[[extract]], i32* %[[gep]]
+; CHECK-NEXT: extractelement <4 x i32> %y, i32 2
+; CHECK-NEXT: extractelement <4 x i32> %y, i32 3
+; CHECK-NEXT: extractelement <4 x i32> %y, i32 0
+
+ %tmp4 = add i32 %tmp1, %tmp2
+ %tmp5 = add i32 %tmp3, %tmp4
+ ret i32 %tmp5
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind