summaryrefslogtreecommitdiff
path: root/backend/src/libocl
diff options
context:
space:
mode:
authorJunyan He <junyan.he@linux.intel.com>2014-09-01 10:19:42 +0800
committerZhigang Gong <zhigang.gong@intel.com>2014-09-04 12:10:03 +0800
commit21da440d810cfa1807ee417c8e225dbf2c02b6c7 (patch)
tree79e1c44d31f61fb3d26722476f10d34d8fcf6276 /backend/src/libocl
parent908a9539d9beb242ab247b290a118dd1c1e6414f (diff)
Add memcpy, memset and barrier bitcode files into libocl
Signed-off-by: Junyan He <junyan.he@linux.intel.com> Reviewed-by: Zhigang Gong <zhigang.gong@intel.com>
Diffstat (limited to 'backend/src/libocl')
-rw-r--r--backend/src/libocl/src/ocl_barrier.ll39
-rw-r--r--backend/src/libocl/src/ocl_memcpy.ll336
-rw-r--r--backend/src/libocl/src/ocl_memset.ll127
3 files changed, 502 insertions, 0 deletions
diff --git a/backend/src/libocl/src/ocl_barrier.ll b/backend/src/libocl/src/ocl_barrier.ll
new file mode 100644
index 00000000..4e55fcb7
--- /dev/null
+++ b/backend/src/libocl/src/ocl_barrier.ll
@@ -0,0 +1,39 @@
+;XXX FIXME: as LLVM IR can't use macros, we hardcode 3, 1 and 2
+;here; we may need a more graceful way to handle this kind
+;of value later.
+;#define CLK_LOCAL_MEM_FENCE (1 << 0)
+;#define CLK_GLOBAL_MEM_FENCE (1 << 1)
+
+; Declared but not called by @barrier in this file.
+; NOTE(review): presumably these return the CLK_LOCAL_MEM_FENCE /
+; CLK_GLOBAL_MEM_FENCE values - confirm against their definitions.
+declare i32 @_get_local_mem_fence() nounwind alwaysinline
+declare i32 @_get_global_mem_fence() nounwind alwaysinline
+; Barrier routines that @barrier dispatches to; only declared here, so the
+; bodies must be supplied elsewhere.
+declare void @__gen_ocl_barrier_local() nounwind alwaysinline noduplicate
+declare void @__gen_ocl_barrier_global() nounwind alwaysinline noduplicate
+declare void @__gen_ocl_barrier_local_and_global() nounwind alwaysinline noduplicate
+
+; barrier(flags): dispatch to the matching __gen_ocl_barrier_* routine.
+;   flags == 3 -> __gen_ocl_barrier_local_and_global
+;   flags == 1 -> __gen_ocl_barrier_local
+;   flags == 2 -> __gen_ocl_barrier_global
+; Any other value falls through to the return without calling anything.
+define void @barrier(i32 %flags) nounwind noduplicate alwaysinline {
+entry:
+ %want_both = icmp eq i32 %flags, 3
+ br i1 %want_both, label %fence_both, label %test_local
+
+fence_both:
+ call void @__gen_ocl_barrier_local_and_global()
+ br label %finish
+
+test_local:
+ %want_local = icmp eq i32 %flags, 1
+ br i1 %want_local, label %fence_local, label %test_global
+
+fence_local:
+ call void @__gen_ocl_barrier_local()
+ br label %finish
+
+test_global:
+ %want_global = icmp eq i32 %flags, 2
+ br i1 %want_global, label %fence_global, label %finish
+
+fence_global:
+ call void @__gen_ocl_barrier_global()
+ br label %finish
+
+finish:
+ ret void
+}
diff --git a/backend/src/libocl/src/ocl_memcpy.ll b/backend/src/libocl/src/ocl_memcpy.ll
new file mode 100644
index 00000000..476033e0
--- /dev/null
+++ b/backend/src/libocl/src/ocl_memcpy.ll
@@ -0,0 +1,336 @@
+;The memcpy's source code.
+; INLINE_OVERLOADABLE void __gen_memcpy(uchar* dst, uchar* src, size_t size) {
+; size_t index = 0;
+; while((size - index) >= 4) {
+; *((uint *)(dst + index)) = *((uint *)(src + index));
+; index += 4;
+; }
+; while(index < size) {
+; dst[index] = src[index];
+; index++;
+; }
+; }
+
+; Copies %size bytes from %src (addrspace 1) to %dst (addrspace 1): 32-bit
+; words while at least four bytes remain, then the tail one byte at a time.
+; NOTE(review): the word accesses are marked align 4, so %dst and %src are
+; presumably expected to be 4-byte aligned - confirm against callers.
+define void @__gen_memcpy_gg(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline {
+entry:
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ ; Bytes still to copy; cannot underflow because %index.0 never exceeds %size.
+ %remaining = sub i32 %size, %index.0
+ ; Stay in the word loop only while a full 4-byte word fits, so no store
+ ; ever lands past %size and the index cannot wrap around.
+ %cmp = icmp ult i32 %remaining, 4
+ br i1 %cmp, label %while.cond3, label %while.body
+
+while.body: ; preds = %while.cond
+ %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0
+ %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
+ %1 = load i32 addrspace(1)* %0, align 4
+ %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0
+ %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
+ store i32 %1, i32 addrspace(1)* %2, align 4
+ %add = add i32 %index.0, 4
+ br label %while.cond
+
+while.cond3: ; preds = %while.cond, %while.body5
+ %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+ %cmp4 = icmp ult i32 %index.1, %size
+ br i1 %cmp4, label %while.body5, label %while.end7
+
+while.body5: ; preds = %while.cond3
+ %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1
+ %3 = load i8 addrspace(1)* %arrayidx, align 1
+ %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1
+ store i8 %3, i8 addrspace(1)* %arrayidx6, align 1
+ %inc = add i32 %index.1, 1
+ br label %while.cond3
+
+while.end7: ; preds = %while.cond3
+ ret void
+}
+
+; Copies %size bytes from %src (addrspace 0) to %dst (addrspace 1): 32-bit
+; words while at least four bytes remain, then the tail one byte at a time.
+; NOTE(review): the word accesses are marked align 4, so %dst and %src are
+; presumably expected to be 4-byte aligned - confirm against callers.
+define void @__gen_memcpy_gp(i8 addrspace(1)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline {
+entry:
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ ; Bytes still to copy; cannot underflow because %index.0 never exceeds %size.
+ %remaining = sub i32 %size, %index.0
+ ; Stay in the word loop only while a full 4-byte word fits, so no store
+ ; ever lands past %size and the index cannot wrap around.
+ %cmp = icmp ult i32 %remaining, 4
+ br i1 %cmp, label %while.cond3, label %while.body
+
+while.body: ; preds = %while.cond
+ %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0
+ %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)*
+ %1 = load i32 addrspace(0)* %0, align 4
+ %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0
+ %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
+ store i32 %1, i32 addrspace(1)* %2, align 4
+ %add = add i32 %index.0, 4
+ br label %while.cond
+
+while.cond3: ; preds = %while.cond, %while.body5
+ %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+ %cmp4 = icmp ult i32 %index.1, %size
+ br i1 %cmp4, label %while.body5, label %while.end7
+
+while.body5: ; preds = %while.cond3
+ %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1
+ %3 = load i8 addrspace(0)* %arrayidx, align 1
+ %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1
+ store i8 %3, i8 addrspace(1)* %arrayidx6, align 1
+ %inc = add i32 %index.1, 1
+ br label %while.cond3
+
+while.end7: ; preds = %while.cond3
+ ret void
+}
+
+; Copies %size bytes from %src (addrspace 3) to %dst (addrspace 1): 32-bit
+; words while at least four bytes remain, then the tail one byte at a time.
+; NOTE(review): the word accesses are marked align 4, so %dst and %src are
+; presumably expected to be 4-byte aligned - confirm against callers.
+define void @__gen_memcpy_gl(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline {
+entry:
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ ; Bytes still to copy; cannot underflow because %index.0 never exceeds %size.
+ %remaining = sub i32 %size, %index.0
+ ; Stay in the word loop only while a full 4-byte word fits, so no store
+ ; ever lands past %size and the index cannot wrap around.
+ %cmp = icmp ult i32 %remaining, 4
+ br i1 %cmp, label %while.cond3, label %while.body
+
+while.body: ; preds = %while.cond
+ %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0
+ %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)*
+ %1 = load i32 addrspace(3)* %0, align 4
+ %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0
+ %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
+ store i32 %1, i32 addrspace(1)* %2, align 4
+ %add = add i32 %index.0, 4
+ br label %while.cond
+
+while.cond3: ; preds = %while.cond, %while.body5
+ %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+ %cmp4 = icmp ult i32 %index.1, %size
+ br i1 %cmp4, label %while.body5, label %while.end7
+
+while.body5: ; preds = %while.cond3
+ %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1
+ %3 = load i8 addrspace(3)* %arrayidx, align 1
+ %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1
+ store i8 %3, i8 addrspace(1)* %arrayidx6, align 1
+ %inc = add i32 %index.1, 1
+ br label %while.cond3
+
+while.end7: ; preds = %while.cond3
+ ret void
+}
+
+; Copies %size bytes from %src (addrspace 1) to %dst (addrspace 0): 32-bit
+; words while at least four bytes remain, then the tail one byte at a time.
+; NOTE(review): the word accesses are marked align 4, so %dst and %src are
+; presumably expected to be 4-byte aligned - confirm against callers.
+define void @__gen_memcpy_pg(i8 addrspace(0)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline {
+entry:
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ ; Bytes still to copy; cannot underflow because %index.0 never exceeds %size.
+ %remaining = sub i32 %size, %index.0
+ ; Stay in the word loop only while a full 4-byte word fits, so no store
+ ; ever lands past %size and the index cannot wrap around.
+ %cmp = icmp ult i32 %remaining, 4
+ br i1 %cmp, label %while.cond3, label %while.body
+
+while.body: ; preds = %while.cond
+ %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0
+ %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
+ %1 = load i32 addrspace(1)* %0, align 4
+ %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0
+ %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)*
+ store i32 %1, i32 addrspace(0)* %2, align 4
+ %add = add i32 %index.0, 4
+ br label %while.cond
+
+while.cond3: ; preds = %while.cond, %while.body5
+ %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+ %cmp4 = icmp ult i32 %index.1, %size
+ br i1 %cmp4, label %while.body5, label %while.end7
+
+while.body5: ; preds = %while.cond3
+ %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1
+ %3 = load i8 addrspace(1)* %arrayidx, align 1
+ %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1
+ store i8 %3, i8 addrspace(0)* %arrayidx6, align 1
+ %inc = add i32 %index.1, 1
+ br label %while.cond3
+
+while.end7: ; preds = %while.cond3
+ ret void
+}
+
+; Copies %size bytes from %src (addrspace 0) to %dst (addrspace 0): 32-bit
+; words while at least four bytes remain, then the tail one byte at a time.
+; NOTE(review): the word accesses are marked align 4, so %dst and %src are
+; presumably expected to be 4-byte aligned - confirm against callers.
+define void @__gen_memcpy_pp(i8 addrspace(0)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline {
+entry:
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ ; Bytes still to copy; cannot underflow because %index.0 never exceeds %size.
+ %remaining = sub i32 %size, %index.0
+ ; Stay in the word loop only while a full 4-byte word fits, so no store
+ ; ever lands past %size and the index cannot wrap around.
+ %cmp = icmp ult i32 %remaining, 4
+ br i1 %cmp, label %while.cond3, label %while.body
+
+while.body: ; preds = %while.cond
+ %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0
+ %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)*
+ %1 = load i32 addrspace(0)* %0, align 4
+ %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0
+ %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)*
+ store i32 %1, i32 addrspace(0)* %2, align 4
+ %add = add i32 %index.0, 4
+ br label %while.cond
+
+while.cond3: ; preds = %while.cond, %while.body5
+ %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+ %cmp4 = icmp ult i32 %index.1, %size
+ br i1 %cmp4, label %while.body5, label %while.end7
+
+while.body5: ; preds = %while.cond3
+ %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1
+ %3 = load i8 addrspace(0)* %arrayidx, align 1
+ %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1
+ store i8 %3, i8 addrspace(0)* %arrayidx6, align 1
+ %inc = add i32 %index.1, 1
+ br label %while.cond3
+
+while.end7: ; preds = %while.cond3
+ ret void
+}
+
+; Copies %size bytes from %src (addrspace 3) to %dst (addrspace 0): 32-bit
+; words while at least four bytes remain, then the tail one byte at a time.
+; NOTE(review): the word accesses are marked align 4, so %dst and %src are
+; presumably expected to be 4-byte aligned - confirm against callers.
+define void @__gen_memcpy_pl(i8 addrspace(0)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline {
+entry:
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ ; Bytes still to copy; cannot underflow because %index.0 never exceeds %size.
+ %remaining = sub i32 %size, %index.0
+ ; Stay in the word loop only while a full 4-byte word fits, so no store
+ ; ever lands past %size and the index cannot wrap around.
+ %cmp = icmp ult i32 %remaining, 4
+ br i1 %cmp, label %while.cond3, label %while.body
+
+while.body: ; preds = %while.cond
+ %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0
+ %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)*
+ %1 = load i32 addrspace(3)* %0, align 4
+ %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0
+ %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)*
+ store i32 %1, i32 addrspace(0)* %2, align 4
+ %add = add i32 %index.0, 4
+ br label %while.cond
+
+while.cond3: ; preds = %while.cond, %while.body5
+ %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+ %cmp4 = icmp ult i32 %index.1, %size
+ br i1 %cmp4, label %while.body5, label %while.end7
+
+while.body5: ; preds = %while.cond3
+ %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1
+ %3 = load i8 addrspace(3)* %arrayidx, align 1
+ %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1
+ store i8 %3, i8 addrspace(0)* %arrayidx6, align 1
+ %inc = add i32 %index.1, 1
+ br label %while.cond3
+
+while.end7: ; preds = %while.cond3
+ ret void
+}
+
+; Copies %size bytes from %src (addrspace 1) to %dst (addrspace 3): 32-bit
+; words while at least four bytes remain, then the tail one byte at a time.
+; NOTE(review): the word accesses are marked align 4, so %dst and %src are
+; presumably expected to be 4-byte aligned - confirm against callers.
+define void @__gen_memcpy_lg(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline {
+entry:
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ ; Bytes still to copy; cannot underflow because %index.0 never exceeds %size.
+ %remaining = sub i32 %size, %index.0
+ ; Stay in the word loop only while a full 4-byte word fits, so no store
+ ; ever lands past %size and the index cannot wrap around.
+ %cmp = icmp ult i32 %remaining, 4
+ br i1 %cmp, label %while.cond3, label %while.body
+
+while.body: ; preds = %while.cond
+ %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0
+ %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
+ %1 = load i32 addrspace(1)* %0, align 4
+ %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0
+ %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)*
+ store i32 %1, i32 addrspace(3)* %2, align 4
+ %add = add i32 %index.0, 4
+ br label %while.cond
+
+while.cond3: ; preds = %while.cond, %while.body5
+ %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+ %cmp4 = icmp ult i32 %index.1, %size
+ br i1 %cmp4, label %while.body5, label %while.end7
+
+while.body5: ; preds = %while.cond3
+ %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1
+ %3 = load i8 addrspace(1)* %arrayidx, align 1
+ %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1
+ store i8 %3, i8 addrspace(3)* %arrayidx6, align 1
+ %inc = add i32 %index.1, 1
+ br label %while.cond3
+
+while.end7: ; preds = %while.cond3
+ ret void
+}
+
+; Copies %size bytes from %src (addrspace 0) to %dst (addrspace 3): 32-bit
+; words while at least four bytes remain, then the tail one byte at a time.
+; NOTE(review): the word accesses are marked align 4, so %dst and %src are
+; presumably expected to be 4-byte aligned - confirm against callers.
+define void @__gen_memcpy_lp(i8 addrspace(3)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline {
+entry:
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ ; Bytes still to copy; cannot underflow because %index.0 never exceeds %size.
+ %remaining = sub i32 %size, %index.0
+ ; Stay in the word loop only while a full 4-byte word fits, so no store
+ ; ever lands past %size and the index cannot wrap around.
+ %cmp = icmp ult i32 %remaining, 4
+ br i1 %cmp, label %while.cond3, label %while.body
+
+while.body: ; preds = %while.cond
+ %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0
+ %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)*
+ %1 = load i32 addrspace(0)* %0, align 4
+ %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0
+ %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)*
+ store i32 %1, i32 addrspace(3)* %2, align 4
+ %add = add i32 %index.0, 4
+ br label %while.cond
+
+while.cond3: ; preds = %while.cond, %while.body5
+ %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+ %cmp4 = icmp ult i32 %index.1, %size
+ br i1 %cmp4, label %while.body5, label %while.end7
+
+while.body5: ; preds = %while.cond3
+ %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1
+ %3 = load i8 addrspace(0)* %arrayidx, align 1
+ %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1
+ store i8 %3, i8 addrspace(3)* %arrayidx6, align 1
+ %inc = add i32 %index.1, 1
+ br label %while.cond3
+
+while.end7: ; preds = %while.cond3
+ ret void
+}
+
+; Copies %size bytes from %src (addrspace 3) to %dst (addrspace 3): 32-bit
+; words while at least four bytes remain, then the tail one byte at a time.
+; NOTE(review): the word accesses are marked align 4, so %dst and %src are
+; presumably expected to be 4-byte aligned - confirm against callers.
+define void @__gen_memcpy_ll(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline {
+entry:
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ ; Bytes still to copy; cannot underflow because %index.0 never exceeds %size.
+ %remaining = sub i32 %size, %index.0
+ ; Stay in the word loop only while a full 4-byte word fits, so no store
+ ; ever lands past %size and the index cannot wrap around.
+ %cmp = icmp ult i32 %remaining, 4
+ br i1 %cmp, label %while.cond3, label %while.body
+
+while.body: ; preds = %while.cond
+ %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0
+ %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)*
+ %1 = load i32 addrspace(3)* %0, align 4
+ %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0
+ %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)*
+ store i32 %1, i32 addrspace(3)* %2, align 4
+ %add = add i32 %index.0, 4
+ br label %while.cond
+
+while.cond3: ; preds = %while.cond, %while.body5
+ %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+ %cmp4 = icmp ult i32 %index.1, %size
+ br i1 %cmp4, label %while.body5, label %while.end7
+
+while.body5: ; preds = %while.cond3
+ %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1
+ %3 = load i8 addrspace(3)* %arrayidx, align 1
+ %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1
+ store i8 %3, i8 addrspace(3)* %arrayidx6, align 1
+ %inc = add i32 %index.1, 1
+ br label %while.cond3
+
+while.end7: ; preds = %while.cond3
+ ret void
+}
diff --git a/backend/src/libocl/src/ocl_memset.ll b/backend/src/libocl/src/ocl_memset.ll
new file mode 100644
index 00000000..addf9f55
--- /dev/null
+++ b/backend/src/libocl/src/ocl_memset.ll
@@ -0,0 +1,127 @@
+;The memset's source code.
+; INLINE_OVERLOADABLE void __gen_memset(uchar* dst, uchar val, size_t size) {
+; size_t index = 0;
+; uint v = (val << 24) | (val << 16) | (val << 8) | val;
+; while((size - index) >= 4) {
+; *((uint *)(dst + index)) = v;
+; index += 4;
+; }
+; while(index < size) {
+; dst[index] = val;
+; index++;
+; }
+; }
+
+; Fills %size bytes at %dst (default address space) with %val: 32-bit words
+; while at least four bytes remain, then the tail one byte at a time.
+; NOTE(review): the word stores are marked align 4, so %dst is presumably
+; expected to be 4-byte aligned - confirm against callers.
+define void @__gen_memset_p(i8* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline {
+entry:
+ ; Replicate %val into all four bytes of a 32-bit word (%or7).
+ %conv = zext i8 %val to i32
+ %shl = shl nuw i32 %conv, 24
+ %shl2 = shl nuw nsw i32 %conv, 16
+ %or = or i32 %shl, %shl2
+ %shl4 = shl nuw nsw i32 %conv, 8
+ %or5 = or i32 %or, %shl4
+ %or7 = or i32 %or5, %conv
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ ; Bytes still to fill; cannot underflow because %index.0 never exceeds %size.
+ %remaining = sub i32 %size, %index.0
+ ; Stay in the word loop only while a full 4-byte word fits, so no store
+ ; ever lands past %size and the index cannot wrap around.
+ %cmp = icmp ult i32 %remaining, 4
+ br i1 %cmp, label %while.cond10, label %while.body
+
+while.body: ; preds = %while.cond
+ %add.ptr = getelementptr inbounds i8* %dst, i32 %index.0
+ %0 = bitcast i8* %add.ptr to i32*
+ store i32 %or7, i32* %0, align 4
+ %add = add i32 %index.0, 4
+ br label %while.cond
+
+while.cond10: ; preds = %while.cond, %while.body13
+ %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body13 ]
+ %cmp11 = icmp ult i32 %index.1, %size
+ br i1 %cmp11, label %while.body13, label %while.end14
+
+while.body13: ; preds = %while.cond10
+ %arrayidx = getelementptr inbounds i8* %dst, i32 %index.1
+ store i8 %val, i8* %arrayidx, align 1
+ %inc = add i32 %index.1, 1
+ br label %while.cond10
+
+while.end14: ; preds = %while.cond10
+ ret void
+}
+
+; Fills %size bytes at %dst (addrspace 1) with %val: 32-bit words while at
+; least four bytes remain, then the tail one byte at a time.
+; NOTE(review): the word stores are marked align 4, so %dst is presumably
+; expected to be 4-byte aligned - confirm against callers.
+define void @__gen_memset_g(i8 addrspace(1)* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline {
+entry:
+ ; Replicate %val into all four bytes of a 32-bit word (%or7).
+ %conv = zext i8 %val to i32
+ %shl = shl nuw i32 %conv, 24
+ %shl2 = shl nuw nsw i32 %conv, 16
+ %or = or i32 %shl, %shl2
+ %shl4 = shl nuw nsw i32 %conv, 8
+ %or5 = or i32 %or, %shl4
+ %or7 = or i32 %or5, %conv
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ ; Bytes still to fill; cannot underflow because %index.0 never exceeds %size.
+ %remaining = sub i32 %size, %index.0
+ ; Stay in the word loop only while a full 4-byte word fits, so no store
+ ; ever lands past %size and the index cannot wrap around.
+ %cmp = icmp ult i32 %remaining, 4
+ br i1 %cmp, label %while.cond10, label %while.body
+
+while.body: ; preds = %while.cond
+ %add.ptr = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0
+ %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
+ store i32 %or7, i32 addrspace(1)* %0, align 4
+ %add = add i32 %index.0, 4
+ br label %while.cond
+
+while.cond10: ; preds = %while.cond, %while.body13
+ %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body13 ]
+ %cmp11 = icmp ult i32 %index.1, %size
+ br i1 %cmp11, label %while.body13, label %while.end14
+
+while.body13: ; preds = %while.cond10
+ %arrayidx = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1
+ store i8 %val, i8 addrspace(1)* %arrayidx, align 1
+ %inc = add i32 %index.1, 1
+ br label %while.cond10
+
+while.end14: ; preds = %while.cond10
+ ret void
+}
+
+; Fills %size bytes at %dst (addrspace 3) with %val: 32-bit words while at
+; least four bytes remain, then the tail one byte at a time.
+; NOTE(review): the word stores are marked align 4, so %dst is presumably
+; expected to be 4-byte aligned - confirm against callers.
+define void @__gen_memset_l(i8 addrspace(3)* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline {
+entry:
+ ; Replicate %val into all four bytes of a 32-bit word (%or7).
+ %conv = zext i8 %val to i32
+ %shl = shl nuw i32 %conv, 24
+ %shl2 = shl nuw nsw i32 %conv, 16
+ %or = or i32 %shl, %shl2
+ %shl4 = shl nuw nsw i32 %conv, 8
+ %or5 = or i32 %or, %shl4
+ %or7 = or i32 %or5, %conv
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ ; Bytes still to fill; cannot underflow because %index.0 never exceeds %size.
+ %remaining = sub i32 %size, %index.0
+ ; Stay in the word loop only while a full 4-byte word fits, so no store
+ ; ever lands past %size and the index cannot wrap around.
+ %cmp = icmp ult i32 %remaining, 4
+ br i1 %cmp, label %while.cond10, label %while.body
+
+while.body: ; preds = %while.cond
+ %add.ptr = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0
+ %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)*
+ store i32 %or7, i32 addrspace(3)* %0, align 4
+ %add = add i32 %index.0, 4
+ br label %while.cond
+
+while.cond10: ; preds = %while.cond, %while.body13
+ %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body13 ]
+ %cmp11 = icmp ult i32 %index.1, %size
+ br i1 %cmp11, label %while.body13, label %while.end14
+
+while.body13: ; preds = %while.cond10
+ %arrayidx = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1
+ store i8 %val, i8 addrspace(3)* %arrayidx, align 1
+ %inc = add i32 %index.1, 1
+ br label %while.cond10
+
+while.end14: ; preds = %while.cond10
+ ret void
+}