diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-02-13 01:01:59 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-02-13 01:01:59 +0000 |
commit | ba011288a9659d57736956c47b8522eace520807 (patch) | |
tree | 9b09f077808a5517692f892bf2847663c3881ca4 | |
parent | b518692b52a0bbdf9cf0e2167b9629dd9501abcd (diff) |
Split sources for amdgcn and r600
Most files remain in a common amdgpu directory.
Also switches barriers to use convergent,
and use llvm.amdgcn.s.barrier.
This now requires 3.9/trunk to build amdgcn.
git-svn-id: https://llvm.org/svn/llvm-project/libclc/trunk@260777 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | amdgcn/lib/OVERRIDES | 0 | ||||
-rw-r--r-- | amdgcn/lib/SOURCES | 1 | ||||
-rw-r--r-- | amdgcn/lib/synchronization/barrier_impl.ll | 32 | ||||
-rw-r--r-- | amdgpu/lib/OVERRIDES | 2 | ||||
-rw-r--r-- | amdgpu/lib/SOURCES | 25 | ||||
-rw-r--r-- | amdgpu/lib/atomic/atomic.cl (renamed from r600/lib/atomic/atomic.cl) | 0 | ||||
-rw-r--r-- | amdgpu/lib/image/get_image_attributes_impl.ll (renamed from r600/lib/image/get_image_attributes_impl.ll) | 0 | ||||
-rw-r--r-- | amdgpu/lib/image/get_image_channel_data_type.cl (renamed from r600/lib/image/get_image_channel_data_type.cl) | 0 | ||||
-rw-r--r-- | amdgpu/lib/image/get_image_channel_order.cl (renamed from r600/lib/image/get_image_channel_order.cl) | 0 | ||||
-rw-r--r-- | amdgpu/lib/image/get_image_depth.cl (renamed from r600/lib/image/get_image_depth.cl) | 0 | ||||
-rw-r--r-- | amdgpu/lib/image/get_image_height.cl (renamed from r600/lib/image/get_image_height.cl) | 0 | ||||
-rw-r--r-- | amdgpu/lib/image/get_image_width.cl (renamed from r600/lib/image/get_image_width.cl) | 0 | ||||
-rw-r--r-- | amdgpu/lib/image/read_image_impl.ll (renamed from r600/lib/image/read_image_impl.ll) | 0 | ||||
-rw-r--r-- | amdgpu/lib/image/read_imagef.cl (renamed from r600/lib/image/read_imagef.cl) | 0 | ||||
-rw-r--r-- | amdgpu/lib/image/read_imagei.cl (renamed from r600/lib/image/read_imagei.cl) | 0 | ||||
-rw-r--r-- | amdgpu/lib/image/read_imageui.cl (renamed from r600/lib/image/read_imageui.cl) | 0 | ||||
-rw-r--r-- | amdgpu/lib/image/write_image_impl.ll (renamed from r600/lib/image/write_image_impl.ll) | 0 | ||||
-rw-r--r-- | amdgpu/lib/image/write_imagef.cl (renamed from r600/lib/image/write_imagef.cl) | 0 | ||||
-rw-r--r-- | amdgpu/lib/image/write_imagei.cl (renamed from r600/lib/image/write_imagei.cl) | 0 | ||||
-rw-r--r-- | amdgpu/lib/image/write_imageui.cl (renamed from r600/lib/image/write_imageui.cl) | 0 | ||||
-rw-r--r-- | amdgpu/lib/math/ldexp.cl (renamed from r600/lib/math/ldexp.cl) | 0 | ||||
-rw-r--r-- | amdgpu/lib/math/nextafter.cl (renamed from r600/lib/math/nextafter.cl) | 0 | ||||
-rw-r--r-- | amdgpu/lib/math/sqrt.cl (renamed from r600/lib/math/sqrt.cl) | 0 | ||||
-rw-r--r-- | amdgpu/lib/synchronization/barrier.cl (renamed from r600/lib/synchronization/barrier.cl) | 0 | ||||
-rw-r--r-- | amdgpu/lib/workitem/get_global_size.ll (renamed from r600/lib/workitem/get_global_size.ll) | 0 | ||||
-rw-r--r-- | amdgpu/lib/workitem/get_group_id.ll (renamed from r600/lib/workitem/get_group_id.ll) | 0 | ||||
-rw-r--r-- | amdgpu/lib/workitem/get_local_id.ll (renamed from r600/lib/workitem/get_local_id.ll) | 0 | ||||
-rw-r--r-- | amdgpu/lib/workitem/get_local_size.ll (renamed from r600/lib/workitem/get_local_size.ll) | 0 | ||||
-rw-r--r-- | amdgpu/lib/workitem/get_num_groups.ll (renamed from r600/lib/workitem/get_num_groups.ll) | 0 | ||||
-rw-r--r-- | amdgpu/lib/workitem/get_work_dim.ll (renamed from r600/lib/workitem/get_work_dim.ll) | 0 | ||||
-rwxr-xr-x | configure.py | 8 | ||||
-rw-r--r-- | r600/lib/OVERRIDES | 2 | ||||
-rw-r--r-- | r600/lib/SOURCES | 25 | ||||
-rw-r--r-- | r600/lib/synchronization/barrier_impl.ll | 18 |
34 files changed, 75 insertions, 38 deletions
diff --git a/amdgcn/lib/OVERRIDES b/amdgcn/lib/OVERRIDES new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/amdgcn/lib/OVERRIDES diff --git a/amdgcn/lib/SOURCES b/amdgcn/lib/SOURCES new file mode 100644 index 0000000..c99f3fc --- /dev/null +++ b/amdgcn/lib/SOURCES @@ -0,0 +1 @@ +synchronization/barrier_impl.ll diff --git a/amdgcn/lib/synchronization/barrier_impl.ll b/amdgcn/lib/synchronization/barrier_impl.ll new file mode 100644 index 0000000..1809edd --- /dev/null +++ b/amdgcn/lib/synchronization/barrier_impl.ll @@ -0,0 +1,32 @@ +declare i32 @__clc_clk_local_mem_fence() #1 +declare i32 @__clc_clk_global_mem_fence() #1 +declare void @llvm.amdgcn.s.barrier() #0 + +define void @barrier(i32 %flags) #2 { +barrier_local_test: + %CLK_LOCAL_MEM_FENCE = call i32 @__clc_clk_local_mem_fence() + %0 = and i32 %flags, %CLK_LOCAL_MEM_FENCE + %1 = icmp ne i32 %0, 0 + br i1 %1, label %barrier_local, label %barrier_global_test + +barrier_local: + call void @llvm.amdgcn.s.barrier() + br label %barrier_global_test + +barrier_global_test: + %CLK_GLOBAL_MEM_FENCE = call i32 @__clc_clk_global_mem_fence() + %2 = and i32 %flags, %CLK_GLOBAL_MEM_FENCE + %3 = icmp ne i32 %2, 0 + br i1 %3, label %barrier_global, label %done + +barrier_global: + call void @llvm.amdgcn.s.barrier() + br label %done + +done: + ret void +} + +attributes #0 = { nounwind convergent } +attributes #1 = { nounwind alwaysinline } +attributes #2 = { nounwind convergent alwaysinline } diff --git a/amdgpu/lib/OVERRIDES b/amdgpu/lib/OVERRIDES new file mode 100644 index 0000000..3f941d8 --- /dev/null +++ b/amdgpu/lib/OVERRIDES @@ -0,0 +1,2 @@ +workitem/get_group_id.cl +workitem/get_global_size.cl diff --git a/amdgpu/lib/SOURCES b/amdgpu/lib/SOURCES new file mode 100644 index 0000000..7505f3f --- /dev/null +++ b/amdgpu/lib/SOURCES @@ -0,0 +1,25 @@ +atomic/atomic.cl +math/ldexp.cl +math/nextafter.cl +math/sqrt.cl +workitem/get_num_groups.ll +workitem/get_group_id.ll +workitem/get_local_size.ll 
+workitem/get_local_id.ll +workitem/get_global_size.ll +workitem/get_work_dim.ll +synchronization/barrier.cl +image/get_image_width.cl +image/get_image_height.cl +image/get_image_depth.cl +image/get_image_channel_data_type.cl +image/get_image_channel_order.cl +image/get_image_attributes_impl.ll +image/read_imagef.cl +image/read_imagei.cl +image/read_imageui.cl +image/read_image_impl.ll +image/write_imagef.cl +image/write_imagei.cl +image/write_imageui.cl +image/write_image_impl.ll diff --git a/r600/lib/atomic/atomic.cl b/amdgpu/lib/atomic/atomic.cl index 5bfe07b..5bfe07b 100644 --- a/r600/lib/atomic/atomic.cl +++ b/amdgpu/lib/atomic/atomic.cl diff --git a/r600/lib/image/get_image_attributes_impl.ll b/amdgpu/lib/image/get_image_attributes_impl.ll index 7f1965d..7f1965d 100644 --- a/r600/lib/image/get_image_attributes_impl.ll +++ b/amdgpu/lib/image/get_image_attributes_impl.ll diff --git a/r600/lib/image/get_image_channel_data_type.cl b/amdgpu/lib/image/get_image_channel_data_type.cl index 2a2478f..2a2478f 100644 --- a/r600/lib/image/get_image_channel_data_type.cl +++ b/amdgpu/lib/image/get_image_channel_data_type.cl diff --git a/r600/lib/image/get_image_channel_order.cl b/amdgpu/lib/image/get_image_channel_order.cl index 91e9b89..91e9b89 100644 --- a/r600/lib/image/get_image_channel_order.cl +++ b/amdgpu/lib/image/get_image_channel_order.cl diff --git a/r600/lib/image/get_image_depth.cl b/amdgpu/lib/image/get_image_depth.cl index 1864645..1864645 100644 --- a/r600/lib/image/get_image_depth.cl +++ b/amdgpu/lib/image/get_image_depth.cl diff --git a/r600/lib/image/get_image_height.cl b/amdgpu/lib/image/get_image_height.cl index 80b3640..80b3640 100644 --- a/r600/lib/image/get_image_height.cl +++ b/amdgpu/lib/image/get_image_height.cl diff --git a/r600/lib/image/get_image_width.cl b/amdgpu/lib/image/get_image_width.cl index 29e4e94..29e4e94 100644 --- a/r600/lib/image/get_image_width.cl +++ b/amdgpu/lib/image/get_image_width.cl diff --git 
a/r600/lib/image/read_image_impl.ll b/amdgpu/lib/image/read_image_impl.ll index 229a252..229a252 100644 --- a/r600/lib/image/read_image_impl.ll +++ b/amdgpu/lib/image/read_image_impl.ll diff --git a/r600/lib/image/read_imagef.cl b/amdgpu/lib/image/read_imagef.cl index af80ada..af80ada 100644 --- a/r600/lib/image/read_imagef.cl +++ b/amdgpu/lib/image/read_imagef.cl diff --git a/r600/lib/image/read_imagei.cl b/amdgpu/lib/image/read_imagei.cl index b973aae..b973aae 100644 --- a/r600/lib/image/read_imagei.cl +++ b/amdgpu/lib/image/read_imagei.cl diff --git a/r600/lib/image/read_imageui.cl b/amdgpu/lib/image/read_imageui.cl index ec9836e..ec9836e 100644 --- a/r600/lib/image/read_imageui.cl +++ b/amdgpu/lib/image/read_imageui.cl diff --git a/r600/lib/image/write_image_impl.ll b/amdgpu/lib/image/write_image_impl.ll index 265f5d6..265f5d6 100644 --- a/r600/lib/image/write_image_impl.ll +++ b/amdgpu/lib/image/write_image_impl.ll diff --git a/r600/lib/image/write_imagef.cl b/amdgpu/lib/image/write_imagef.cl index 4483fcf..4483fcf 100644 --- a/r600/lib/image/write_imagef.cl +++ b/amdgpu/lib/image/write_imagef.cl diff --git a/r600/lib/image/write_imagei.cl b/amdgpu/lib/image/write_imagei.cl index 394a223..394a223 100644 --- a/r600/lib/image/write_imagei.cl +++ b/amdgpu/lib/image/write_imagei.cl diff --git a/r600/lib/image/write_imageui.cl b/amdgpu/lib/image/write_imageui.cl index 91344de..91344de 100644 --- a/r600/lib/image/write_imageui.cl +++ b/amdgpu/lib/image/write_imageui.cl diff --git a/r600/lib/math/ldexp.cl b/amdgpu/lib/math/ldexp.cl index 80439ce..80439ce 100644 --- a/r600/lib/math/ldexp.cl +++ b/amdgpu/lib/math/ldexp.cl diff --git a/r600/lib/math/nextafter.cl b/amdgpu/lib/math/nextafter.cl index 4611c81..4611c81 100644 --- a/r600/lib/math/nextafter.cl +++ b/amdgpu/lib/math/nextafter.cl diff --git a/r600/lib/math/sqrt.cl b/amdgpu/lib/math/sqrt.cl index 3e5b17c..3e5b17c 100644 --- a/r600/lib/math/sqrt.cl +++ b/amdgpu/lib/math/sqrt.cl diff --git 
a/r600/lib/synchronization/barrier.cl b/amdgpu/lib/synchronization/barrier.cl index 6f2900b..6f2900b 100644 --- a/r600/lib/synchronization/barrier.cl +++ b/amdgpu/lib/synchronization/barrier.cl diff --git a/r600/lib/workitem/get_global_size.ll b/amdgpu/lib/workitem/get_global_size.ll index ac2d08d..ac2d08d 100644 --- a/r600/lib/workitem/get_global_size.ll +++ b/amdgpu/lib/workitem/get_global_size.ll diff --git a/r600/lib/workitem/get_group_id.ll b/amdgpu/lib/workitem/get_group_id.ll index 0dc86e5..0dc86e5 100644 --- a/r600/lib/workitem/get_group_id.ll +++ b/amdgpu/lib/workitem/get_group_id.ll diff --git a/r600/lib/workitem/get_local_id.ll b/amdgpu/lib/workitem/get_local_id.ll index ac5522a..ac5522a 100644 --- a/r600/lib/workitem/get_local_id.ll +++ b/amdgpu/lib/workitem/get_local_id.ll diff --git a/r600/lib/workitem/get_local_size.ll b/amdgpu/lib/workitem/get_local_size.ll index 0a98de6..0a98de6 100644 --- a/r600/lib/workitem/get_local_size.ll +++ b/amdgpu/lib/workitem/get_local_size.ll diff --git a/r600/lib/workitem/get_num_groups.ll b/amdgpu/lib/workitem/get_num_groups.ll index a708f42..a708f42 100644 --- a/r600/lib/workitem/get_num_groups.ll +++ b/amdgpu/lib/workitem/get_num_groups.ll diff --git a/r600/lib/workitem/get_work_dim.ll b/amdgpu/lib/workitem/get_work_dim.ll index 1f86b5e..1f86b5e 100644 --- a/r600/lib/workitem/get_work_dim.ll +++ b/amdgpu/lib/workitem/get_work_dim.ll diff --git a/configure.py b/configure.py index d591ef8..2663212 100755 --- a/configure.py +++ b/configure.py @@ -69,8 +69,8 @@ llvm_version = string.split(string.replace(llvm_config(['--version']), 'svn', '' llvm_int_version = int(llvm_version[0]) * 100 + int(llvm_version[1]) * 10 llvm_string_version = 'LLVM' + llvm_version[0] + '.' 
+ llvm_version[1] -if llvm_int_version < 370: - print "libclc requires LLVM >= 3.7" +if llvm_int_version < 390: + print "libclc requires LLVM >= 3.9" sys.exit(1) llvm_system_libs = llvm_config(['--system-libs']) @@ -175,8 +175,8 @@ for target in targets: subdirs.append("%s-%s-%s" % (arch, t_vendor, t_os)) subdirs.append("%s-%s" % (arch, t_os)) subdirs.append(arch) - if arch == 'amdgcn': - subdirs.append('r600') + if arch == 'amdgcn' or arch == 'r600': + subdirs.append('amdgpu') incdirs = filter(os.path.isdir, [os.path.join(srcdir, subdir, 'include') for subdir in subdirs]) diff --git a/r600/lib/OVERRIDES b/r600/lib/OVERRIDES index 3f941d8..e69de29 100644 --- a/r600/lib/OVERRIDES +++ b/r600/lib/OVERRIDES @@ -1,2 +0,0 @@ -workitem/get_group_id.cl -workitem/get_global_size.cl diff --git a/r600/lib/SOURCES b/r600/lib/SOURCES index 029b22c..c99f3fc 100644 --- a/r600/lib/SOURCES +++ b/r600/lib/SOURCES @@ -1,26 +1 @@ -atomic/atomic.cl -math/ldexp.cl -math/nextafter.cl -math/sqrt.cl -workitem/get_num_groups.ll -workitem/get_group_id.ll -workitem/get_local_size.ll -workitem/get_local_id.ll -workitem/get_global_size.ll -workitem/get_work_dim.ll -synchronization/barrier.cl synchronization/barrier_impl.ll -image/get_image_width.cl -image/get_image_height.cl -image/get_image_depth.cl -image/get_image_channel_data_type.cl -image/get_image_channel_order.cl -image/get_image_attributes_impl.ll -image/read_imagef.cl -image/read_imagei.cl -image/read_imageui.cl -image/read_image_impl.ll -image/write_imagef.cl -image/write_imagei.cl -image/write_imageui.cl -image/write_image_impl.ll diff --git a/r600/lib/synchronization/barrier_impl.ll b/r600/lib/synchronization/barrier_impl.ll index 3d8ee66..825b2eb 100644 --- a/r600/lib/synchronization/barrier_impl.ll +++ b/r600/lib/synchronization/barrier_impl.ll @@ -1,9 +1,9 @@ -declare i32 @__clc_clk_local_mem_fence() nounwind alwaysinline -declare i32 @__clc_clk_global_mem_fence() nounwind alwaysinline -declare void @llvm.AMDGPU.barrier.local() 
nounwind noduplicate -declare void @llvm.AMDGPU.barrier.global() nounwind noduplicate +declare i32 @__clc_clk_local_mem_fence() #1 +declare i32 @__clc_clk_global_mem_fence() #1 +declare void @llvm.AMDGPU.barrier.local() #0 +declare void @llvm.AMDGPU.barrier.global() #0 -define void @barrier(i32 %flags) nounwind noduplicate alwaysinline { +define void @barrier(i32 %flags) #2 { barrier_local_test: %CLK_LOCAL_MEM_FENCE = call i32 @__clc_clk_local_mem_fence() %0 = and i32 %flags, %CLK_LOCAL_MEM_FENCE @@ -11,7 +11,7 @@ barrier_local_test: br i1 %1, label %barrier_local, label %barrier_global_test barrier_local: - call void @llvm.AMDGPU.barrier.local() noduplicate + call void @llvm.AMDGPU.barrier.local() br label %barrier_global_test barrier_global_test: @@ -21,9 +21,13 @@ barrier_global_test: br i1 %3, label %barrier_global, label %done barrier_global: - call void @llvm.AMDGPU.barrier.global() noduplicate + call void @llvm.AMDGPU.barrier.global() br label %done done: ret void } + +attributes #0 = { nounwind convergent } +attributes #1 = { nounwind alwaysinline } +attributes #2 = { nounwind convergent alwaysinline } |