From ba011288a9659d57736956c47b8522eace520807 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 13 Feb 2016 01:01:59 +0000 Subject: Split sources for amdgcn and r600 Most files remain in a common amdgpu directory. Also switches barriers to to use convergent, and use llvm.amdgcn.s.barrier. This now requires 3.9/trunk to build amdgcn. git-svn-id: https://llvm.org/svn/llvm-project/libclc/trunk@260777 91177308-0d34-0410-b5e6-96231b3b80d8 --- amdgcn/lib/OVERRIDES | 0 amdgcn/lib/SOURCES | 1 + amdgcn/lib/synchronization/barrier_impl.ll | 32 +++++++++ amdgpu/lib/OVERRIDES | 2 + amdgpu/lib/SOURCES | 25 +++++++ amdgpu/lib/atomic/atomic.cl | 65 ++++++++++++++++++ amdgpu/lib/image/get_image_attributes_impl.ll | 87 +++++++++++++++++++++++++ amdgpu/lib/image/get_image_channel_data_type.cl | 13 ++++ amdgpu/lib/image/get_image_channel_order.cl | 13 ++++ amdgpu/lib/image/get_image_depth.cl | 8 +++ amdgpu/lib/image/get_image_height.cl | 13 ++++ amdgpu/lib/image/get_image_width.cl | 13 ++++ amdgpu/lib/image/read_image_impl.ll | 46 +++++++++++++ amdgpu/lib/image/read_imagef.cl | 14 ++++ amdgpu/lib/image/read_imagei.cl | 23 +++++++ amdgpu/lib/image/read_imageui.cl | 23 +++++++ amdgpu/lib/image/write_image_impl.ll | 52 +++++++++++++++ amdgpu/lib/image/write_imagef.cl | 9 +++ amdgpu/lib/image/write_imagei.cl | 9 +++ amdgpu/lib/image/write_imageui.cl | 9 +++ amdgpu/lib/math/ldexp.cl | 47 +++++++++++++ amdgpu/lib/math/nextafter.cl | 4 ++ amdgpu/lib/math/sqrt.cl | 59 +++++++++++++++++ amdgpu/lib/synchronization/barrier.cl | 10 +++ amdgpu/lib/workitem/get_global_size.ll | 18 +++++ amdgpu/lib/workitem/get_group_id.ll | 18 +++++ amdgpu/lib/workitem/get_local_id.ll | 18 +++++ amdgpu/lib/workitem/get_local_size.ll | 18 +++++ amdgpu/lib/workitem/get_num_groups.ll | 18 +++++ amdgpu/lib/workitem/get_work_dim.ll | 8 +++ configure.py | 8 +-- r600/lib/OVERRIDES | 2 - r600/lib/SOURCES | 25 ------- r600/lib/atomic/atomic.cl | 65 ------------------ r600/lib/image/get_image_attributes_impl.ll | 87 ------------------------- r600/lib/image/get_image_channel_data_type.cl | 13 ---- r600/lib/image/get_image_channel_order.cl | 13 ---- r600/lib/image/get_image_depth.cl | 8 --- r600/lib/image/get_image_height.cl | 13 ---- r600/lib/image/get_image_width.cl | 13 ---- r600/lib/image/read_image_impl.ll | 46 ------------- r600/lib/image/read_imagef.cl | 14 ---- r600/lib/image/read_imagei.cl | 23 ------- r600/lib/image/read_imageui.cl | 23 ------- r600/lib/image/write_image_impl.ll | 52 --------------- r600/lib/image/write_imagef.cl | 9 --- r600/lib/image/write_imagei.cl | 9 --- r600/lib/image/write_imageui.cl | 9 --- r600/lib/math/ldexp.cl | 47 ------------- r600/lib/math/nextafter.cl | 4 -- r600/lib/math/sqrt.cl | 59 ----------------- r600/lib/synchronization/barrier.cl | 10 --- r600/lib/synchronization/barrier_impl.ll | 18 +++-- r600/lib/workitem/get_global_size.ll | 18 ----- r600/lib/workitem/get_group_id.ll | 18 ----- r600/lib/workitem/get_local_id.ll | 18 ----- r600/lib/workitem/get_local_size.ll | 18 ----- r600/lib/workitem/get_num_groups.ll | 18 ----- r600/lib/workitem/get_work_dim.ll | 8 --- 59 files changed, 690 insertions(+), 653 deletions(-) create mode 100644 amdgcn/lib/OVERRIDES create mode 100644 amdgcn/lib/SOURCES create mode 100644 amdgcn/lib/synchronization/barrier_impl.ll create mode 100644 amdgpu/lib/OVERRIDES create mode 100644 amdgpu/lib/SOURCES create mode 100644 amdgpu/lib/atomic/atomic.cl create mode 100644 amdgpu/lib/image/get_image_attributes_impl.ll create mode 100644 amdgpu/lib/image/get_image_channel_data_type.cl create mode 100644 amdgpu/lib/image/get_image_channel_order.cl create mode 100644 amdgpu/lib/image/get_image_depth.cl create mode 100644 amdgpu/lib/image/get_image_height.cl create mode 100644 amdgpu/lib/image/get_image_width.cl create mode 100644 amdgpu/lib/image/read_image_impl.ll create mode 100644 amdgpu/lib/image/read_imagef.cl create mode 100644 amdgpu/lib/image/read_imagei.cl create mode 100644 amdgpu/lib/image/read_imageui.cl create mode 100644 amdgpu/lib/image/write_image_impl.ll create mode 100644 amdgpu/lib/image/write_imagef.cl create mode 100644 amdgpu/lib/image/write_imagei.cl create mode 100644 amdgpu/lib/image/write_imageui.cl create mode 100644 amdgpu/lib/math/ldexp.cl create mode 100644 amdgpu/lib/math/nextafter.cl create mode 100644 amdgpu/lib/math/sqrt.cl create mode 100644 amdgpu/lib/synchronization/barrier.cl create mode 100644 amdgpu/lib/workitem/get_global_size.ll create mode 100644 amdgpu/lib/workitem/get_group_id.ll create mode 100644 amdgpu/lib/workitem/get_local_id.ll create mode 100644 amdgpu/lib/workitem/get_local_size.ll create mode 100644 amdgpu/lib/workitem/get_num_groups.ll create mode 100644 amdgpu/lib/workitem/get_work_dim.ll delete mode 100644 r600/lib/atomic/atomic.cl delete mode 100644 r600/lib/image/get_image_attributes_impl.ll delete mode 100644 r600/lib/image/get_image_channel_data_type.cl delete mode 100644 r600/lib/image/get_image_channel_order.cl delete mode 100644 r600/lib/image/get_image_depth.cl delete mode 100644 r600/lib/image/get_image_height.cl delete mode 100644 r600/lib/image/get_image_width.cl delete mode 100644 r600/lib/image/read_image_impl.ll delete mode 100644 r600/lib/image/read_imagef.cl delete mode 100644 r600/lib/image/read_imagei.cl delete mode 100644 r600/lib/image/read_imageui.cl delete mode 100644 r600/lib/image/write_image_impl.ll delete mode 100644 r600/lib/image/write_imagef.cl delete mode 100644 r600/lib/image/write_imagei.cl delete mode 100644 r600/lib/image/write_imageui.cl delete mode 100644 r600/lib/math/ldexp.cl delete mode 100644 r600/lib/math/nextafter.cl delete mode 100644 r600/lib/math/sqrt.cl delete mode 100644 r600/lib/synchronization/barrier.cl delete mode 100644 r600/lib/workitem/get_global_size.ll delete mode 100644 r600/lib/workitem/get_group_id.ll delete mode 100644 r600/lib/workitem/get_local_id.ll delete mode 100644 r600/lib/workitem/get_local_size.ll delete mode 100644 r600/lib/workitem/get_num_groups.ll delete mode 100644 r600/lib/workitem/get_work_dim.ll diff --git a/amdgcn/lib/OVERRIDES b/amdgcn/lib/OVERRIDES new file mode 100644 index 0000000..e69de29 diff --git a/amdgcn/lib/SOURCES b/amdgcn/lib/SOURCES new file mode 100644 index 0000000..c99f3fc --- /dev/null +++ b/amdgcn/lib/SOURCES @@ -0,0 +1 @@ +synchronization/barrier_impl.ll diff --git a/amdgcn/lib/synchronization/barrier_impl.ll b/amdgcn/lib/synchronization/barrier_impl.ll new file mode 100644 index 0000000..1809edd --- /dev/null +++ b/amdgcn/lib/synchronization/barrier_impl.ll @@ -0,0 +1,32 @@ +declare i32 @__clc_clk_local_mem_fence() #1 +declare i32 @__clc_clk_global_mem_fence() #1 +declare void @llvm.amdgcn.s.barrier() #0 + +define void @barrier(i32 %flags) #2 { +barrier_local_test: + %CLK_LOCAL_MEM_FENCE = call i32 @__clc_clk_local_mem_fence() + %0 = and i32 %flags, %CLK_LOCAL_MEM_FENCE + %1 = icmp ne i32 %0, 0 + br i1 %1, label %barrier_local, label %barrier_global_test + +barrier_local: + call void @llvm.amdgcn.s.barrier() + br label %barrier_global_test + +barrier_global_test: + %CLK_GLOBAL_MEM_FENCE = call i32 @__clc_clk_global_mem_fence() + %2 = and i32 %flags, %CLK_GLOBAL_MEM_FENCE + %3 = icmp ne i32 %2, 0 + br i1 %3, label %barrier_global, label %done + +barrier_global: + call void @llvm.amdgcn.s.barrier() + br label %done + +done: + ret void +} + +attributes #0 = { nounwind convergent } +attributes #1 = { nounwind alwaysinline } +attributes #2 = { nounwind convergent alwaysinline } diff --git a/amdgpu/lib/OVERRIDES b/amdgpu/lib/OVERRIDES new file mode 100644 index 0000000..3f941d8 --- /dev/null +++ b/amdgpu/lib/OVERRIDES @@ -0,0 +1,2 @@ +workitem/get_group_id.cl +workitem/get_global_size.cl diff --git a/amdgpu/lib/SOURCES b/amdgpu/lib/SOURCES new file mode 100644 index 0000000..7505f3f --- /dev/null +++ b/amdgpu/lib/SOURCES @@ -0,0 +1,25 @@ +atomic/atomic.cl +math/ldexp.cl +math/nextafter.cl +math/sqrt.cl +workitem/get_num_groups.ll +workitem/get_group_id.ll +workitem/get_local_size.ll +workitem/get_local_id.ll +workitem/get_global_size.ll +workitem/get_work_dim.ll +synchronization/barrier.cl +image/get_image_width.cl +image/get_image_height.cl +image/get_image_depth.cl +image/get_image_channel_data_type.cl +image/get_image_channel_order.cl +image/get_image_attributes_impl.ll +image/read_imagef.cl +image/read_imagei.cl +image/read_imageui.cl +image/read_image_impl.ll +image/write_imagef.cl +image/write_imagei.cl +image/write_imageui.cl +image/write_image_impl.ll diff --git a/amdgpu/lib/atomic/atomic.cl b/amdgpu/lib/atomic/atomic.cl new file mode 100644 index 0000000..5bfe07b --- /dev/null +++ b/amdgpu/lib/atomic/atomic.cl @@ -0,0 +1,65 @@ +#include + +#define ATOMIC_FUNC_DEFINE(RET_SIGN, ARG_SIGN, TYPE, CL_FUNCTION, CLC_FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \ +_CLC_OVERLOAD _CLC_DEF RET_SIGN TYPE CL_FUNCTION (volatile CL_ADDRSPACE RET_SIGN TYPE *p, RET_SIGN TYPE val) { \ + return (RET_SIGN TYPE)__clc_##CLC_FUNCTION##_addr##LLVM_ADDRSPACE((volatile CL_ADDRSPACE ARG_SIGN TYPE*)p, (ARG_SIGN TYPE)val); \ +} + +/* For atomic functions that don't need different bitcode dependending on argument signedness */ +#define ATOMIC_FUNC_SIGN(TYPE, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \ + _CLC_DECL signed TYPE __clc_##FUNCTION##_addr##LLVM_ADDRSPACE(volatile CL_ADDRSPACE signed TYPE*, signed TYPE); \ + ATOMIC_FUNC_DEFINE(signed, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \ + ATOMIC_FUNC_DEFINE(unsigned, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) + +#define ATOMIC_FUNC_ADDRSPACE(TYPE, FUNCTION) \ + ATOMIC_FUNC_SIGN(TYPE, FUNCTION, global, 1) \ + ATOMIC_FUNC_SIGN(TYPE, FUNCTION, local, 3) + +#define ATOMIC_FUNC(FUNCTION) \ + ATOMIC_FUNC_ADDRSPACE(int, FUNCTION) + +#define ATOMIC_FUNC_DEFINE_3_ARG(RET_SIGN, ARG_SIGN, TYPE, CL_FUNCTION, CLC_FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \ +_CLC_OVERLOAD _CLC_DEF RET_SIGN TYPE CL_FUNCTION (volatile CL_ADDRSPACE RET_SIGN TYPE *p, RET_SIGN TYPE cmp, RET_SIGN TYPE val) { \ + return (RET_SIGN TYPE)__clc_##CLC_FUNCTION##_addr##LLVM_ADDRSPACE((volatile CL_ADDRSPACE ARG_SIGN TYPE*)p, (ARG_SIGN TYPE)cmp, (ARG_SIGN TYPE)val); \ +} + +/* For atomic functions that don't need different bitcode dependending on argument signedness */ +#define ATOMIC_FUNC_SIGN_3_ARG(TYPE, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \ + _CLC_DECL signed TYPE __clc_##FUNCTION##_addr##LLVM_ADDRSPACE(volatile CL_ADDRSPACE signed TYPE*, signed TYPE, signed TYPE); \ + ATOMIC_FUNC_DEFINE_3_ARG(signed, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \ + ATOMIC_FUNC_DEFINE_3_ARG(unsigned, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) + +#define ATOMIC_FUNC_ADDRSPACE_3_ARG(TYPE, FUNCTION) \ + ATOMIC_FUNC_SIGN_3_ARG(TYPE, FUNCTION, global, 1) \ + ATOMIC_FUNC_SIGN_3_ARG(TYPE, FUNCTION, local, 3) + +#define ATOMIC_FUNC_3_ARG(FUNCTION) \ + ATOMIC_FUNC_ADDRSPACE_3_ARG(int, FUNCTION) + +ATOMIC_FUNC(atomic_add) +ATOMIC_FUNC(atomic_and) +ATOMIC_FUNC(atomic_or) +ATOMIC_FUNC(atomic_sub) +ATOMIC_FUNC(atomic_xchg) +ATOMIC_FUNC(atomic_xor) +ATOMIC_FUNC_3_ARG(atomic_cmpxchg) + +_CLC_DECL signed int __clc_atomic_max_addr1(volatile global signed int*, signed int); +_CLC_DECL signed int __clc_atomic_max_addr3(volatile local signed int*, signed int); +_CLC_DECL uint __clc_atomic_umax_addr1(volatile global uint*, uint); +_CLC_DECL uint __clc_atomic_umax_addr3(volatile local uint*, uint); + +ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_max, atomic_max, global, 1) +ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_max, atomic_max, local, 3) +ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_max, atomic_umax, global, 1) +ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_max, atomic_umax, local, 3) + +_CLC_DECL signed int __clc_atomic_min_addr1(volatile global signed int*, signed int); +_CLC_DECL signed int __clc_atomic_min_addr3(volatile local signed int*, signed int); +_CLC_DECL uint __clc_atomic_umin_addr1(volatile global uint*, uint); +_CLC_DECL uint __clc_atomic_umin_addr3(volatile local uint*, uint); + +ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_min, atomic_min, global, 1) +ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_min, atomic_min, local, 3) +ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_min, atomic_umin, global, 1) +ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_min, atomic_umin, local, 3) diff --git a/amdgpu/lib/image/get_image_attributes_impl.ll b/amdgpu/lib/image/get_image_attributes_impl.ll new file mode 100644 index 0000000..7f1965d --- /dev/null +++ b/amdgpu/lib/image/get_image_attributes_impl.ll @@ -0,0 +1,87 @@ +%opencl.image2d_t = type opaque +%opencl.image3d_t = type opaque + +declare i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)*) nounwind readnone +declare i32 @llvm.OpenCL.image.get.resource.id.3d( + %opencl.image3d_t addrspace(1)*) nounwind readnone + +declare [3 x i32] @llvm.OpenCL.image.get.size.2d( + %opencl.image2d_t addrspace(1)*) nounwind readnone +declare [3 x i32] @llvm.OpenCL.image.get.size.3d( + %opencl.image3d_t addrspace(1)*) nounwind readnone + +declare [2 x i32] @llvm.OpenCL.image.get.format.2d( + %opencl.image2d_t addrspace(1)*) nounwind readnone +declare [2 x i32] @llvm.OpenCL.image.get.format.3d( + %opencl.image3d_t addrspace(1)*) nounwind readnone + +define i32 @__clc_get_image_width_2d( + %opencl.image2d_t addrspace(1)* nocapture %img) #0 { + %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.2d( + %opencl.image2d_t addrspace(1)* %img) + %2 = extractvalue [3 x i32] %1, 0 + ret i32 %2 +} +define i32 @__clc_get_image_width_3d( + %opencl.image3d_t addrspace(1)* nocapture %img) #0 { + %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.3d( + %opencl.image3d_t addrspace(1)* %img) + %2 = extractvalue [3 x i32] %1, 0 + ret i32 %2 +} + +define i32 @__clc_get_image_height_2d( + %opencl.image2d_t addrspace(1)* nocapture %img) #0 { + %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.2d( + %opencl.image2d_t addrspace(1)* %img) + %2 = extractvalue [3 x i32] %1, 1 + ret i32 %2 +} +define i32 @__clc_get_image_height_3d( + %opencl.image3d_t addrspace(1)* nocapture %img) #0 { + %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.3d( + %opencl.image3d_t addrspace(1)* %img) + %2 = extractvalue [3 x i32] %1, 1 + ret i32 %2 +} + +define i32 @__clc_get_image_depth_3d( + %opencl.image3d_t addrspace(1)* nocapture %img) #0 { + %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.3d( + %opencl.image3d_t addrspace(1)* %img) + %2 = extractvalue [3 x i32] %1, 2 + ret i32 %2 +} + +define i32 @__clc_get_image_channel_data_type_2d( + %opencl.image2d_t addrspace(1)* nocapture %img) #0 { + %1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.2d( + %opencl.image2d_t addrspace(1)* %img) + %2 = extractvalue [2 x i32] %1, 0 + ret i32 %2 +} +define i32 @__clc_get_image_channel_data_type_3d( + %opencl.image3d_t addrspace(1)* nocapture %img) #0 { + %1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.3d( + %opencl.image3d_t addrspace(1)* %img) + %2 = extractvalue [2 x i32] %1, 0 + ret i32 %2 +} + +define i32 @__clc_get_image_channel_order_2d( + %opencl.image2d_t addrspace(1)* nocapture %img) #0 { + %1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.2d( + %opencl.image2d_t addrspace(1)* %img) + %2 = extractvalue [2 x i32] %1, 1 + ret i32 %2 +} +define i32 @__clc_get_image_channel_order_3d( + %opencl.image3d_t addrspace(1)* nocapture %img) #0 { + %1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.3d( + %opencl.image3d_t addrspace(1)* %img) + %2 = extractvalue [2 x i32] %1, 1 + ret i32 %2 +} + +attributes #0 = { nounwind readnone alwaysinline } diff --git a/amdgpu/lib/image/get_image_channel_data_type.cl b/amdgpu/lib/image/get_image_channel_data_type.cl new file mode 100644 index 0000000..2a2478f --- /dev/null +++ b/amdgpu/lib/image/get_image_channel_data_type.cl @@ -0,0 +1,13 @@ +#include + +_CLC_DECL int __clc_get_image_channel_data_type_2d(image2d_t); +_CLC_DECL int __clc_get_image_channel_data_type_3d(image3d_t); + +_CLC_OVERLOAD _CLC_DEF int +get_image_channel_data_type(image2d_t image) { + return __clc_get_image_channel_data_type_2d(image); +} +_CLC_OVERLOAD _CLC_DEF int +get_image_channel_data_type(image3d_t image) { + return __clc_get_image_channel_data_type_3d(image); +} diff --git a/amdgpu/lib/image/get_image_channel_order.cl b/amdgpu/lib/image/get_image_channel_order.cl new file mode 100644 index 0000000..91e9b89 --- /dev/null +++ b/amdgpu/lib/image/get_image_channel_order.cl @@ -0,0 +1,13 @@ +#include + +_CLC_DECL int __clc_get_image_channel_order_2d(image2d_t); +_CLC_DECL int __clc_get_image_channel_order_3d(image3d_t); + +_CLC_OVERLOAD _CLC_DEF int +get_image_channel_order(image2d_t image) { + return __clc_get_image_channel_order_2d(image); +} +_CLC_OVERLOAD _CLC_DEF int +get_image_channel_order(image3d_t image) { + return __clc_get_image_channel_order_3d(image); +} diff --git a/amdgpu/lib/image/get_image_depth.cl b/amdgpu/lib/image/get_image_depth.cl new file mode 100644 index 0000000..1864645 --- /dev/null +++ b/amdgpu/lib/image/get_image_depth.cl @@ -0,0 +1,8 @@ +#include + +_CLC_DECL int __clc_get_image_depth_3d(image3d_t); + +_CLC_OVERLOAD _CLC_DEF int +get_image_depth(image3d_t image) { + return __clc_get_image_depth_3d(image); +} diff --git a/amdgpu/lib/image/get_image_height.cl b/amdgpu/lib/image/get_image_height.cl new file mode 100644 index 0000000..80b3640 --- /dev/null +++ b/amdgpu/lib/image/get_image_height.cl @@ -0,0 +1,13 @@ +#include + +_CLC_DECL int __clc_get_image_height_2d(image2d_t); +_CLC_DECL int __clc_get_image_height_3d(image3d_t); + +_CLC_OVERLOAD _CLC_DEF int +get_image_height(image2d_t image) { + return __clc_get_image_height_2d(image); +} +_CLC_OVERLOAD _CLC_DEF int +get_image_height(image3d_t image) { + return __clc_get_image_height_3d(image); +} diff --git a/amdgpu/lib/image/get_image_width.cl b/amdgpu/lib/image/get_image_width.cl new file mode 100644 index 0000000..29e4e94 --- /dev/null +++ b/amdgpu/lib/image/get_image_width.cl @@ -0,0 +1,13 @@ +#include + +_CLC_DECL int __clc_get_image_width_2d(image2d_t); +_CLC_DECL int __clc_get_image_width_3d(image3d_t); + +_CLC_OVERLOAD _CLC_DEF int +get_image_width(image2d_t image) { + return __clc_get_image_width_2d(image); +} +_CLC_OVERLOAD _CLC_DEF int +get_image_width(image3d_t image) { + return __clc_get_image_width_3d(image); +} diff --git a/amdgpu/lib/image/read_image_impl.ll b/amdgpu/lib/image/read_image_impl.ll new file mode 100644 index 0000000..229a252 --- /dev/null +++ b/amdgpu/lib/image/read_image_impl.ll @@ -0,0 +1,46 @@ +%opencl.image2d_t = type opaque + +declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, + i32, i32, i32) readnone +declare i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)*) nounwind readnone +declare i32 @llvm.OpenCL.sampler.get.resource.id(i32) readnone + +define <4 x float> @__clc_v4f_from_v2f(<2 x float> %v) alwaysinline { + %e0 = extractelement <2 x float> %v, i32 0 + %e1 = extractelement <2 x float> %v, i32 1 + %res.0 = insertelement <4 x float> undef, float %e0, i32 0 + %res.1 = insertelement <4 x float> %res.0, float %e1, i32 1 + %res.2 = insertelement <4 x float> %res.1, float 0.0, i32 2 + %res.3 = insertelement <4 x float> %res.2, float 0.0, i32 3 + ret <4 x float> %res.3 +} + +define <4 x float> @__clc_read_imagef_tex( + %opencl.image2d_t addrspace(1)* nocapture %img, + i32 %sampler, <2 x float> %coord) alwaysinline { +entry: + %coord_v4 = call <4 x float> @__clc_v4f_from_v2f(<2 x float> %coord) + %smp_id = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %sampler) + %img_id = call i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)* %img) + %tex_id = add i32 %img_id, 2 ; First 2 IDs are reserved. + + %coord_norm = and i32 %sampler, 1 + %is_norm = icmp eq i32 %coord_norm, 1 + br i1 %is_norm, label %NormCoord, label %UnnormCoord +NormCoord: + %data.norm = call <4 x float> @llvm.R600.tex( + <4 x float> %coord_v4, + i32 0, i32 0, i32 0, ; Offset. + i32 2, i32 %smp_id, + i32 1, i32 1, i32 1, i32 1) ; Normalized coords. + ret <4 x float> %data.norm +UnnormCoord: + %data.unnorm = call <4 x float> @llvm.R600.tex( + <4 x float> %coord_v4, + i32 0, i32 0, i32 0, ; Offset. + i32 %tex_id, i32 %smp_id, + i32 0, i32 0, i32 0, i32 0) ; Unnormalized coords. + ret <4 x float> %data.unnorm +} diff --git a/amdgpu/lib/image/read_imagef.cl b/amdgpu/lib/image/read_imagef.cl new file mode 100644 index 0000000..af80ada --- /dev/null +++ b/amdgpu/lib/image/read_imagef.cl @@ -0,0 +1,14 @@ +#include + +_CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2); + +_CLC_OVERLOAD _CLC_DEF float4 read_imagef(image2d_t image, sampler_t sampler, + int2 coord) { + float2 coord_float = (float2)(coord.x, coord.y); + return __clc_read_imagef_tex(image, sampler, coord_float); +} + +_CLC_OVERLOAD _CLC_DEF float4 read_imagef(image2d_t image, sampler_t sampler, + float2 coord) { + return __clc_read_imagef_tex(image, sampler, coord); +} diff --git a/amdgpu/lib/image/read_imagei.cl b/amdgpu/lib/image/read_imagei.cl new file mode 100644 index 0000000..b973aae --- /dev/null +++ b/amdgpu/lib/image/read_imagei.cl @@ -0,0 +1,23 @@ +#include + +_CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2); + +int4 __clc_reinterpret_v4f_to_v4i(float4 v) { + union { + int4 v4i; + float4 v4f; + } res = { .v4f = v}; + return res.v4i; +} + +_CLC_OVERLOAD _CLC_DEF int4 read_imagei(image2d_t image, sampler_t sampler, + int2 coord) { + float2 coord_float = (float2)(coord.x, coord.y); + return __clc_reinterpret_v4f_to_v4i( + __clc_read_imagef_tex(image, sampler, coord_float)); +} +_CLC_OVERLOAD _CLC_DEF int4 read_imagei(image2d_t image, sampler_t sampler, + float2 coord) { + return __clc_reinterpret_v4f_to_v4i( + __clc_read_imagef_tex(image, sampler, coord)); +} diff --git a/amdgpu/lib/image/read_imageui.cl b/amdgpu/lib/image/read_imageui.cl new file mode 100644 index 0000000..ec9836e --- /dev/null +++ b/amdgpu/lib/image/read_imageui.cl @@ -0,0 +1,23 @@ +#include + +_CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2); + +uint4 __clc_reinterpret_v4f_to_v4ui(float4 v) { + union { + uint4 v4ui; + float4 v4f; + } res = { .v4f = v}; + return res.v4ui; +} + +_CLC_OVERLOAD _CLC_DEF uint4 read_imageui(image2d_t image, sampler_t sampler, + int2 coord) { + float2 coord_float = (float2)(coord.x, coord.y); + return __clc_reinterpret_v4f_to_v4ui( + __clc_read_imagef_tex(image, sampler, coord_float)); +} +_CLC_OVERLOAD _CLC_DEF uint4 read_imageui(image2d_t image, sampler_t sampler, + float2 coord) { + return __clc_reinterpret_v4f_to_v4ui( + __clc_read_imagef_tex(image, sampler, coord)); +} diff --git a/amdgpu/lib/image/write_image_impl.ll b/amdgpu/lib/image/write_image_impl.ll new file mode 100644 index 0000000..265f5d6 --- /dev/null +++ b/amdgpu/lib/image/write_image_impl.ll @@ -0,0 +1,52 @@ +%opencl.image2d_t = type opaque +%opencl.image3d_t = type opaque + +declare i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)*) nounwind readnone +declare i32 @llvm.OpenCL.image.get.resource.id.3d( + %opencl.image3d_t addrspace(1)*) nounwind readnone + +declare void @llvm.r600.rat.store.typed(<4 x i32> %color, <4 x i32> %coord, i32 %rat_id) + +define void @__clc_write_imageui_2d( + %opencl.image2d_t addrspace(1)* nocapture %img, + <2 x i32> %coord, <4 x i32> %color) #0 { + + ; Coordinate int2 -> int4. + %e0 = extractelement <2 x i32> %coord, i32 0 + %e1 = extractelement <2 x i32> %coord, i32 1 + %coord.0 = insertelement <4 x i32> undef, i32 %e0, i32 0 + %coord.1 = insertelement <4 x i32> %coord.0, i32 %e1, i32 1 + %coord.2 = insertelement <4 x i32> %coord.1, i32 0, i32 2 + %coord.3 = insertelement <4 x i32> %coord.2, i32 0, i32 3 + + ; Get RAT ID. + %img_id = call i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)* %img) + %rat_id = add i32 %img_id, 1 + + ; Call store intrinsic. + call void @llvm.r600.rat.store.typed(<4 x i32> %color, <4 x i32> %coord.3, i32 %rat_id) + ret void +} + +define void @__clc_write_imagei_2d( + %opencl.image2d_t addrspace(1)* nocapture %img, + <2 x i32> %coord, <4 x i32> %color) #0 { + call void @__clc_write_imageui_2d( + %opencl.image2d_t addrspace(1)* nocapture %img, + <2 x i32> %coord, <4 x i32> %color) + ret void +} + +define void @__clc_write_imagef_2d( + %opencl.image2d_t addrspace(1)* nocapture %img, + <2 x i32> %coord, <4 x float> %color) #0 { + %color.i32 = bitcast <4 x float> %color to <4 x i32> + call void @__clc_write_imageui_2d( + %opencl.image2d_t addrspace(1)* nocapture %img, + <2 x i32> %coord, <4 x i32> %color.i32) + ret void +} + +attributes #0 = { alwaysinline } diff --git a/amdgpu/lib/image/write_imagef.cl b/amdgpu/lib/image/write_imagef.cl new file mode 100644 index 0000000..4483fcf --- /dev/null +++ b/amdgpu/lib/image/write_imagef.cl @@ -0,0 +1,9 @@ +#include + +_CLC_DECL void __clc_write_imagef_2d(image2d_t image, int2 coord, float4 color); + +_CLC_OVERLOAD _CLC_DEF void +write_imagef(image2d_t image, int2 coord, float4 color) +{ + __clc_write_imagef_2d(image, coord, color); +} diff --git a/amdgpu/lib/image/write_imagei.cl b/amdgpu/lib/image/write_imagei.cl new file mode 100644 index 0000000..394a223 --- /dev/null +++ b/amdgpu/lib/image/write_imagei.cl @@ -0,0 +1,9 @@ +#include + +_CLC_DECL void __clc_write_imagei_2d(image2d_t image, int2 coord, int4 color); + +_CLC_OVERLOAD _CLC_DEF void +write_imagei(image2d_t image, int2 coord, int4 color) +{ + __clc_write_imagei_2d(image, coord, color); +} diff --git a/amdgpu/lib/image/write_imageui.cl b/amdgpu/lib/image/write_imageui.cl new file mode 100644 index 0000000..91344de --- /dev/null +++ b/amdgpu/lib/image/write_imageui.cl @@ -0,0 +1,9 @@ +#include + +_CLC_DECL void __clc_write_imageui_2d(image2d_t image, int2 coord, uint4 color); + +_CLC_OVERLOAD _CLC_DEF void +write_imageui(image2d_t image, int2 coord, uint4 color) +{ + __clc_write_imageui_2d(image, coord, color); +} diff --git a/amdgpu/lib/math/ldexp.cl b/amdgpu/lib/math/ldexp.cl new file mode 100644 index 0000000..80439ce --- /dev/null +++ b/amdgpu/lib/math/ldexp.cl @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include + +#include "../../../generic/lib/clcmacro.h" + +#ifdef __HAS_LDEXPF__ +#define BUILTINF __builtin_amdgpu_ldexpf +#else +#include "math/clc_ldexp.h" +#define BUILTINF __clc_ldexp +#endif + +// This defines all the ldexp(floatN, intN) variants. +_CLC_DEFINE_BINARY_BUILTIN(float, ldexp, BUILTINF, float, int); + +#ifdef cl_khr_fp64 + #pragma OPENCL EXTENSION cl_khr_fp64 : enable + // This defines all the ldexp(doubleN, intN) variants. + _CLC_DEFINE_BINARY_BUILTIN(double, ldexp, __builtin_amdgpu_ldexp, double, int); +#endif + +// This defines all the ldexp(GENTYPE, int); +#define __CLC_BODY <../../../generic/lib/math/ldexp.inc> +#include + +#undef BUILTINF diff --git a/amdgpu/lib/math/nextafter.cl b/amdgpu/lib/math/nextafter.cl new file mode 100644 index 0000000..4611c81 --- /dev/null +++ b/amdgpu/lib/math/nextafter.cl @@ -0,0 +1,4 @@ +#include +#include "../lib/clcmacro.h" + +_CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __clc_nextafter, float, float) diff --git a/amdgpu/lib/math/sqrt.cl b/amdgpu/lib/math/sqrt.cl new file mode 100644 index 0000000..3e5b17c --- /dev/null +++ b/amdgpu/lib/math/sqrt.cl @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include "../../../generic/lib/clcmacro.h" +#include "math/clc_sqrt.h" + +_CLC_DEFINE_UNARY_BUILTIN(float, sqrt, __clc_sqrt, float) + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + + +_CLC_OVERLOAD _CLC_DEF double sqrt(double x) { + + uint vcc = x < 0x1p-767; + uint exp0 = vcc ? 0x100 : 0; + unsigned exp1 = vcc ? 0xffffff80 : 0; + + double v01 = ldexp(x, exp0); + double v23 = __builtin_amdgpu_rsq(v01); + double v45 = v01 * v23; + v23 = v23 * 0.5; + + double v67 = fma(-v23, v45, 0.5); + v45 = fma(v45, v67, v45); + double v89 = fma(-v45, v45, v01); + v23 = fma(v23, v67, v23); + v45 = fma(v89, v23, v45); + v67 = fma(-v45, v45, v01); + v23 = fma(v67, v23, v45); + + v23 = ldexp(v23, exp1); + return ((x == __builtin_inf()) || (x == 0.0)) ? v01 : v23; +} + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sqrt, double); + +#endif diff --git a/amdgpu/lib/synchronization/barrier.cl b/amdgpu/lib/synchronization/barrier.cl new file mode 100644 index 0000000..6f2900b --- /dev/null +++ b/amdgpu/lib/synchronization/barrier.cl @@ -0,0 +1,10 @@ + +#include + +_CLC_DEF int __clc_clk_local_mem_fence() { + return CLK_LOCAL_MEM_FENCE; +} + +_CLC_DEF int __clc_clk_global_mem_fence() { + return CLK_GLOBAL_MEM_FENCE; +} diff --git a/amdgpu/lib/workitem/get_global_size.ll b/amdgpu/lib/workitem/get_global_size.ll new file mode 100644 index 0000000..ac2d08d --- /dev/null +++ b/amdgpu/lib/workitem/get_global_size.ll @@ -0,0 +1,18 @@ +declare i32 @llvm.r600.read.global.size.x() nounwind readnone +declare i32 @llvm.r600.read.global.size.y() nounwind readnone +declare i32 @llvm.r600.read.global.size.z() nounwind readnone + +define i32 @get_global_size(i32 %dim) nounwind readnone alwaysinline { + switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim] +x_dim: + %x = call i32 @llvm.r600.read.global.size.x() nounwind readnone + ret i32 %x +y_dim: + %y = call i32 @llvm.r600.read.global.size.y() nounwind readnone + ret i32 %y +z_dim: + %z = call i32 @llvm.r600.read.global.size.z() nounwind readnone + ret i32 %z +default: + ret i32 0 +} diff --git a/amdgpu/lib/workitem/get_group_id.ll b/amdgpu/lib/workitem/get_group_id.ll new file mode 100644 index 0000000..0dc86e5 --- /dev/null +++ b/amdgpu/lib/workitem/get_group_id.ll @@ -0,0 +1,18 @@ +declare i32 @llvm.r600.read.tgid.x() nounwind readnone +declare i32 @llvm.r600.read.tgid.y() nounwind readnone +declare i32 @llvm.r600.read.tgid.z() nounwind readnone + +define i32 @get_group_id(i32 %dim) nounwind readnone alwaysinline { + switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim] +x_dim: + %x = call i32 @llvm.r600.read.tgid.x() nounwind readnone + ret i32 %x +y_dim: + %y = call i32 @llvm.r600.read.tgid.y() nounwind readnone + ret i32 %y +z_dim: + %z = call i32 @llvm.r600.read.tgid.z() nounwind readnone + ret i32 %z +default: + ret i32 0 +} diff --git a/amdgpu/lib/workitem/get_local_id.ll b/amdgpu/lib/workitem/get_local_id.ll new file mode 100644 index 0000000..ac5522a --- /dev/null +++ b/amdgpu/lib/workitem/get_local_id.ll @@ -0,0 +1,18 @@ +declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.r600.read.tidig.y() nounwind readnone +declare i32 @llvm.r600.read.tidig.z() nounwind readnone + +define i32 @get_local_id(i32 %dim) nounwind readnone alwaysinline { + switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim] +x_dim: + %x = call i32 @llvm.r600.read.tidig.x() nounwind readnone + ret i32 %x +y_dim: + %y = call i32 @llvm.r600.read.tidig.y() nounwind readnone + ret i32 %y +z_dim: + %z = call i32 @llvm.r600.read.tidig.z() nounwind readnone + ret i32 %z +default: + ret i32 0 +} diff --git a/amdgpu/lib/workitem/get_local_size.ll b/amdgpu/lib/workitem/get_local_size.ll new file mode 100644 index 0000000..0a98de6 --- /dev/null +++ b/amdgpu/lib/workitem/get_local_size.ll @@ -0,0 +1,18 @@ +declare i32 @llvm.r600.read.local.size.x() nounwind readnone +declare i32 @llvm.r600.read.local.size.y() nounwind readnone +declare i32 @llvm.r600.read.local.size.z() nounwind readnone + +define i32 @get_local_size(i32 %dim) nounwind readnone alwaysinline { + switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim] +x_dim: + %x = call i32 @llvm.r600.read.local.size.x() nounwind readnone + ret i32 %x +y_dim: + %y = call i32 @llvm.r600.read.local.size.y() nounwind readnone + ret i32 %y +z_dim: + %z = call i32 @llvm.r600.read.local.size.z() nounwind readnone + ret i32 %z +default: + ret i32 0 +} diff --git a/amdgpu/lib/workitem/get_num_groups.ll b/amdgpu/lib/workitem/get_num_groups.ll new file mode 100644 index 0000000..a708f42 --- /dev/null +++ b/amdgpu/lib/workitem/get_num_groups.ll @@ -0,0 +1,18 @@ +declare i32 @llvm.r600.read.ngroups.x() nounwind readnone +declare i32 @llvm.r600.read.ngroups.y() nounwind readnone +declare i32 @llvm.r600.read.ngroups.z() nounwind readnone + +define i32 @get_num_groups(i32 %dim) nounwind readnone alwaysinline { + switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim] +x_dim: + %x = call i32 @llvm.r600.read.ngroups.x() nounwind readnone + ret i32 %x +y_dim: + %y = call i32 @llvm.r600.read.ngroups.y() nounwind readnone + ret i32 %y +z_dim: + %z = call i32 @llvm.r600.read.ngroups.z() nounwind readnone + ret i32 %z +default: + ret i32 0 +} diff --git a/amdgpu/lib/workitem/get_work_dim.ll b/amdgpu/lib/workitem/get_work_dim.ll new file mode 100644 index 0000000..1f86b5e --- /dev/null +++ b/amdgpu/lib/workitem/get_work_dim.ll @@ -0,0 +1,8 @@ +declare i32 @llvm.AMDGPU.read.workdim() nounwind readnone + +define i32 @get_work_dim() nounwind readnone alwaysinline { + %x = call i32 @llvm.AMDGPU.read.workdim() nounwind readnone , !range !0 + ret i32 %x +} + +!0 = !{ i32 1, i32 4 } diff --git a/configure.py b/configure.py index d591ef8..2663212 100755 --- a/configure.py +++ b/configure.py @@ -69,8 +69,8 @@ llvm_version = string.split(string.replace(llvm_config(['--version']), 'svn', '' llvm_int_version = int(llvm_version[0]) * 100 + int(llvm_version[1]) * 10 llvm_string_version = 'LLVM' + llvm_version[0] + '.' + llvm_version[1] -if llvm_int_version < 370: - print "libclc requires LLVM >= 3.7" +if llvm_int_version < 390: + print "libclc requires LLVM >= 3.9" sys.exit(1) llvm_system_libs = llvm_config(['--system-libs']) @@ -175,8 +175,8 @@ for target in targets: subdirs.append("%s-%s-%s" % (arch, t_vendor, t_os)) subdirs.append("%s-%s" % (arch, t_os)) subdirs.append(arch) - if arch == 'amdgcn': - subdirs.append('r600') + if arch == 'amdgcn' or arch == 'r600': + subdirs.append('amdgpu') incdirs = filter(os.path.isdir, [os.path.join(srcdir, subdir, 'include') for subdir in subdirs]) diff --git a/r600/lib/OVERRIDES b/r600/lib/OVERRIDES index 3f941d8..e69de29 100644 --- a/r600/lib/OVERRIDES +++ b/r600/lib/OVERRIDES @@ -1,2 +0,0 @@ -workitem/get_group_id.cl -workitem/get_global_size.cl diff --git a/r600/lib/SOURCES b/r600/lib/SOURCES index 029b22c..c99f3fc 100644 --- a/r600/lib/SOURCES +++ b/r600/lib/SOURCES @@ -1,26 +1 @@ -atomic/atomic.cl -math/ldexp.cl -math/nextafter.cl -math/sqrt.cl -workitem/get_num_groups.ll -workitem/get_group_id.ll -workitem/get_local_size.ll -workitem/get_local_id.ll -workitem/get_global_size.ll -workitem/get_work_dim.ll -synchronization/barrier.cl synchronization/barrier_impl.ll -image/get_image_width.cl -image/get_image_height.cl -image/get_image_depth.cl -image/get_image_channel_data_type.cl -image/get_image_channel_order.cl -image/get_image_attributes_impl.ll -image/read_imagef.cl -image/read_imagei.cl -image/read_imageui.cl -image/read_image_impl.ll -image/write_imagef.cl -image/write_imagei.cl -image/write_imageui.cl -image/write_image_impl.ll diff --git a/r600/lib/atomic/atomic.cl b/r600/lib/atomic/atomic.cl deleted file mode 100644 index 5bfe07b..0000000 --- a/r600/lib/atomic/atomic.cl +++ /dev/null @@ -1,65 +0,0 @@ -#include - -#define ATOMIC_FUNC_DEFINE(RET_SIGN, ARG_SIGN, TYPE, CL_FUNCTION, CLC_FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \ -_CLC_OVERLOAD _CLC_DEF RET_SIGN TYPE CL_FUNCTION (volatile CL_ADDRSPACE RET_SIGN TYPE *p, RET_SIGN TYPE val) { \ - return (RET_SIGN TYPE)__clc_##CLC_FUNCTION##_addr##LLVM_ADDRSPACE((volatile CL_ADDRSPACE ARG_SIGN TYPE*)p, (ARG_SIGN TYPE)val); \ -} - -/* For atomic functions that don't need different bitcode dependending on argument signedness */ -#define ATOMIC_FUNC_SIGN(TYPE, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \ - _CLC_DECL signed TYPE __clc_##FUNCTION##_addr##LLVM_ADDRSPACE(volatile CL_ADDRSPACE signed TYPE*, signed TYPE); \ - ATOMIC_FUNC_DEFINE(signed, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \ - ATOMIC_FUNC_DEFINE(unsigned, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) - -#define ATOMIC_FUNC_ADDRSPACE(TYPE, FUNCTION) \ - ATOMIC_FUNC_SIGN(TYPE, FUNCTION, global, 1) \ - ATOMIC_FUNC_SIGN(TYPE, FUNCTION, local, 3) - -#define ATOMIC_FUNC(FUNCTION) \ - ATOMIC_FUNC_ADDRSPACE(int, FUNCTION) - -#define ATOMIC_FUNC_DEFINE_3_ARG(RET_SIGN, ARG_SIGN, TYPE, CL_FUNCTION, CLC_FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \ -_CLC_OVERLOAD _CLC_DEF RET_SIGN TYPE CL_FUNCTION (volatile CL_ADDRSPACE RET_SIGN TYPE *p, RET_SIGN TYPE cmp, RET_SIGN TYPE val) { \ - return (RET_SIGN TYPE)__clc_##CLC_FUNCTION##_addr##LLVM_ADDRSPACE((volatile CL_ADDRSPACE ARG_SIGN TYPE*)p, (ARG_SIGN TYPE)cmp, (ARG_SIGN TYPE)val); \ -} - -/* For atomic functions that don't need different bitcode dependending on argument signedness */ -#define ATOMIC_FUNC_SIGN_3_ARG(TYPE, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \ - _CLC_DECL signed TYPE __clc_##FUNCTION##_addr##LLVM_ADDRSPACE(volatile CL_ADDRSPACE signed TYPE*, signed TYPE, signed TYPE); \ - ATOMIC_FUNC_DEFINE_3_ARG(signed, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \ - ATOMIC_FUNC_DEFINE_3_ARG(unsigned, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) - -#define ATOMIC_FUNC_ADDRSPACE_3_ARG(TYPE, FUNCTION) \ - ATOMIC_FUNC_SIGN_3_ARG(TYPE, FUNCTION, global, 1) \ - ATOMIC_FUNC_SIGN_3_ARG(TYPE, FUNCTION, local, 3) - -#define ATOMIC_FUNC_3_ARG(FUNCTION) \ - ATOMIC_FUNC_ADDRSPACE_3_ARG(int, FUNCTION) - -ATOMIC_FUNC(atomic_add) -ATOMIC_FUNC(atomic_and) -ATOMIC_FUNC(atomic_or) -ATOMIC_FUNC(atomic_sub) -ATOMIC_FUNC(atomic_xchg) -ATOMIC_FUNC(atomic_xor) -ATOMIC_FUNC_3_ARG(atomic_cmpxchg) - -_CLC_DECL signed int __clc_atomic_max_addr1(volatile global signed int*, signed int); -_CLC_DECL signed int __clc_atomic_max_addr3(volatile local signed int*, signed int); -_CLC_DECL uint __clc_atomic_umax_addr1(volatile global uint*, uint); -_CLC_DECL uint __clc_atomic_umax_addr3(volatile local uint*, uint); - -ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_max, atomic_max, global, 1) -ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_max, atomic_max, local, 3) -ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_max, atomic_umax, global, 1) -ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_max, atomic_umax, local, 3) - -_CLC_DECL signed int __clc_atomic_min_addr1(volatile global signed int*, signed int); -_CLC_DECL signed int __clc_atomic_min_addr3(volatile local signed int*, signed int); -_CLC_DECL uint __clc_atomic_umin_addr1(volatile global uint*, uint); -_CLC_DECL uint __clc_atomic_umin_addr3(volatile local uint*, uint); - -ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_min, atomic_min, global, 1) -ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_min, atomic_min, local, 3) -ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_min, atomic_umin, global, 1) -ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_min, atomic_umin, local, 3) diff --git a/r600/lib/image/get_image_attributes_impl.ll b/r600/lib/image/get_image_attributes_impl.ll deleted file mode 100644 index 7f1965d..0000000 --- a/r600/lib/image/get_image_attributes_impl.ll +++ /dev/null @@ -1,87 +0,0 @@ -%opencl.image2d_t = type opaque -%opencl.image3d_t = type opaque - -declare i32 @llvm.OpenCL.image.get.resource.id.2d( - %opencl.image2d_t addrspace(1)*) nounwind readnone -declare i32 @llvm.OpenCL.image.get.resource.id.3d( - %opencl.image3d_t addrspace(1)*) nounwind readnone - -declare [3 x i32] @llvm.OpenCL.image.get.size.2d( - %opencl.image2d_t addrspace(1)*) nounwind readnone -declare [3 x i32] @llvm.OpenCL.image.get.size.3d( - %opencl.image3d_t addrspace(1)*) nounwind readnone - -declare [2 x i32] @llvm.OpenCL.image.get.format.2d( - %opencl.image2d_t addrspace(1)*) nounwind readnone -declare [2 x i32] @llvm.OpenCL.image.get.format.3d( - %opencl.image3d_t addrspace(1)*) nounwind readnone - -define i32 @__clc_get_image_width_2d( - %opencl.image2d_t addrspace(1)* nocapture %img) #0 { - %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.2d( - %opencl.image2d_t addrspace(1)* %img) - %2 = extractvalue [3 x i32] %1, 0 - ret i32 %2 -} -define i32 @__clc_get_image_width_3d( - %opencl.image3d_t addrspace(1)* nocapture %img) #0 { - %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.3d( - %opencl.image3d_t addrspace(1)* %img) - %2 = extractvalue [3 x i32] %1, 0 - ret i32 %2 -} - -define i32 @__clc_get_image_height_2d( - %opencl.image2d_t addrspace(1)* nocapture %img) #0 { - %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.2d( - %opencl.image2d_t addrspace(1)* %img) - %2 = extractvalue [3 x i32] %1, 1 - ret i32 %2 -} -define i32 @__clc_get_image_height_3d( - %opencl.image3d_t addrspace(1)* nocapture %img) #0 { - %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.3d( - %opencl.image3d_t addrspace(1)* %img) - %2 = extractvalue [3 x i32] %1, 1 - ret i32 %2 -} - -define i32 @__clc_get_image_depth_3d( - %opencl.image3d_t addrspace(1)* nocapture %img) #0 { - %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.3d( - %opencl.image3d_t addrspace(1)* %img) - %2 = extractvalue [3 x i32] %1, 2 - ret i32 %2 -} - -define i32 @__clc_get_image_channel_data_type_2d( - %opencl.image2d_t addrspace(1)* nocapture %img) #0 { - %1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.2d( - %opencl.image2d_t addrspace(1)* %img) - %2 = extractvalue [2 x i32] %1, 0 - ret i32 %2 -} -define i32 @__clc_get_image_channel_data_type_3d( - %opencl.image3d_t addrspace(1)* nocapture %img) #0 { - %1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.3d( - %opencl.image3d_t addrspace(1)* %img) - %2 = extractvalue [2 x i32] %1, 0 - ret i32 %2 -} - -define i32 @__clc_get_image_channel_order_2d( - %opencl.image2d_t addrspace(1)* nocapture %img) #0 { - %1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.2d( - %opencl.image2d_t addrspace(1)* %img) - %2 = extractvalue [2 x i32] %1, 1 - ret i32 %2 -} -define i32 @__clc_get_image_channel_order_3d( - %opencl.image3d_t addrspace(1)* nocapture %img) #0 { - %1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.3d( - %opencl.image3d_t addrspace(1)* %img) - %2 = extractvalue [2 x i32] %1, 1 - ret i32 %2 -} - -attributes #0 = { nounwind readnone alwaysinline } diff --git a/r600/lib/image/get_image_channel_data_type.cl b/r600/lib/image/get_image_channel_data_type.cl deleted file mode 100644 index 2a2478f..0000000 --- a/r600/lib/image/get_image_channel_data_type.cl +++ /dev/null @@ -1,13 +0,0 @@ -#include - -_CLC_DECL int __clc_get_image_channel_data_type_2d(image2d_t); -_CLC_DECL int __clc_get_image_channel_data_type_3d(image3d_t); - -_CLC_OVERLOAD _CLC_DEF int -get_image_channel_data_type(image2d_t image) { - return __clc_get_image_channel_data_type_2d(image); -} -_CLC_OVERLOAD _CLC_DEF int -get_image_channel_data_type(image3d_t image) { - return __clc_get_image_channel_data_type_3d(image); -} diff --git a/r600/lib/image/get_image_channel_order.cl b/r600/lib/image/get_image_channel_order.cl deleted file mode 100644 index 91e9b89..0000000 --- a/r600/lib/image/get_image_channel_order.cl +++ /dev/null @@ -1,13 +0,0 @@ -#include - -_CLC_DECL int __clc_get_image_channel_order_2d(image2d_t); -_CLC_DECL int __clc_get_image_channel_order_3d(image3d_t); - -_CLC_OVERLOAD _CLC_DEF int -get_image_channel_order(image2d_t image) { - return __clc_get_image_channel_order_2d(image); -} -_CLC_OVERLOAD _CLC_DEF int -get_image_channel_order(image3d_t image) { - return __clc_get_image_channel_order_3d(image); -} diff --git a/r600/lib/image/get_image_depth.cl b/r600/lib/image/get_image_depth.cl deleted file mode 100644 index 1864645..0000000 --- a/r600/lib/image/get_image_depth.cl +++ /dev/null @@ -1,8 +0,0 @@ -#include - -_CLC_DECL int __clc_get_image_depth_3d(image3d_t); - -_CLC_OVERLOAD _CLC_DEF int -get_image_depth(image3d_t image) { - return __clc_get_image_depth_3d(image); -} diff --git a/r600/lib/image/get_image_height.cl b/r600/lib/image/get_image_height.cl deleted file mode 100644 index 80b3640..0000000 --- a/r600/lib/image/get_image_height.cl +++ /dev/null @@ -1,13 +0,0 @@ -#include - -_CLC_DECL int __clc_get_image_height_2d(image2d_t); -_CLC_DECL int __clc_get_image_height_3d(image3d_t); - -_CLC_OVERLOAD _CLC_DEF int -get_image_height(image2d_t image) { - return __clc_get_image_height_2d(image); -} -_CLC_OVERLOAD _CLC_DEF int -get_image_height(image3d_t image) { - return __clc_get_image_height_3d(image); -} diff --git a/r600/lib/image/get_image_width.cl b/r600/lib/image/get_image_width.cl deleted file mode 100644 index 29e4e94..0000000 --- a/r600/lib/image/get_image_width.cl +++ /dev/null @@ -1,13 +0,0 @@ -#include - -_CLC_DECL int __clc_get_image_width_2d(image2d_t); -_CLC_DECL int __clc_get_image_width_3d(image3d_t); - -_CLC_OVERLOAD _CLC_DEF int -get_image_width(image2d_t image) { - return __clc_get_image_width_2d(image); -} -_CLC_OVERLOAD _CLC_DEF int -get_image_width(image3d_t image) { - return __clc_get_image_width_3d(image); -} diff --git a/r600/lib/image/read_image_impl.ll b/r600/lib/image/read_image_impl.ll deleted file mode 100644 index 229a252..0000000 --- a/r600/lib/image/read_image_impl.ll +++ /dev/null @@ -1,46 +0,0 @@ -%opencl.image2d_t = type opaque - -declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, - i32, i32, i32) readnone -declare i32 @llvm.OpenCL.image.get.resource.id.2d( - %opencl.image2d_t addrspace(1)*) nounwind readnone -declare i32 @llvm.OpenCL.sampler.get.resource.id(i32) readnone - -define <4 x float> @__clc_v4f_from_v2f(<2 x float> %v) alwaysinline { - %e0 = extractelement <2 x float> %v, i32 0 - %e1 = extractelement <2 x float> %v, i32 1 - %res.0 = insertelement <4 x float> undef, float %e0, i32 0 - %res.1 = insertelement <4 x float> %res.0, float %e1, i32 1 - %res.2 = insertelement <4 x float> %res.1, float 0.0, i32 2 - %res.3 = insertelement <4 x float> %res.2, float 0.0, i32 3 - ret <4 x float> %res.3 -} - -define <4 x float> @__clc_read_imagef_tex( - %opencl.image2d_t addrspace(1)* nocapture %img, - i32 %sampler, <2 x float> %coord) alwaysinline { -entry: - %coord_v4 = call <4 x float> @__clc_v4f_from_v2f(<2 x float> %coord) - %smp_id = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %sampler) - %img_id = call i32 @llvm.OpenCL.image.get.resource.id.2d( - %opencl.image2d_t addrspace(1)* %img) - %tex_id = add i32 %img_id, 2 ; First 2 IDs are reserved. - - %coord_norm = and i32 %sampler, 1 - %is_norm = icmp eq i32 %coord_norm, 1 - br i1 %is_norm, label %NormCoord, label %UnnormCoord -NormCoord: - %data.norm = call <4 x float> @llvm.R600.tex( - <4 x float> %coord_v4, - i32 0, i32 0, i32 0, ; Offset. - i32 2, i32 %smp_id, - i32 1, i32 1, i32 1, i32 1) ; Normalized coords. - ret <4 x float> %data.norm -UnnormCoord: - %data.unnorm = call <4 x float> @llvm.R600.tex( - <4 x float> %coord_v4, - i32 0, i32 0, i32 0, ; Offset. - i32 %tex_id, i32 %smp_id, - i32 0, i32 0, i32 0, i32 0) ; Unnormalized coords. - ret <4 x float> %data.unnorm -} diff --git a/r600/lib/image/read_imagef.cl b/r600/lib/image/read_imagef.cl deleted file mode 100644 index af80ada..0000000 --- a/r600/lib/image/read_imagef.cl +++ /dev/null @@ -1,14 +0,0 @@ -#include - -_CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2); - -_CLC_OVERLOAD _CLC_DEF float4 read_imagef(image2d_t image, sampler_t sampler, - int2 coord) { - float2 coord_float = (float2)(coord.x, coord.y); - return __clc_read_imagef_tex(image, sampler, coord_float); -} - -_CLC_OVERLOAD _CLC_DEF float4 read_imagef(image2d_t image, sampler_t sampler, - float2 coord) { - return __clc_read_imagef_tex(image, sampler, coord); -} diff --git a/r600/lib/image/read_imagei.cl b/r600/lib/image/read_imagei.cl deleted file mode 100644 index b973aae..0000000 --- a/r600/lib/image/read_imagei.cl +++ /dev/null @@ -1,23 +0,0 @@ -#include - -_CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2); - -int4 __clc_reinterpret_v4f_to_v4i(float4 v) { - union { - int4 v4i; - float4 v4f; - } res = { .v4f = v}; - return res.v4i; -} - -_CLC_OVERLOAD _CLC_DEF int4 read_imagei(image2d_t image, sampler_t sampler, - int2 coord) { - float2 coord_float = (float2)(coord.x, coord.y); - return __clc_reinterpret_v4f_to_v4i( - __clc_read_imagef_tex(image, sampler, coord_float)); -} -_CLC_OVERLOAD _CLC_DEF int4 read_imagei(image2d_t image, sampler_t sampler, - float2 coord) { - return __clc_reinterpret_v4f_to_v4i( - __clc_read_imagef_tex(image, sampler, coord)); -} diff --git a/r600/lib/image/read_imageui.cl b/r600/lib/image/read_imageui.cl deleted file mode 100644 index ec9836e..0000000 --- a/r600/lib/image/read_imageui.cl +++ /dev/null @@ -1,23 +0,0 @@ -#include - -_CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2); - -uint4 __clc_reinterpret_v4f_to_v4ui(float4 v) { - union { - uint4 v4ui; - float4 v4f; - } res = { .v4f = v}; - return res.v4ui; -} - -_CLC_OVERLOAD _CLC_DEF uint4 read_imageui(image2d_t image, sampler_t sampler, - int2 coord) { - float2 coord_float = (float2)(coord.x, coord.y); - return __clc_reinterpret_v4f_to_v4ui( - __clc_read_imagef_tex(image, sampler, coord_float)); -} -_CLC_OVERLOAD _CLC_DEF uint4 read_imageui(image2d_t image, sampler_t sampler, - float2 coord) { - return __clc_reinterpret_v4f_to_v4ui( - __clc_read_imagef_tex(image, sampler, coord)); -} diff --git a/r600/lib/image/write_image_impl.ll b/r600/lib/image/write_image_impl.ll deleted file mode 100644 index 265f5d6..0000000 --- a/r600/lib/image/write_image_impl.ll +++ /dev/null @@ -1,52 +0,0 @@ -%opencl.image2d_t = type opaque -%opencl.image3d_t = type opaque - -declare i32 @llvm.OpenCL.image.get.resource.id.2d( - %opencl.image2d_t addrspace(1)*) nounwind readnone -declare i32 @llvm.OpenCL.image.get.resource.id.3d( - %opencl.image3d_t addrspace(1)*) nounwind readnone - -declare void @llvm.r600.rat.store.typed(<4 x i32> %color, <4 x i32> %coord, i32 %rat_id) - -define void @__clc_write_imageui_2d( - %opencl.image2d_t addrspace(1)* nocapture %img, - <2 x i32> %coord, <4 x i32> %color) #0 { - - ; Coordinate int2 -> int4. - %e0 = extractelement <2 x i32> %coord, i32 0 - %e1 = extractelement <2 x i32> %coord, i32 1 - %coord.0 = insertelement <4 x i32> undef, i32 %e0, i32 0 - %coord.1 = insertelement <4 x i32> %coord.0, i32 %e1, i32 1 - %coord.2 = insertelement <4 x i32> %coord.1, i32 0, i32 2 - %coord.3 = insertelement <4 x i32> %coord.2, i32 0, i32 3 - - ; Get RAT ID. - %img_id = call i32 @llvm.OpenCL.image.get.resource.id.2d( - %opencl.image2d_t addrspace(1)* %img) - %rat_id = add i32 %img_id, 1 - - ; Call store intrinsic. - call void @llvm.r600.rat.store.typed(<4 x i32> %color, <4 x i32> %coord.3, i32 %rat_id) - ret void -} - -define void @__clc_write_imagei_2d( - %opencl.image2d_t addrspace(1)* nocapture %img, - <2 x i32> %coord, <4 x i32> %color) #0 { - call void @__clc_write_imageui_2d( - %opencl.image2d_t addrspace(1)* nocapture %img, - <2 x i32> %coord, <4 x i32> %color) - ret void -} - -define void @__clc_write_imagef_2d( - %opencl.image2d_t addrspace(1)* nocapture %img, - <2 x i32> %coord, <4 x float> %color) #0 { - %color.i32 = bitcast <4 x float> %color to <4 x i32> - call void @__clc_write_imageui_2d( - %opencl.image2d_t addrspace(1)* nocapture %img, - <2 x i32> %coord, <4 x i32> %color.i32) - ret void -} - -attributes #0 = { alwaysinline } diff --git a/r600/lib/image/write_imagef.cl b/r600/lib/image/write_imagef.cl deleted file mode 100644 index 4483fcf..0000000 --- a/r600/lib/image/write_imagef.cl +++ /dev/null @@ -1,9 +0,0 @@ -#include - -_CLC_DECL void __clc_write_imagef_2d(image2d_t image, int2 coord, float4 color); - -_CLC_OVERLOAD _CLC_DEF void -write_imagef(image2d_t image, int2 coord, float4 color) -{ - __clc_write_imagef_2d(image, coord, color); -} diff --git a/r600/lib/image/write_imagei.cl b/r600/lib/image/write_imagei.cl deleted file mode 100644 index 394a223..0000000 --- a/r600/lib/image/write_imagei.cl +++ /dev/null @@ -1,9 +0,0 @@ -#include - -_CLC_DECL void __clc_write_imagei_2d(image2d_t image, int2 coord, int4 color); - -_CLC_OVERLOAD _CLC_DEF void -write_imagei(image2d_t image, int2 coord, int4 color) -{ - __clc_write_imagei_2d(image, coord, color); -} diff --git a/r600/lib/image/write_imageui.cl b/r600/lib/image/write_imageui.cl deleted file mode 100644 index 91344de..0000000 --- a/r600/lib/image/write_imageui.cl +++ /dev/null @@ -1,9 +0,0 @@ -#include - -_CLC_DECL void __clc_write_imageui_2d(image2d_t image, int2 coord, uint4 color); - -_CLC_OVERLOAD _CLC_DEF void -write_imageui(image2d_t image, int2 coord, uint4 color) -{ - __clc_write_imageui_2d(image, coord, color); -} diff --git a/r600/lib/math/ldexp.cl b/r600/lib/math/ldexp.cl deleted file mode 100644 index 80439ce..0000000 --- a/r600/lib/math/ldexp.cl +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2014 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include - -#include "../../../generic/lib/clcmacro.h" - -#ifdef __HAS_LDEXPF__ -#define BUILTINF __builtin_amdgpu_ldexpf -#else -#include "math/clc_ldexp.h" -#define BUILTINF __clc_ldexp -#endif - -// This defines all the ldexp(floatN, intN) variants. -_CLC_DEFINE_BINARY_BUILTIN(float, ldexp, BUILTINF, float, int); - -#ifdef cl_khr_fp64 - #pragma OPENCL EXTENSION cl_khr_fp64 : enable - // This defines all the ldexp(doubleN, intN) variants. - _CLC_DEFINE_BINARY_BUILTIN(double, ldexp, __builtin_amdgpu_ldexp, double, int); -#endif - -// This defines all the ldexp(GENTYPE, int); -#define __CLC_BODY <../../../generic/lib/math/ldexp.inc> -#include - -#undef BUILTINF diff --git a/r600/lib/math/nextafter.cl b/r600/lib/math/nextafter.cl deleted file mode 100644 index 4611c81..0000000 --- a/r600/lib/math/nextafter.cl +++ /dev/null @@ -1,4 +0,0 @@ -#include -#include "../lib/clcmacro.h" - -_CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __clc_nextafter, float, float) diff --git a/r600/lib/math/sqrt.cl b/r600/lib/math/sqrt.cl deleted file mode 100644 index 3e5b17c..0000000 --- a/r600/lib/math/sqrt.cl +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2015 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include -#include "../../../generic/lib/clcmacro.h" -#include "math/clc_sqrt.h" - -_CLC_DEFINE_UNARY_BUILTIN(float, sqrt, __clc_sqrt, float) - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - - -_CLC_OVERLOAD _CLC_DEF double sqrt(double x) { - - uint vcc = x < 0x1p-767; - uint exp0 = vcc ? 0x100 : 0; - unsigned exp1 = vcc ? 0xffffff80 : 0; - - double v01 = ldexp(x, exp0); - double v23 = __builtin_amdgpu_rsq(v01); - double v45 = v01 * v23; - v23 = v23 * 0.5; - - double v67 = fma(-v23, v45, 0.5); - v45 = fma(v45, v67, v45); - double v89 = fma(-v45, v45, v01); - v23 = fma(v23, v67, v23); - v45 = fma(v89, v23, v45); - v67 = fma(-v45, v45, v01); - v23 = fma(v67, v23, v45); - - v23 = ldexp(v23, exp1); - return ((x == __builtin_inf()) || (x == 0.0)) ? v01 : v23; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sqrt, double); - -#endif diff --git a/r600/lib/synchronization/barrier.cl b/r600/lib/synchronization/barrier.cl deleted file mode 100644 index 6f2900b..0000000 --- a/r600/lib/synchronization/barrier.cl +++ /dev/null @@ -1,10 +0,0 @@ - -#include - -_CLC_DEF int __clc_clk_local_mem_fence() { - return CLK_LOCAL_MEM_FENCE; -} - -_CLC_DEF int __clc_clk_global_mem_fence() { - return CLK_GLOBAL_MEM_FENCE; -} diff --git a/r600/lib/synchronization/barrier_impl.ll b/r600/lib/synchronization/barrier_impl.ll index 3d8ee66..825b2eb 100644 --- a/r600/lib/synchronization/barrier_impl.ll +++ b/r600/lib/synchronization/barrier_impl.ll @@ -1,9 +1,9 @@ -declare i32 @__clc_clk_local_mem_fence() nounwind alwaysinline -declare i32 @__clc_clk_global_mem_fence() nounwind alwaysinline -declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate -declare void @llvm.AMDGPU.barrier.global() nounwind noduplicate +declare i32 @__clc_clk_local_mem_fence() #1 +declare i32 @__clc_clk_global_mem_fence() #1 +declare void @llvm.AMDGPU.barrier.local() #0 +declare void @llvm.AMDGPU.barrier.global() #0 -define void @barrier(i32 %flags) nounwind noduplicate alwaysinline { +define void @barrier(i32 %flags) #2 { barrier_local_test: %CLK_LOCAL_MEM_FENCE = call i32 @__clc_clk_local_mem_fence() %0 = and i32 %flags, %CLK_LOCAL_MEM_FENCE @@ -11,7 +11,7 @@ barrier_local_test: br i1 %1, label %barrier_local, label %barrier_global_test barrier_local: - call void @llvm.AMDGPU.barrier.local() noduplicate + call void @llvm.AMDGPU.barrier.local() br label %barrier_global_test barrier_global_test: @@ -21,9 +21,13 @@ barrier_global_test: br i1 %3, label %barrier_global, label %done barrier_global: - call void @llvm.AMDGPU.barrier.global() noduplicate + call void @llvm.AMDGPU.barrier.global() br label %done done: ret void } + +attributes #0 = { nounwind convergent } +attributes #1 = { nounwind alwaysinline } +attributes #2 = { nounwind convergent alwaysinline } diff --git a/r600/lib/workitem/get_global_size.ll b/r600/lib/workitem/get_global_size.ll deleted file mode 100644 index ac2d08d..0000000 --- a/r600/lib/workitem/get_global_size.ll +++ /dev/null @@ -1,18 +0,0 @@ -declare i32 @llvm.r600.read.global.size.x() nounwind readnone -declare i32 @llvm.r600.read.global.size.y() nounwind readnone -declare i32 @llvm.r600.read.global.size.z() nounwind readnone - -define i32 @get_global_size(i32 %dim) nounwind readnone alwaysinline { - switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim] -x_dim: - %x = call i32 @llvm.r600.read.global.size.x() nounwind readnone - ret i32 %x -y_dim: - %y = call i32 @llvm.r600.read.global.size.y() nounwind readnone - ret i32 %y -z_dim: - %z = call i32 @llvm.r600.read.global.size.z() nounwind readnone - ret i32 %z -default: - ret i32 0 -} diff --git a/r600/lib/workitem/get_group_id.ll b/r600/lib/workitem/get_group_id.ll deleted file mode 100644 index 0dc86e5..0000000 --- a/r600/lib/workitem/get_group_id.ll +++ /dev/null @@ -1,18 +0,0 @@ -declare i32 @llvm.r600.read.tgid.x() nounwind readnone -declare i32 @llvm.r600.read.tgid.y() nounwind readnone -declare i32 @llvm.r600.read.tgid.z() nounwind readnone - -define i32 @get_group_id(i32 %dim) nounwind readnone alwaysinline { - switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim] -x_dim: - %x = call i32 @llvm.r600.read.tgid.x() nounwind readnone - ret i32 %x -y_dim: - %y = call i32 @llvm.r600.read.tgid.y() nounwind readnone - ret i32 %y -z_dim: - %z = call i32 @llvm.r600.read.tgid.z() nounwind readnone - ret i32 %z -default: - ret i32 0 -} diff --git a/r600/lib/workitem/get_local_id.ll b/r600/lib/workitem/get_local_id.ll deleted file mode 100644 index ac5522a..0000000 --- a/r600/lib/workitem/get_local_id.ll +++ /dev/null @@ -1,18 +0,0 @@ -declare i32 @llvm.r600.read.tidig.x() nounwind readnone -declare i32 @llvm.r600.read.tidig.y() nounwind readnone -declare i32 @llvm.r600.read.tidig.z() nounwind readnone - -define i32 @get_local_id(i32 %dim) nounwind readnone alwaysinline { - switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim] -x_dim: - %x = call i32 @llvm.r600.read.tidig.x() nounwind readnone - ret i32 %x -y_dim: - %y = call i32 @llvm.r600.read.tidig.y() nounwind readnone - ret i32 %y -z_dim: - %z = call i32 @llvm.r600.read.tidig.z() nounwind readnone - ret i32 %z -default: - ret i32 0 -} diff --git a/r600/lib/workitem/get_local_size.ll b/r600/lib/workitem/get_local_size.ll deleted file mode 100644 index 0a98de6..0000000 --- a/r600/lib/workitem/get_local_size.ll +++ /dev/null @@ -1,18 +0,0 @@ -declare i32 @llvm.r600.read.local.size.x() nounwind readnone -declare i32 @llvm.r600.read.local.size.y() nounwind readnone -declare i32 @llvm.r600.read.local.size.z() nounwind readnone - -define i32 @get_local_size(i32 %dim) nounwind readnone alwaysinline { - switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim] -x_dim: - %x = call i32 @llvm.r600.read.local.size.x() nounwind readnone - ret i32 %x -y_dim: - %y = call i32 @llvm.r600.read.local.size.y() nounwind readnone - ret i32 %y -z_dim: - %z = call i32 @llvm.r600.read.local.size.z() nounwind readnone - ret i32 %z -default: - ret i32 0 -} diff --git a/r600/lib/workitem/get_num_groups.ll b/r600/lib/workitem/get_num_groups.ll deleted file mode 100644 index a708f42..0000000 --- a/r600/lib/workitem/get_num_groups.ll +++ /dev/null @@ -1,18 +0,0 @@ -declare i32 @llvm.r600.read.ngroups.x() nounwind readnone -declare i32 @llvm.r600.read.ngroups.y() nounwind readnone -declare i32 @llvm.r600.read.ngroups.z() nounwind readnone - -define i32 @get_num_groups(i32 %dim) nounwind readnone alwaysinline { - switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim] -x_dim: - %x = call i32 @llvm.r600.read.ngroups.x() nounwind readnone - ret i32 %x -y_dim: - %y = call i32 @llvm.r600.read.ngroups.y() nounwind readnone - ret i32 %y -z_dim: - %z = call i32 @llvm.r600.read.ngroups.z() nounwind readnone - ret i32 %z -default: - ret i32 0 -} diff --git a/r600/lib/workitem/get_work_dim.ll b/r600/lib/workitem/get_work_dim.ll deleted file mode 100644 index 1f86b5e..0000000 --- a/r600/lib/workitem/get_work_dim.ll +++ /dev/null @@ -1,8 +0,0 @@ -declare i32 @llvm.AMDGPU.read.workdim() nounwind readnone - -define i32 @get_work_dim() nounwind readnone alwaysinline { - %x = call i32 @llvm.AMDGPU.read.workdim() nounwind readnone , !range !0 - ret i32 %x -} - -!0 = !{ i32 1, i32 4 } -- cgit v1.2.3