diff options
author | Peter Collingbourne <peter@pcc.me.uk> | 2012-08-05 22:25:12 +0000 |
---|---|---|
committer | Peter Collingbourne <peter@pcc.me.uk> | 2012-08-05 22:25:12 +0000 |
commit | 3b230ffdb7c8c312e327d18f23e3d91ec8ae0511 (patch) | |
tree | 239f2ecb4a64917ca6ffa817702fe35119ad6107 | |
parent | 05edc47f68b94fb1fc6a6e6f2a5baabb2abd9b59 (diff) |
Implement sub_sat builtin. Patch by Lei Mou!
git-svn-id: https://llvm.org/svn/llvm-project/libclc/trunk@161312 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | generic/include/clc/clc.h | 1 | ||||
-rw-r--r-- | generic/include/clc/integer/sub_sat.h | 2 | ||||
-rw-r--r-- | generic/include/clc/integer/sub_sat.inc | 1 | ||||
-rw-r--r-- | generic/lib/SOURCES | 3 | ||||
-rw-r--r-- | generic/lib/integer/sub_sat.cl | 52 | ||||
-rw-r--r-- | generic/lib/integer/sub_sat.ll | 55 | ||||
-rw-r--r-- | generic/lib/integer/sub_sat_impl.ll | 83 | ||||
-rw-r--r-- | ptx/lib/SOURCES | 1 | ||||
-rw-r--r-- | ptx/lib/integer/sub_sat.ll | 55 | ||||
-rw-r--r-- | test/subsat.cl | 19 |
10 files changed, 272 insertions, 0 deletions
diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h index b0cbd4a..8b41523 100644 --- a/generic/include/clc/clc.h +++ b/generic/include/clc/clc.h @@ -57,6 +57,7 @@ #include <clc/integer/abs.h> #include <clc/integer/abs_diff.h> #include <clc/integer/add_sat.h> +#include <clc/integer/sub_sat.h> /* 6.11.5 Geometric Functions */ #include <clc/geometric/cross.h> diff --git a/generic/include/clc/integer/sub_sat.h b/generic/include/clc/integer/sub_sat.h new file mode 100644 index 0000000..942274d --- /dev/null +++ b/generic/include/clc/integer/sub_sat.h @@ -0,0 +1,2 @@ +#define BODY <clc/integer/sub_sat.inc> +#include <clc/integer/gentype.inc> diff --git a/generic/include/clc/integer/sub_sat.inc b/generic/include/clc/integer/sub_sat.inc new file mode 100644 index 0000000..3e0f8f9 --- /dev/null +++ b/generic/include/clc/integer/sub_sat.inc @@ -0,0 +1 @@ +_CLC_OVERLOAD _CLC_DECL GENTYPE sub_sat(GENTYPE x, GENTYPE y); diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES index 0608116..344c865 100644 --- a/generic/lib/SOURCES +++ b/generic/lib/SOURCES @@ -7,5 +7,8 @@ integer/abs.cl integer/add_sat.cl integer/add_sat.ll integer/add_sat_impl.ll +integer/sub_sat.cl +integer/sub_sat.ll +integer/sub_sat_impl.ll math/hypot.cl math/mad.cl diff --git a/generic/lib/integer/sub_sat.cl b/generic/lib/integer/sub_sat.cl new file mode 100644 index 0000000..9555b6d --- /dev/null +++ b/generic/lib/integer/sub_sat.cl @@ -0,0 +1,52 @@ +#include <clc/clc.h> + +// From sub_sat.ll +_CLC_DECL char __clc_sub_sat_s8(char, char); +_CLC_DECL uchar __clc_sub_sat_u8(uchar, uchar); +_CLC_DECL short __clc_sub_sat_s16(short, short); +_CLC_DECL ushort __clc_sub_sat_u16(ushort, ushort); +_CLC_DECL int __clc_sub_sat_s32(int, int); +_CLC_DECL uint __clc_sub_sat_u32(uint, uint); +_CLC_DECL long __clc_sub_sat_s64(long, long); +_CLC_DECL ulong __clc_sub_sat_u64(ulong, ulong); + +_CLC_OVERLOAD _CLC_DEF char sub_sat(char x, char y) { + return __clc_sub_sat_s8(x, y); +} + +_CLC_OVERLOAD _CLC_DEF uchar sub_sat(uchar x, uchar y) { + return __clc_sub_sat_u8(x, y); +} + +_CLC_OVERLOAD _CLC_DEF short sub_sat(short x, short y) { + return __clc_sub_sat_s16(x, y); +} + +_CLC_OVERLOAD _CLC_DEF ushort sub_sat(ushort x, ushort y) { + return __clc_sub_sat_u16(x, y); +} + +_CLC_OVERLOAD _CLC_DEF int sub_sat(int x, int y) { + return __clc_sub_sat_s32(x, y); +} + +_CLC_OVERLOAD _CLC_DEF uint sub_sat(uint x, uint y) { + return __clc_sub_sat_u32(x, y); +} + +_CLC_OVERLOAD _CLC_DEF long sub_sat(long x, long y) { + return __clc_sub_sat_s64(x, y); +} + +_CLC_OVERLOAD _CLC_DEF ulong sub_sat(ulong x, ulong y) { + return __clc_sub_sat_u64(x, y); +} + +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, sub_sat, char, char) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, sub_sat, uchar, uchar) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, sub_sat, short, short) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, sub_sat, ushort, ushort) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, sub_sat, int, int) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, sub_sat, uint, uint) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, sub_sat, long, long) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, sub_sat, ulong, ulong) diff --git a/generic/lib/integer/sub_sat.ll b/generic/lib/integer/sub_sat.ll new file mode 100644 index 0000000..f223a0e --- /dev/null +++ b/generic/lib/integer/sub_sat.ll @@ -0,0 +1,55 @@ +declare i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) + +define linkonce_odr i8 @__clc_sub_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { + %call = call i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) + ret i8 %call +} + +declare i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) + +define linkonce_odr i8 @__clc_sub_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { + %call = call i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) + ret i8 %call +} + +declare i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) + +define linkonce_odr i16 @__clc_sub_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { + %call = call i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) + ret i16 %call +} + +declare i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) + +define linkonce_odr i16 @__clc_sub_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { + %call = call i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) + ret i16 %call +} + +declare i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) + +define linkonce_odr i32 @__clc_sub_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { + %call = call i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) + ret i32 %call +} + +declare i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) + +define linkonce_odr i32 @__clc_sub_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { + %call = call i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) + ret i32 %call +} + +declare i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) + +define linkonce_odr i64 @__clc_sub_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { + %call = call i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) + ret i64 %call +} + +declare i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) + +define linkonce_odr i64 @__clc_sub_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { + %call = call i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) + ret i64 %call +} diff --git a/generic/lib/integer/sub_sat_impl.ll b/generic/lib/integer/sub_sat_impl.ll new file mode 100644 index 0000000..99abbc3 --- /dev/null +++ b/generic/lib/integer/sub_sat_impl.ll @@ -0,0 +1,83 @@ +declare {i8, i1} @llvm.ssub.with.overflow.i8(i8, i8) +declare {i8, i1} @llvm.usub.with.overflow.i8(i8, i8) + +define linkonce_odr i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { + %call = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 %x, i8 %y) + %res = extractvalue {i8, i1} %call, 0 + %over = extractvalue {i8, i1} %call, 1 + %x.msb = ashr i8 %x, 7 + %x.limit = xor i8 %x.msb, 127 + %sat = select i1 %over, i8 %x.limit, i8 %res + ret i8 %sat +} + +define linkonce_odr i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { + %call = call {i8, i1} @llvm.usub.with.overflow.i8(i8 %x, i8 %y) + %res = extractvalue {i8, i1} %call, 0 + %over = extractvalue {i8, i1} %call, 1 + %sat = select i1 %over, i8 0, i8 %res + ret i8 %sat +} + +declare {i16, i1} @llvm.ssub.with.overflow.i16(i16, i16) +declare {i16, i1} @llvm.usub.with.overflow.i16(i16, i16) + +define linkonce_odr i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { + %call = call {i16, i1} @llvm.ssub.with.overflow.i16(i16 %x, i16 %y) + %res = extractvalue {i16, i1} %call, 0 + %over = extractvalue {i16, i1} %call, 1 + %x.msb = ashr i16 %x, 15 + %x.limit = xor i16 %x.msb, 32767 + %sat = select i1 %over, i16 %x.limit, i16 %res + ret i16 %sat +} + +define linkonce_odr i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { + %call = call {i16, i1} @llvm.usub.with.overflow.i16(i16 %x, i16 %y) + %res = extractvalue {i16, i1} %call, 0 + %over = extractvalue {i16, i1} %call, 1 + %sat = select i1 %over, i16 0, i16 %res + ret i16 %sat +} + +declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) +declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) + +define linkonce_odr i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { + %call = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %x, i32 %y) + %res = extractvalue {i32, i1} %call, 0 + %over = extractvalue {i32, i1} %call, 1 + %x.msb = ashr i32 %x, 31 + %x.limit = xor i32 %x.msb, 2147483647 + %sat = select i1 %over, i32 %x.limit, i32 %res + ret i32 %sat +} + +define linkonce_odr i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { + %call = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %x, i32 %y) + %res = extractvalue {i32, i1} %call, 0 + %over = extractvalue {i32, i1} %call, 1 + %sat = select i1 %over, i32 0, i32 %res + ret i32 %sat +} + +declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) +declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) + +define linkonce_odr i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { + %call = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %x, i64 %y) + %res = extractvalue {i64, i1} %call, 0 + %over = extractvalue {i64, i1} %call, 1 + %x.msb = ashr i64 %x, 63 + %x.limit = xor i64 %x.msb, 9223372036854775807 + %sat = select i1 %over, i64 %x.limit, i64 %res + ret i64 %sat +} + +define linkonce_odr i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { + %call = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %x, i64 %y) + %res = extractvalue {i64, i1} %call, 0 + %over = extractvalue {i64, i1} %call, 1 + %sat = select i1 %over, i64 0, i64 %res + ret i64 %sat +} diff --git a/ptx/lib/SOURCES b/ptx/lib/SOURCES index aab8e3f..fb6e17f 100644 --- a/ptx/lib/SOURCES +++ b/ptx/lib/SOURCES @@ -1 +1,2 @@ integer/add_sat.ll +integer/sub_sat.ll
\ No newline at end of file diff --git a/ptx/lib/integer/sub_sat.ll b/ptx/lib/integer/sub_sat.ll new file mode 100644 index 0000000..6a51a52 --- /dev/null +++ b/ptx/lib/integer/sub_sat.ll @@ -0,0 +1,55 @@ +declare i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) + +define linkonce_odr ptx_device i8 @__clc_sub_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { + %call = call i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) + ret i8 %call +} + +declare i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) + +define linkonce_odr ptx_device i8 @__clc_sub_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { + %call = call i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) + ret i8 %call +} + +declare i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) + +define linkonce_odr ptx_device i16 @__clc_sub_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { + %call = call i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) + ret i16 %call +} + +declare i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) + +define linkonce_odr ptx_device i16 @__clc_sub_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { + %call = call i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) + ret i16 %call +} + +declare i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) + +define linkonce_odr ptx_device i32 @__clc_sub_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { + %call = call i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) + ret i32 %call +} + +declare i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) + +define linkonce_odr ptx_device i32 @__clc_sub_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { + %call = call i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) + ret i32 %call +} + +declare i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) + +define linkonce_odr ptx_device i64 @__clc_sub_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { + %call = call i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) + ret i64 %call +} + +declare i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) + +define linkonce_odr ptx_device i64 @__clc_sub_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { + %call = call i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) + ret i64 %call +} diff --git a/test/subsat.cl b/test/subsat.cl new file mode 100644 index 0000000..a83414b --- /dev/null +++ b/test/subsat.cl @@ -0,0 +1,19 @@ +__kernel void test_subsat_char(char *a, char x, char y) { + *a = sub_sat(x, y); + return; +} + +__kernel void test_subsat_uchar(uchar *a, uchar x, uchar y) { + *a = sub_sat(x, y); + return; +} + +__kernel void test_subsat_long(long *a, long x, long y) { + *a = sub_sat(x, y); + return; +} + +__kernel void test_subsat_ulong(ulong *a, ulong x, ulong y) { + *a = sub_sat(x, y); + return; +}
\ No newline at end of file |