diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2012-10-09 18:59:35 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2012-10-09 18:59:35 +0000 |
commit | c6fad48798dac1544a90e415dacffda4bd4eca8f (patch) | |
tree | 52cf00de32cf7710ad8e1744b42a724d8ca4e662 | |
parent | f46b2c8112ca81dd15cc372104006c655075f3a0 (diff) | |
parent | 14e5a4f2116dd512d3bd6489d55760aa487cc3dd (diff) |
Merge branch 'master' of http://llvm.org/git/libclc
Conflicts:
generic/include/clc/synchronization/barrier.h
generic/include/clc/workitem/get_global_id.h
generic/include/clc/workitem/get_global_size.h
generic/include/clc/workitem/get_group_id.h
generic/include/clc/workitem/get_local_id.h
generic/include/clc/workitem/get_local_size.h
generic/include/clc/workitem/get_num_groups.h
39 files changed, 439 insertions, 72 deletions
diff --git a/build/metabuild.py b/build/metabuild.py index 65870ac..c675e09 100644 --- a/build/metabuild.py +++ b/build/metabuild.py @@ -9,6 +9,7 @@ class Make(object): self.rules = {} self.rule_text = '' self.all_targets = [] + self.default_targets = [] self.clean_files = [] self.distclean_files = [] self.output.write("""all:: @@ -68,8 +69,11 @@ endif return input return [input] + def default(self, paths): + self.default_targets += self._as_list(paths) + def finish(self): - self.output.write('all:: %s\n\n' % ' '.join(self.all_targets)) + self.output.write('all:: %s\n\n' % ' '.join(self.default_targets or self.all_targets)) self.output.write('clean: \n\trm -f %s\n\n' % ' '.join(self.clean_files)) self.output.write('distclean: clean\n\trm -f %s\n' % ' '.join(self.distclean_files)) diff --git a/configure.py b/configure.py index 0236345..83a9375 100755 --- a/configure.py +++ b/configure.py @@ -17,6 +17,8 @@ import metabuild p = OptionParser() p.add_option('--with-llvm-config', metavar='PATH', help='use given llvm-config script') +p.add_option('--prefix', metavar='PATH', + help='install to given prefix') p.add_option('-g', metavar='GENERATOR', default='make', help='use given generator (default: make)') (options, args) = p.parse_args() @@ -70,6 +72,9 @@ b.rule("PREPARE_BUILTINS", "%s -o $out $in" % prepare_builtins, manifest_deps = set([sys.argv[0], os.path.join(srcdir, 'build', 'metabuild.py'), os.path.join(srcdir, 'build', 'ninja_syntax.py')]) +install_files = [] +install_deps = [] + for target in targets: (t_arch, t_vendor, t_os) = target.split('-') archs = [t_arch] @@ -83,12 +88,13 @@ for target in targets: subdirs.append("%s-%s" % (arch, t_os)) subdirs.append(arch) - subdirs = [subdir for subdir in subdirs - if os.path.isdir(os.path.join(srcdir, subdir, 'include')) or - os.path.isfile(os.path.join(srcdir, subdir, 'lib', 'SOURCES'))] + incdirs = filter(os.path.isdir, + [os.path.join(srcdir, subdir, 'include') for subdir in subdirs]) + libdirs = filter(lambda d: os.path.isfile(os.path.join(d, 'SOURCES')), + [os.path.join(srcdir, subdir, 'lib') for subdir in subdirs]) - clang_cl_includes = ' '.join(["-I%s" % os.path.join(srcdir, subdir, 'include') - for subdir in subdirs]) + clang_cl_includes = ' '.join(["-I%s" % incdir for incdir in incdirs]) + install_files += [(incdir, incdir[len(srcdir)+1:]) for incdir in incdirs] # The rule for building a .bc file for the specified architecture using clang. clang_bc_flags = "-ccc-host-triple %s -I`dirname $in` %s " \ @@ -101,11 +107,8 @@ for target in targets: objects = [] sources_seen = set() - for subdir in subdirs: - src_libdir = os.path.join(srcdir, subdir, 'lib') - if not os.path.isdir(src_libdir): - continue - subdir_list_file = os.path.join(src_libdir, 'SOURCES') + for libdir in libdirs: + subdir_list_file = os.path.join(libdir, 'SOURCES') manifest_deps.add(subdir_list_file) for src in open(subdir_list_file).readlines(): src = src.rstrip() @@ -113,7 +116,7 @@ for target in targets: sources_seen.add(src) obj = os.path.join(target, 'lib', src + '.bc') objects.append(obj) - src_file = os.path.join(src_libdir, src) + src_file = os.path.join(libdir, src) ext = os.path.splitext(src)[1] if ext == '.ll': b.build(obj, 'LLVM_AS', src_file) @@ -126,6 +129,18 @@ for target in targets: b.build(builtins_link_bc, "LLVM_LINK", objects) b.build(builtins_opt_bc, "OPT", builtins_link_bc) b.build(builtins_bc, "PREPARE_BUILTINS", builtins_opt_bc, prepare_builtins) + install_files.append((builtins_bc, builtins_bc)) + install_deps.append(builtins_bc) + b.default(builtins_bc) + +if options.prefix: + install_cmd = ' && '.join(['mkdir -p %(dst)s && cp -r %(src)s %(dst)s' % + {'src': file, + 'dst': os.path.join(options.prefix, + os.path.dirname(dest))} + for (file, dest) in install_files]) + b.rule('install', command = install_cmd, description = 'INSTALL') + b.build('install', 'install', install_deps) b.rule("configure", command = ' '.join(sys.argv), description = 'CONFIGURE', generator = True) diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h index b0cbd4a..315693b 100644 --- a/generic/include/clc/clc.h +++ b/generic/include/clc/clc.h @@ -36,6 +36,7 @@ #include <clc/math/exp.h> #include <clc/math/exp2.h> #include <clc/math/fabs.h> +#include <clc/math/floor.h> #include <clc/math/fma.h> #include <clc/math/hypot.h> #include <clc/math/log.h> @@ -50,13 +51,16 @@ #include <clc/math/native_exp2.h> #include <clc/math/native_log.h> #include <clc/math/native_log2.h> +#include <clc/math/native_powr.h> #include <clc/math/native_sin.h> #include <clc/math/native_sqrt.h> +#include <clc/math/rsqrt.h> /* 6.11.3 Integer Functions */ #include <clc/integer/abs.h> #include <clc/integer/abs_diff.h> #include <clc/integer/add_sat.h> +#include <clc/integer/sub_sat.h> /* 6.11.5 Geometric Functions */ #include <clc/geometric/cross.h> @@ -65,6 +69,7 @@ #include <clc/geometric/normalize.h> /* 6.11.6 Relational Functions */ +#include <clc/relational/any.h> #include <clc/relational/select.h> /* 6.11.8 Synchronization Functions */ diff --git a/generic/include/clc/integer/sub_sat.h b/generic/include/clc/integer/sub_sat.h new file mode 100644 index 0000000..942274d --- /dev/null +++ b/generic/include/clc/integer/sub_sat.h @@ -0,0 +1,2 @@ +#define BODY <clc/integer/sub_sat.inc> +#include <clc/integer/gentype.inc> diff --git a/generic/include/clc/integer/sub_sat.inc b/generic/include/clc/integer/sub_sat.inc new file mode 100644 index 0000000..3e0f8f9 --- /dev/null +++ b/generic/include/clc/integer/sub_sat.inc @@ -0,0 +1 @@ +_CLC_OVERLOAD _CLC_DECL GENTYPE sub_sat(GENTYPE x, GENTYPE y); diff --git a/generic/include/clc/math/floor.h b/generic/include/clc/math/floor.h new file mode 100644 index 0000000..abb7c2a --- /dev/null +++ b/generic/include/clc/math/floor.h @@ -0,0 +1,6 @@ +#undef floor +#define floor __clc_floor + +#define FUNCTION __clc_floor +#define INTRINSIC "llvm.floor" +#include <clc/math/unary_intrin.inc> diff --git a/generic/include/clc/math/native_powr.h b/generic/include/clc/math/native_powr.h new file mode 100644 index 0000000..e8a37d9 --- /dev/null +++ b/generic/include/clc/math/native_powr.h @@ -0,0 +1 @@ +#define native_powr pow diff --git a/generic/include/clc/math/rsqrt.h b/generic/include/clc/math/rsqrt.h new file mode 100644 index 0000000..8fd2cbf --- /dev/null +++ b/generic/include/clc/math/rsqrt.h @@ -0,0 +1 @@ +#define rsqrt(x) (1.f/sqrt(x))
diff --git a/generic/include/clc/relational/any.h b/generic/include/clc/relational/any.h new file mode 100644 index 0000000..4687ed2 --- /dev/null +++ b/generic/include/clc/relational/any.h @@ -0,0 +1,16 @@ + +#define _CLC_ANY_DECL(TYPE) \ + _CLC_OVERLOAD _CLC_DECL int any(TYPE v); + +#define _CLC_VECTOR_ANY_DECL(TYPE) \ + _CLC_ANY_DECL(TYPE) \ + _CLC_ANY_DECL(TYPE##2) \ + _CLC_ANY_DECL(TYPE##3) \ + _CLC_ANY_DECL(TYPE##4) \ + _CLC_ANY_DECL(TYPE##8) \ + _CLC_ANY_DECL(TYPE##16) + +_CLC_VECTOR_ANY_DECL(char) +_CLC_VECTOR_ANY_DECL(short) +_CLC_VECTOR_ANY_DECL(int) +_CLC_VECTOR_ANY_DECL(long) diff --git a/generic/include/clc/synchronization/barrier.h b/generic/include/clc/synchronization/barrier.h index 9ef2360..7167a3d 100644 --- a/generic/include/clc/synchronization/barrier.h +++ b/generic/include/clc/synchronization/barrier.h @@ -1 +1 @@ -void barrier(cl_mem_fence_flags flags); +_CLC_DECL void barrier(cl_mem_fence_flags flags); diff --git a/generic/include/clc/workitem/get_global_id.h b/generic/include/clc/workitem/get_global_id.h index b61450f..92759f1 100644 --- a/generic/include/clc/workitem/get_global_id.h +++ b/generic/include/clc/workitem/get_global_id.h @@ -1 +1 @@ -size_t get_global_id(uint dim); +_CLC_DECL size_t get_global_id(uint dim); diff --git a/generic/include/clc/workitem/get_global_size.h b/generic/include/clc/workitem/get_global_size.h index 8d7b9a1..2f83705 100644 --- a/generic/include/clc/workitem/get_global_size.h +++ b/generic/include/clc/workitem/get_global_size.h @@ -1 +1 @@ -size_t get_global_size(uint dim); +_CLC_DECL size_t get_global_size(uint dim); diff --git a/generic/include/clc/workitem/get_group_id.h b/generic/include/clc/workitem/get_group_id.h index 3d904b9..346c82c 100644 --- a/generic/include/clc/workitem/get_group_id.h +++ b/generic/include/clc/workitem/get_group_id.h @@ -1 +1 @@ -size_t get_group_id(uint dim); +_CLC_DECL size_t get_group_id(uint dim); diff --git a/generic/include/clc/workitem/get_local_id.h b/generic/include/clc/workitem/get_local_id.h index f584fab..169aeed 100644 --- a/generic/include/clc/workitem/get_local_id.h +++ b/generic/include/clc/workitem/get_local_id.h @@ -1 +1 @@ -size_t get_local_id(uint dim); +_CLC_DECL size_t get_local_id(uint dim); diff --git a/generic/include/clc/workitem/get_local_size.h b/generic/include/clc/workitem/get_local_size.h index 22a96aa..040ec58 100644 --- a/generic/include/clc/workitem/get_local_size.h +++ b/generic/include/clc/workitem/get_local_size.h @@ -1 +1 @@ -size_t get_local_size(uint dim); +_CLC_DECL size_t get_local_size(uint dim); diff --git a/generic/include/clc/workitem/get_num_groups.h b/generic/include/clc/workitem/get_num_groups.h index b9baad6..e555c7e 100644 --- a/generic/include/clc/workitem/get_num_groups.h +++ b/generic/include/clc/workitem/get_num_groups.h @@ -1 +1 @@ -size_t get_num_groups(uint dim); +_CLC_DECL size_t get_num_groups(uint dim); diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES index 0608116..d29ca1f 100644 --- a/generic/lib/SOURCES +++ b/generic/lib/SOURCES @@ -7,5 +7,11 @@ integer/abs.cl integer/add_sat.cl integer/add_sat.ll integer/add_sat_impl.ll +integer/sub_sat.cl +integer/sub_sat.ll +integer/sub_sat_impl.ll math/hypot.cl math/mad.cl +relational/any.cl +workitem/get_global_id.cl +workitem/get_global_size.cl diff --git a/generic/lib/integer/add_sat.cl b/generic/lib/integer/add_sat.cl index aae2e7f..7eb4e39 100644 --- a/generic/lib/integer/add_sat.cl +++ b/generic/lib/integer/add_sat.cl @@ -1,14 +1,14 @@ #include <clc/clc.h> // From add_sat.ll -_CLC_DECL char __clc_add_sat_s8(char, char); -_CLC_DECL char __clc_add_sat_u8(uchar, uchar); -_CLC_DECL char __clc_add_sat_s16(short, short); -_CLC_DECL char __clc_add_sat_u16(ushort, ushort); -_CLC_DECL char __clc_add_sat_s32(int, int); -_CLC_DECL char __clc_add_sat_u32(uint, uint); -_CLC_DECL char __clc_add_sat_s64(long, long); -_CLC_DECL char __clc_add_sat_u64(ulong, ulong); +_CLC_DECL char __clc_add_sat_s8(char, char); +_CLC_DECL uchar __clc_add_sat_u8(uchar, uchar); +_CLC_DECL short __clc_add_sat_s16(short, short); +_CLC_DECL ushort __clc_add_sat_u16(ushort, ushort); +_CLC_DECL int __clc_add_sat_s32(int, int); +_CLC_DECL uint __clc_add_sat_u32(uint, uint); +_CLC_DECL long __clc_add_sat_s64(long, long); +_CLC_DECL ulong __clc_add_sat_u64(ulong, ulong); _CLC_OVERLOAD _CLC_DEF char add_sat(char x, char y) { return __clc_add_sat_s8(x, y); diff --git a/generic/lib/integer/add_sat.ll b/generic/lib/integer/add_sat.ll index d6814c3..bcbe4c0 100644 --- a/generic/lib/integer/add_sat.ll +++ b/generic/lib/integer/add_sat.ll @@ -1,55 +1,55 @@ declare i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) -define linkonce_odr i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { +define i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { %call = call i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) ret i8 %call } declare i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) -define linkonce_odr i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { +define i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { %call = call i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) ret i8 %call } declare i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) -define linkonce_odr i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { +define i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { %call = call i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) ret i16 %call } declare i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) -define linkonce_odr i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { +define i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { %call = call i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) ret i16 %call } declare i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) -define linkonce_odr i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { +define i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { %call = call i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) ret i32 %call } declare i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) -define linkonce_odr i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { +define i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { %call = call i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) ret i32 %call } declare i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) -define linkonce_odr i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { +define i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { %call = call i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) ret i64 %call } declare i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) -define linkonce_odr i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { +define i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { %call = call i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) ret i64 %call } diff --git a/generic/lib/integer/add_sat_impl.ll b/generic/lib/integer/add_sat_impl.ll index 92f4c53..c150ecb 100644 --- a/generic/lib/integer/add_sat_impl.ll +++ b/generic/lib/integer/add_sat_impl.ll @@ -1,7 +1,7 @@ declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8) declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, i8) -define linkonce_odr i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { +define i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { %call = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 %x, i8 %y) %res = extractvalue {i8, i1} %call, 0 %over = extractvalue {i8, i1} %call, 1 @@ -11,7 +11,7 @@ define linkonce_odr i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) nounwind readnone al ret i8 %sat } -define linkonce_odr i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { +define i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { %call = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %x, i8 %y) %res = extractvalue {i8, i1} %call, 0 %over = extractvalue {i8, i1} %call, 1 @@ -22,7 +22,7 @@ define linkonce_odr i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) nounwind readnone al declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16) declare {i16, i1} @llvm.uadd.with.overflow.i16(i16, i16) -define linkonce_odr i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { +define i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { %call = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 %x, i16 %y) %res = extractvalue {i16, i1} %call, 0 %over = extractvalue {i16, i1} %call, 1 @@ -32,7 +32,7 @@ define linkonce_odr i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) nounwind readnon ret i16 %sat } -define linkonce_odr i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { +define i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { %call = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 %x, i16 %y) %res = extractvalue {i16, i1} %call, 0 %over = extractvalue {i16, i1} %call, 1 @@ -43,7 +43,7 @@ define linkonce_odr i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) nounwind readnon declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) -define linkonce_odr i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { +define i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { %call = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %x, i32 %y) %res = extractvalue {i32, i1} %call, 0 %over = extractvalue {i32, i1} %call, 1 @@ -53,7 +53,7 @@ define linkonce_odr i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) nounwind readnon ret i32 %sat } -define linkonce_odr i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { +define i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { %call = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) %res = extractvalue {i32, i1} %call, 0 %over = extractvalue {i32, i1} %call, 1 @@ -64,7 +64,7 @@ define linkonce_odr i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) nounwind readnon declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) -define linkonce_odr i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { +define i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { %call = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %x, i64 %y) %res = extractvalue {i64, i1} %call, 0 %over = extractvalue {i64, i1} %call, 1 @@ -74,7 +74,7 @@ define linkonce_odr i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) nounwind readnon ret i64 %sat } -define linkonce_odr i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { +define i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { %call = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %x, i64 %y) %res = extractvalue {i64, i1} %call, 0 %over = extractvalue {i64, i1} %call, 1 diff --git a/generic/lib/integer/sub_sat.cl b/generic/lib/integer/sub_sat.cl new file mode 100644 index 0000000..9555b6d --- /dev/null +++ b/generic/lib/integer/sub_sat.cl @@ -0,0 +1,52 @@ +#include <clc/clc.h> + +// From sub_sat.ll +_CLC_DECL char __clc_sub_sat_s8(char, char); +_CLC_DECL uchar __clc_sub_sat_u8(uchar, uchar); +_CLC_DECL short __clc_sub_sat_s16(short, short); +_CLC_DECL ushort __clc_sub_sat_u16(ushort, ushort); +_CLC_DECL int __clc_sub_sat_s32(int, int); +_CLC_DECL uint __clc_sub_sat_u32(uint, uint); +_CLC_DECL long __clc_sub_sat_s64(long, long); +_CLC_DECL ulong __clc_sub_sat_u64(ulong, ulong); + +_CLC_OVERLOAD _CLC_DEF char sub_sat(char x, char y) { + return __clc_sub_sat_s8(x, y); +} + +_CLC_OVERLOAD _CLC_DEF uchar sub_sat(uchar x, uchar y) { + return __clc_sub_sat_u8(x, y); +} + +_CLC_OVERLOAD _CLC_DEF short sub_sat(short x, short y) { + return __clc_sub_sat_s16(x, y); +} + +_CLC_OVERLOAD _CLC_DEF ushort sub_sat(ushort x, ushort y) { + return __clc_sub_sat_u16(x, y); +} + +_CLC_OVERLOAD _CLC_DEF int sub_sat(int x, int y) { + return __clc_sub_sat_s32(x, y); +} + +_CLC_OVERLOAD _CLC_DEF uint sub_sat(uint x, uint y) { + return __clc_sub_sat_u32(x, y); +} + +_CLC_OVERLOAD _CLC_DEF long sub_sat(long x, long y) { + return __clc_sub_sat_s64(x, y); +} + +_CLC_OVERLOAD _CLC_DEF ulong sub_sat(ulong x, ulong y) { + return __clc_sub_sat_u64(x, y); +} + +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, sub_sat, char, char) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, sub_sat, uchar, uchar) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, sub_sat, short, short) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, sub_sat, ushort, ushort) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, sub_sat, int, int) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, sub_sat, uint, uint) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, sub_sat, long, long) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, sub_sat, ulong, ulong) diff --git a/generic/lib/integer/sub_sat.ll b/generic/lib/integer/sub_sat.ll new file mode 100644 index 0000000..7252574 --- /dev/null +++ b/generic/lib/integer/sub_sat.ll @@ -0,0 +1,55 @@ +declare i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) + +define i8 @__clc_sub_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { + %call = call i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) + ret i8 %call +} + +declare i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) + +define i8 @__clc_sub_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { + %call = call i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) + ret i8 %call +} + +declare i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) + +define i16 @__clc_sub_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { + %call = call i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) + ret i16 %call +} + +declare i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) + +define i16 @__clc_sub_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { + %call = call i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) + ret i16 %call +} + +declare i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) + +define i32 @__clc_sub_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { + %call = call i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) + ret i32 %call +} + +declare i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) + +define i32 @__clc_sub_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { + %call = call i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) + ret i32 %call +} + +declare i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) + +define i64 @__clc_sub_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { + %call = call i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) + ret i64 %call +} + +declare i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) + +define i64 @__clc_sub_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { + %call = call i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) + ret i64 %call +} diff --git a/generic/lib/integer/sub_sat_impl.ll b/generic/lib/integer/sub_sat_impl.ll new file mode 100644 index 0000000..e82b632 --- /dev/null +++ b/generic/lib/integer/sub_sat_impl.ll @@ -0,0 +1,83 @@ +declare {i8, i1} @llvm.ssub.with.overflow.i8(i8, i8) +declare {i8, i1} @llvm.usub.with.overflow.i8(i8, i8) + +define i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { + %call = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 %x, i8 %y) + %res = extractvalue {i8, i1} %call, 0 + %over = extractvalue {i8, i1} %call, 1 + %x.msb = ashr i8 %x, 7 + %x.limit = xor i8 %x.msb, 127 + %sat = select i1 %over, i8 %x.limit, i8 %res + ret i8 %sat +} + +define i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { + %call = call {i8, i1} @llvm.usub.with.overflow.i8(i8 %x, i8 %y) + %res = extractvalue {i8, i1} %call, 0 + %over = extractvalue {i8, i1} %call, 1 + %sat = select i1 %over, i8 0, i8 %res + ret i8 %sat +} + +declare {i16, i1} @llvm.ssub.with.overflow.i16(i16, i16) +declare {i16, i1} @llvm.usub.with.overflow.i16(i16, i16) + +define i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { + %call = call {i16, i1} @llvm.ssub.with.overflow.i16(i16 %x, i16 %y) + %res = extractvalue {i16, i1} %call, 0 + %over = extractvalue {i16, i1} %call, 1 + %x.msb = ashr i16 %x, 15 + %x.limit = xor i16 %x.msb, 32767 + %sat = select i1 %over, i16 %x.limit, i16 %res + ret i16 %sat +} + +define i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { + %call = call {i16, i1} @llvm.usub.with.overflow.i16(i16 %x, i16 %y) + %res = extractvalue {i16, i1} %call, 0 + %over = extractvalue {i16, i1} %call, 1 + %sat = select i1 %over, i16 0, i16 %res + ret i16 %sat +} + +declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) +declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) + +define i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { + %call = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %x, i32 %y) + %res = extractvalue {i32, i1} %call, 0 + %over = extractvalue {i32, i1} %call, 1 + %x.msb = ashr i32 %x, 31 + %x.limit = xor i32 %x.msb, 2147483647 + %sat = select i1 %over, i32 %x.limit, i32 %res + ret i32 %sat +} + +define i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { + %call = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %x, i32 %y) + %res = extractvalue {i32, i1} %call, 0 + %over = extractvalue {i32, i1} %call, 1 + %sat = select i1 %over, i32 0, i32 %res + ret i32 %sat +} + +declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) +declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) + +define i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { + %call = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %x, i64 %y) + %res = extractvalue {i64, i1} %call, 0 + %over = extractvalue {i64, i1} %call, 1 + %x.msb = ashr i64 %x, 63 + %x.limit = xor i64 %x.msb, 9223372036854775807 + %sat = select i1 %over, i64 %x.limit, i64 %res + ret i64 %sat +} + +define i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { + %call = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %x, i64 %y) + %res = extractvalue {i64, i1} %call, 0 + %over = extractvalue {i64, i1} %call, 1 + %sat = select i1 %over, i64 0, i64 %res + ret i64 %sat +} diff --git a/generic/lib/relational/any.cl b/generic/lib/relational/any.cl new file mode 100644 index 0000000..4d37210 --- /dev/null +++ b/generic/lib/relational/any.cl @@ -0,0 +1,30 @@ +#include <clc/clc.h> + +#define _CLC_ANY(v) (((v) >> ((sizeof(v) * 8) - 1)) & 0x1) +#define _CLC_ANY2(v) (_CLC_ANY((v).s0) | _CLC_ANY((v).s1)) +#define _CLC_ANY3(v) (_CLC_ANY2((v)) | _CLC_ANY((v).s2)) +#define _CLC_ANY4(v) (_CLC_ANY3((v)) | _CLC_ANY((v).s3)) +#define _CLC_ANY8(v) (_CLC_ANY4((v)) | _CLC_ANY((v).s4) | _CLC_ANY((v).s5) \ + | _CLC_ANY((v).s6) | _CLC_ANY((v).s7)) +#define _CLC_ANY16(v) (_CLC_ANY8((v)) | _CLC_ANY((v).s8) | _CLC_ANY((v).s9) \ + | _CLC_ANY((v).sA) | _CLC_ANY((v).sB) \ + | _CLC_ANY((v).sC) | _CLC_ANY((v).sD) \ + | _CLC_ANY((v).sE) | _CLC_ANY((v).sf)) + + +#define ANY_ID(TYPE) \ + _CLC_OVERLOAD _CLC_DEF int any(TYPE v) + +#define ANY_VECTORIZE(TYPE) \ + ANY_ID(TYPE) { return _CLC_ANY(v); } \ + ANY_ID(TYPE##2) { return _CLC_ANY2(v); } \ + ANY_ID(TYPE##3) { return _CLC_ANY3(v); } \ + ANY_ID(TYPE##4) { return _CLC_ANY4(v); } \ + ANY_ID(TYPE##8) { return _CLC_ANY8(v); } \ + ANY_ID(TYPE##16) { return _CLC_ANY16(v); } + +ANY_VECTORIZE(char) +ANY_VECTORIZE(short) +ANY_VECTORIZE(int) +ANY_VECTORIZE(long) + diff --git a/generic/lib/workitem/get_global_id.cl b/generic/lib/workitem/get_global_id.cl new file mode 100644 index 0000000..fdd83d2 --- /dev/null +++ b/generic/lib/workitem/get_global_id.cl @@ -0,0 +1,5 @@ +#include <clc/clc.h> + +_CLC_DEF size_t get_global_id(uint dim) { + return get_group_id(dim)*get_local_size(dim) + get_local_id(dim); +} diff --git a/generic/lib/workitem/get_global_size.cl b/generic/lib/workitem/get_global_size.cl new file mode 100644 index 0000000..5ae649e --- /dev/null +++ b/generic/lib/workitem/get_global_size.cl @@ -0,0 +1,5 @@ +#include <clc/clc.h> + +_CLC_DEF size_t get_global_size(uint dim) { + return get_num_groups(dim)*get_local_size(dim); +} diff --git a/ptx-nvidiacl/include/clc/workitem/get_global_id.h b/ptx-nvidiacl/include/clc/workitem/get_global_id.h deleted file mode 100644 index 026d2fe..0000000 --- a/ptx-nvidiacl/include/clc/workitem/get_global_id.h +++ /dev/null @@ -1,8 +0,0 @@ -_CLC_INLINE size_t get_global_id(uint dim) { - switch (dim) { - case 0: return __builtin_ptx_read_ctaid_x()*__builtin_ptx_read_ntid_x()+__builtin_ptx_read_tid_x(); - case 1: return __builtin_ptx_read_ctaid_y()*__builtin_ptx_read_ntid_y()+__builtin_ptx_read_tid_y(); - case 2: return __builtin_ptx_read_ctaid_z()*__builtin_ptx_read_ntid_z()+__builtin_ptx_read_tid_z(); - default: return 0; - } -} diff --git a/ptx-nvidiacl/include/clc/workitem/get_global_size.h b/ptx-nvidiacl/include/clc/workitem/get_global_size.h deleted file mode 100644 index 5cd4222..0000000 --- a/ptx-nvidiacl/include/clc/workitem/get_global_size.h +++ /dev/null @@ -1,8 +0,0 @@ -_CLC_INLINE size_t get_global_size(uint dim) { - switch (dim) { - case 0: return __builtin_ptx_read_nctaid_x()*__builtin_ptx_read_ntid_x(); - case 1: return __builtin_ptx_read_nctaid_y()*__builtin_ptx_read_ntid_y(); - case 2: return __builtin_ptx_read_nctaid_z()*__builtin_ptx_read_ntid_z(); - default: return 0; - } -} diff --git a/ptx-nvidiacl/lib/SOURCES b/ptx-nvidiacl/lib/SOURCES index e69de29..7cdbd85 100644 --- a/ptx-nvidiacl/lib/SOURCES +++ b/ptx-nvidiacl/lib/SOURCES @@ -0,0 +1,5 @@ +synchronization/barrier.cl +workitem/get_group_id.cl +workitem/get_local_id.cl +workitem/get_local_size.cl +workitem/get_num_groups.cl diff --git a/ptx-nvidiacl/include/clc/synchronization/barrier.h b/ptx-nvidiacl/lib/synchronization/barrier.cl index cd9f327..fb36c26 100644 --- a/ptx-nvidiacl/include/clc/synchronization/barrier.h +++ b/ptx-nvidiacl/lib/synchronization/barrier.cl @@ -1,4 +1,6 @@ -_CLC_INLINE void barrier(cl_mem_fence_flags flags) { +#include <clc/clc.h> + +_CLC_DEF void barrier(cl_mem_fence_flags flags) { if (flags & CLK_LOCAL_MEM_FENCE) { __builtin_ptx_bar_sync(0); } diff --git a/ptx-nvidiacl/include/clc/workitem/get_group_id.h b/ptx-nvidiacl/lib/workitem/get_group_id.cl index 18b1bd4..2b35b4e 100644 --- a/ptx-nvidiacl/include/clc/workitem/get_group_id.h +++ b/ptx-nvidiacl/lib/workitem/get_group_id.cl @@ -1,4 +1,6 @@ -_CLC_INLINE size_t get_group_id(uint dim) { +#include <clc/clc.h> + +_CLC_DEF size_t get_group_id(uint dim) { switch (dim) { case 0: return __builtin_ptx_read_ctaid_x(); case 1: return __builtin_ptx_read_ctaid_y(); diff --git a/ptx-nvidiacl/include/clc/workitem/get_local_id.h b/ptx-nvidiacl/lib/workitem/get_local_id.cl index 1b8c776..f0cfdc0 100644 --- a/ptx-nvidiacl/include/clc/workitem/get_local_id.h +++ b/ptx-nvidiacl/lib/workitem/get_local_id.cl @@ -1,4 +1,6 @@ -_CLC_INLINE size_t get_local_id(uint dim) { +#include <clc/clc.h> + +_CLC_DEF size_t get_local_id(uint dim) { switch (dim) { case 0: return __builtin_ptx_read_tid_x(); case 1: return __builtin_ptx_read_tid_y(); diff --git a/ptx-nvidiacl/include/clc/workitem/get_local_size.h b/ptx-nvidiacl/lib/workitem/get_local_size.cl index cbc1f6e..c3f5425 100644 --- a/ptx-nvidiacl/include/clc/workitem/get_local_size.h +++ b/ptx-nvidiacl/lib/workitem/get_local_size.cl @@ -1,4 +1,6 @@ -_CLC_INLINE size_t get_local_size(uint dim) { +#include <clc/clc.h> + +_CLC_DEF size_t get_local_size(uint dim) { switch (dim) { case 0: return __builtin_ptx_read_ntid_x(); case 1: return __builtin_ptx_read_ntid_y(); diff --git a/ptx-nvidiacl/include/clc/workitem/get_num_groups.h b/ptx-nvidiacl/lib/workitem/get_num_groups.cl index 36ee849..90bdc2e 100644 --- a/ptx-nvidiacl/include/clc/workitem/get_num_groups.h +++ b/ptx-nvidiacl/lib/workitem/get_num_groups.cl @@ -1,4 +1,6 @@ -_CLC_INLINE size_t get_num_groups(uint dim) { +#include <clc/clc.h> + +_CLC_DEF size_t get_num_groups(uint dim) { switch (dim) { case 0: return __builtin_ptx_read_nctaid_x(); case 1: return __builtin_ptx_read_nctaid_y(); diff --git a/ptx/lib/SOURCES b/ptx/lib/SOURCES index aab8e3f..fb6e17f 100644 --- a/ptx/lib/SOURCES +++ b/ptx/lib/SOURCES @@ -1 +1,2 @@ integer/add_sat.ll +integer/sub_sat.ll
\ No newline at end of file diff --git a/ptx/lib/integer/add_sat.ll b/ptx/lib/integer/add_sat.ll index 9b8311c..f887962 100644 --- a/ptx/lib/integer/add_sat.ll +++ b/ptx/lib/integer/add_sat.ll @@ -1,55 +1,55 @@ declare i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) -define linkonce_odr ptx_device i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { +define ptx_device i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { %call = call i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) ret i8 %call } declare i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) -define linkonce_odr ptx_device i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { +define ptx_device i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { %call = call i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) ret i8 %call } declare i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) -define linkonce_odr ptx_device i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { +define ptx_device i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { %call = call i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) ret i16 %call } declare i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) -define linkonce_odr ptx_device i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { +define ptx_device i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { %call = call i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) ret i16 %call } declare i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) -define linkonce_odr ptx_device i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { +define ptx_device i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { %call = call i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) ret i32 %call } declare i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) -define linkonce_odr ptx_device i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { +define ptx_device i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { %call = call i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) ret i32 %call } declare i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) -define linkonce_odr ptx_device i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { +define ptx_device i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { %call = call i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) ret i64 %call } declare i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) -define linkonce_odr ptx_device i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { +define ptx_device i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { %call = call i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) ret i64 %call } diff --git a/ptx/lib/integer/sub_sat.ll b/ptx/lib/integer/sub_sat.ll new file mode 100644 index 0000000..1a66eb5 --- /dev/null +++ b/ptx/lib/integer/sub_sat.ll @@ -0,0 +1,55 @@ +declare i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) + +define ptx_device i8 @__clc_sub_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { + %call = call i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) + ret i8 %call +} + +declare i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) + +define ptx_device i8 @__clc_sub_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { + %call = call i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) + ret i8 %call +} + +declare i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) + +define ptx_device i16 @__clc_sub_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { + %call = call i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) + ret i16 %call +} + +declare i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) + +define ptx_device i16 @__clc_sub_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { + %call = call i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) + ret i16 %call +} + +declare i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) + +define ptx_device i32 @__clc_sub_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { + %call = call i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) + ret i32 %call +} + +declare i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) + +define ptx_device i32 @__clc_sub_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { + %call = call i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) + ret i32 %call +} + +declare i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) + +define ptx_device i64 @__clc_sub_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { + %call = call i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) + ret i64 %call +} + +declare i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) + +define ptx_device i64 @__clc_sub_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { + %call = call i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) + ret i64 %call +} diff --git a/test/rsqrt.cl b/test/rsqrt.cl new file mode 100644 index 0000000..13ad216 --- /dev/null +++ b/test/rsqrt.cl @@ -0,0 +1,6 @@ +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +__kernel void foo(float4 *x, double4 *y) { + x[1] = rsqrt(x[0]); + y[1] = rsqrt(y[0]); +} diff --git a/test/subsat.cl b/test/subsat.cl new file mode 100644 index 0000000..a83414b --- /dev/null +++ b/test/subsat.cl @@ -0,0 +1,19 @@ +__kernel void test_subsat_char(char *a, char x, char y) { + *a = sub_sat(x, y); + return; +} + +__kernel void test_subsat_uchar(uchar *a, uchar x, uchar y) { + *a = sub_sat(x, y); + return; +} + +__kernel void test_subsat_long(long *a, long x, long y) { + *a = sub_sat(x, y); + return; +} + +__kernel void test_subsat_ulong(ulong *a, ulong x, ulong y) { + *a = sub_sat(x, y); + return; +}
\ No newline at end of file |