summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2012-10-09 18:59:35 +0000
committerTom Stellard <thomas.stellard@amd.com>2012-10-09 18:59:35 +0000
commitc6fad48798dac1544a90e415dacffda4bd4eca8f (patch)
tree52cf00de32cf7710ad8e1744b42a724d8ca4e662
parentf46b2c8112ca81dd15cc372104006c655075f3a0 (diff)
parent14e5a4f2116dd512d3bd6489d55760aa487cc3dd (diff)
Merge branch 'master' of http://llvm.org/git/libclc
Conflicts: generic/include/clc/synchronization/barrier.h generic/include/clc/workitem/get_global_id.h generic/include/clc/workitem/get_global_size.h generic/include/clc/workitem/get_group_id.h generic/include/clc/workitem/get_local_id.h generic/include/clc/workitem/get_local_size.h generic/include/clc/workitem/get_num_groups.h
-rw-r--r--build/metabuild.py6
-rwxr-xr-xconfigure.py37
-rw-r--r--generic/include/clc/clc.h5
-rw-r--r--generic/include/clc/integer/sub_sat.h2
-rw-r--r--generic/include/clc/integer/sub_sat.inc1
-rw-r--r--generic/include/clc/math/floor.h6
-rw-r--r--generic/include/clc/math/native_powr.h1
-rw-r--r--generic/include/clc/math/rsqrt.h1
-rw-r--r--generic/include/clc/relational/any.h16
-rw-r--r--generic/include/clc/synchronization/barrier.h2
-rw-r--r--generic/include/clc/workitem/get_global_id.h2
-rw-r--r--generic/include/clc/workitem/get_global_size.h2
-rw-r--r--generic/include/clc/workitem/get_group_id.h2
-rw-r--r--generic/include/clc/workitem/get_local_id.h2
-rw-r--r--generic/include/clc/workitem/get_local_size.h2
-rw-r--r--generic/include/clc/workitem/get_num_groups.h2
-rw-r--r--generic/lib/SOURCES6
-rw-r--r--generic/lib/integer/add_sat.cl16
-rw-r--r--generic/lib/integer/add_sat.ll16
-rw-r--r--generic/lib/integer/add_sat_impl.ll16
-rw-r--r--generic/lib/integer/sub_sat.cl52
-rw-r--r--generic/lib/integer/sub_sat.ll55
-rw-r--r--generic/lib/integer/sub_sat_impl.ll83
-rw-r--r--generic/lib/relational/any.cl30
-rw-r--r--generic/lib/workitem/get_global_id.cl5
-rw-r--r--generic/lib/workitem/get_global_size.cl5
-rw-r--r--ptx-nvidiacl/include/clc/workitem/get_global_id.h8
-rw-r--r--ptx-nvidiacl/include/clc/workitem/get_global_size.h8
-rw-r--r--ptx-nvidiacl/lib/SOURCES5
-rw-r--r--ptx-nvidiacl/lib/synchronization/barrier.cl (renamed from ptx-nvidiacl/include/clc/synchronization/barrier.h)4
-rw-r--r--ptx-nvidiacl/lib/workitem/get_group_id.cl (renamed from ptx-nvidiacl/include/clc/workitem/get_group_id.h)4
-rw-r--r--ptx-nvidiacl/lib/workitem/get_local_id.cl (renamed from ptx-nvidiacl/include/clc/workitem/get_local_id.h)4
-rw-r--r--ptx-nvidiacl/lib/workitem/get_local_size.cl (renamed from ptx-nvidiacl/include/clc/workitem/get_local_size.h)4
-rw-r--r--ptx-nvidiacl/lib/workitem/get_num_groups.cl (renamed from ptx-nvidiacl/include/clc/workitem/get_num_groups.h)4
-rw-r--r--ptx/lib/SOURCES1
-rw-r--r--ptx/lib/integer/add_sat.ll16
-rw-r--r--ptx/lib/integer/sub_sat.ll55
-rw-r--r--test/rsqrt.cl6
-rw-r--r--test/subsat.cl19
39 files changed, 439 insertions, 72 deletions
diff --git a/build/metabuild.py b/build/metabuild.py
index 65870ac..c675e09 100644
--- a/build/metabuild.py
+++ b/build/metabuild.py
@@ -9,6 +9,7 @@ class Make(object):
self.rules = {}
self.rule_text = ''
self.all_targets = []
+ self.default_targets = []
self.clean_files = []
self.distclean_files = []
self.output.write("""all::
@@ -68,8 +69,11 @@ endif
return input
return [input]
+ def default(self, paths):
+ self.default_targets += self._as_list(paths)
+
def finish(self):
- self.output.write('all:: %s\n\n' % ' '.join(self.all_targets))
+ self.output.write('all:: %s\n\n' % ' '.join(self.default_targets or self.all_targets))
self.output.write('clean: \n\trm -f %s\n\n' % ' '.join(self.clean_files))
self.output.write('distclean: clean\n\trm -f %s\n' % ' '.join(self.distclean_files))
diff --git a/configure.py b/configure.py
index 0236345..83a9375 100755
--- a/configure.py
+++ b/configure.py
@@ -17,6 +17,8 @@ import metabuild
p = OptionParser()
p.add_option('--with-llvm-config', metavar='PATH',
help='use given llvm-config script')
+p.add_option('--prefix', metavar='PATH',
+ help='install to given prefix')
p.add_option('-g', metavar='GENERATOR', default='make',
help='use given generator (default: make)')
(options, args) = p.parse_args()
@@ -70,6 +72,9 @@ b.rule("PREPARE_BUILTINS", "%s -o $out $in" % prepare_builtins,
manifest_deps = set([sys.argv[0], os.path.join(srcdir, 'build', 'metabuild.py'),
os.path.join(srcdir, 'build', 'ninja_syntax.py')])
+install_files = []
+install_deps = []
+
for target in targets:
(t_arch, t_vendor, t_os) = target.split('-')
archs = [t_arch]
@@ -83,12 +88,13 @@ for target in targets:
subdirs.append("%s-%s" % (arch, t_os))
subdirs.append(arch)
- subdirs = [subdir for subdir in subdirs
- if os.path.isdir(os.path.join(srcdir, subdir, 'include')) or
- os.path.isfile(os.path.join(srcdir, subdir, 'lib', 'SOURCES'))]
+ incdirs = filter(os.path.isdir,
+ [os.path.join(srcdir, subdir, 'include') for subdir in subdirs])
+ libdirs = filter(lambda d: os.path.isfile(os.path.join(d, 'SOURCES')),
+ [os.path.join(srcdir, subdir, 'lib') for subdir in subdirs])
- clang_cl_includes = ' '.join(["-I%s" % os.path.join(srcdir, subdir, 'include')
- for subdir in subdirs])
+ clang_cl_includes = ' '.join(["-I%s" % incdir for incdir in incdirs])
+ install_files += [(incdir, incdir[len(srcdir)+1:]) for incdir in incdirs]
# The rule for building a .bc file for the specified architecture using clang.
clang_bc_flags = "-ccc-host-triple %s -I`dirname $in` %s " \
@@ -101,11 +107,8 @@ for target in targets:
objects = []
sources_seen = set()
- for subdir in subdirs:
- src_libdir = os.path.join(srcdir, subdir, 'lib')
- if not os.path.isdir(src_libdir):
- continue
- subdir_list_file = os.path.join(src_libdir, 'SOURCES')
+ for libdir in libdirs:
+ subdir_list_file = os.path.join(libdir, 'SOURCES')
manifest_deps.add(subdir_list_file)
for src in open(subdir_list_file).readlines():
src = src.rstrip()
@@ -113,7 +116,7 @@ for target in targets:
sources_seen.add(src)
obj = os.path.join(target, 'lib', src + '.bc')
objects.append(obj)
- src_file = os.path.join(src_libdir, src)
+ src_file = os.path.join(libdir, src)
ext = os.path.splitext(src)[1]
if ext == '.ll':
b.build(obj, 'LLVM_AS', src_file)
@@ -126,6 +129,18 @@ for target in targets:
b.build(builtins_link_bc, "LLVM_LINK", objects)
b.build(builtins_opt_bc, "OPT", builtins_link_bc)
b.build(builtins_bc, "PREPARE_BUILTINS", builtins_opt_bc, prepare_builtins)
+ install_files.append((builtins_bc, builtins_bc))
+ install_deps.append(builtins_bc)
+ b.default(builtins_bc)
+
+if options.prefix:
+ install_cmd = ' && '.join(['mkdir -p %(dst)s && cp -r %(src)s %(dst)s' %
+ {'src': file,
+ 'dst': os.path.join(options.prefix,
+ os.path.dirname(dest))}
+ for (file, dest) in install_files])
+ b.rule('install', command = install_cmd, description = 'INSTALL')
+ b.build('install', 'install', install_deps)
b.rule("configure", command = ' '.join(sys.argv), description = 'CONFIGURE',
generator = True)
diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
index b0cbd4a..315693b 100644
--- a/generic/include/clc/clc.h
+++ b/generic/include/clc/clc.h
@@ -36,6 +36,7 @@
#include <clc/math/exp.h>
#include <clc/math/exp2.h>
#include <clc/math/fabs.h>
+#include <clc/math/floor.h>
#include <clc/math/fma.h>
#include <clc/math/hypot.h>
#include <clc/math/log.h>
@@ -50,13 +51,16 @@
#include <clc/math/native_exp2.h>
#include <clc/math/native_log.h>
#include <clc/math/native_log2.h>
+#include <clc/math/native_powr.h>
#include <clc/math/native_sin.h>
#include <clc/math/native_sqrt.h>
+#include <clc/math/rsqrt.h>
/* 6.11.3 Integer Functions */
#include <clc/integer/abs.h>
#include <clc/integer/abs_diff.h>
#include <clc/integer/add_sat.h>
+#include <clc/integer/sub_sat.h>
/* 6.11.5 Geometric Functions */
#include <clc/geometric/cross.h>
@@ -65,6 +69,7 @@
#include <clc/geometric/normalize.h>
/* 6.11.6 Relational Functions */
+#include <clc/relational/any.h>
#include <clc/relational/select.h>
/* 6.11.8 Synchronization Functions */
diff --git a/generic/include/clc/integer/sub_sat.h b/generic/include/clc/integer/sub_sat.h
new file mode 100644
index 0000000..942274d
--- /dev/null
+++ b/generic/include/clc/integer/sub_sat.h
@@ -0,0 +1,2 @@
+#define BODY <clc/integer/sub_sat.inc>
+#include <clc/integer/gentype.inc>
diff --git a/generic/include/clc/integer/sub_sat.inc b/generic/include/clc/integer/sub_sat.inc
new file mode 100644
index 0000000..3e0f8f9
--- /dev/null
+++ b/generic/include/clc/integer/sub_sat.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL GENTYPE sub_sat(GENTYPE x, GENTYPE y);
diff --git a/generic/include/clc/math/floor.h b/generic/include/clc/math/floor.h
new file mode 100644
index 0000000..abb7c2a
--- /dev/null
+++ b/generic/include/clc/math/floor.h
@@ -0,0 +1,6 @@
+#undef floor
+#define floor __clc_floor
+
+#define FUNCTION __clc_floor
+#define INTRINSIC "llvm.floor"
+#include <clc/math/unary_intrin.inc>
diff --git a/generic/include/clc/math/native_powr.h b/generic/include/clc/math/native_powr.h
new file mode 100644
index 0000000..e8a37d9
--- /dev/null
+++ b/generic/include/clc/math/native_powr.h
@@ -0,0 +1 @@
+#define native_powr pow
diff --git a/generic/include/clc/math/rsqrt.h b/generic/include/clc/math/rsqrt.h
new file mode 100644
index 0000000..8fd2cbf
--- /dev/null
+++ b/generic/include/clc/math/rsqrt.h
@@ -0,0 +1 @@
+#define rsqrt(x) (1.f/sqrt(x))
diff --git a/generic/include/clc/relational/any.h b/generic/include/clc/relational/any.h
new file mode 100644
index 0000000..4687ed2
--- /dev/null
+++ b/generic/include/clc/relational/any.h
@@ -0,0 +1,16 @@
+
+#define _CLC_ANY_DECL(TYPE) \
+ _CLC_OVERLOAD _CLC_DECL int any(TYPE v);
+
+#define _CLC_VECTOR_ANY_DECL(TYPE) \
+ _CLC_ANY_DECL(TYPE) \
+ _CLC_ANY_DECL(TYPE##2) \
+ _CLC_ANY_DECL(TYPE##3) \
+ _CLC_ANY_DECL(TYPE##4) \
+ _CLC_ANY_DECL(TYPE##8) \
+ _CLC_ANY_DECL(TYPE##16)
+
+_CLC_VECTOR_ANY_DECL(char)
+_CLC_VECTOR_ANY_DECL(short)
+_CLC_VECTOR_ANY_DECL(int)
+_CLC_VECTOR_ANY_DECL(long)
diff --git a/generic/include/clc/synchronization/barrier.h b/generic/include/clc/synchronization/barrier.h
index 9ef2360..7167a3d 100644
--- a/generic/include/clc/synchronization/barrier.h
+++ b/generic/include/clc/synchronization/barrier.h
@@ -1 +1 @@
-void barrier(cl_mem_fence_flags flags);
+_CLC_DECL void barrier(cl_mem_fence_flags flags);
diff --git a/generic/include/clc/workitem/get_global_id.h b/generic/include/clc/workitem/get_global_id.h
index b61450f..92759f1 100644
--- a/generic/include/clc/workitem/get_global_id.h
+++ b/generic/include/clc/workitem/get_global_id.h
@@ -1 +1 @@
-size_t get_global_id(uint dim);
+_CLC_DECL size_t get_global_id(uint dim);
diff --git a/generic/include/clc/workitem/get_global_size.h b/generic/include/clc/workitem/get_global_size.h
index 8d7b9a1..2f83705 100644
--- a/generic/include/clc/workitem/get_global_size.h
+++ b/generic/include/clc/workitem/get_global_size.h
@@ -1 +1 @@
-size_t get_global_size(uint dim);
+_CLC_DECL size_t get_global_size(uint dim);
diff --git a/generic/include/clc/workitem/get_group_id.h b/generic/include/clc/workitem/get_group_id.h
index 3d904b9..346c82c 100644
--- a/generic/include/clc/workitem/get_group_id.h
+++ b/generic/include/clc/workitem/get_group_id.h
@@ -1 +1 @@
-size_t get_group_id(uint dim);
+_CLC_DECL size_t get_group_id(uint dim);
diff --git a/generic/include/clc/workitem/get_local_id.h b/generic/include/clc/workitem/get_local_id.h
index f584fab..169aeed 100644
--- a/generic/include/clc/workitem/get_local_id.h
+++ b/generic/include/clc/workitem/get_local_id.h
@@ -1 +1 @@
-size_t get_local_id(uint dim);
+_CLC_DECL size_t get_local_id(uint dim);
diff --git a/generic/include/clc/workitem/get_local_size.h b/generic/include/clc/workitem/get_local_size.h
index 22a96aa..040ec58 100644
--- a/generic/include/clc/workitem/get_local_size.h
+++ b/generic/include/clc/workitem/get_local_size.h
@@ -1 +1 @@
-size_t get_local_size(uint dim);
+_CLC_DECL size_t get_local_size(uint dim);
diff --git a/generic/include/clc/workitem/get_num_groups.h b/generic/include/clc/workitem/get_num_groups.h
index b9baad6..e555c7e 100644
--- a/generic/include/clc/workitem/get_num_groups.h
+++ b/generic/include/clc/workitem/get_num_groups.h
@@ -1 +1 @@
-size_t get_num_groups(uint dim);
+_CLC_DECL size_t get_num_groups(uint dim);
diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
index 0608116..d29ca1f 100644
--- a/generic/lib/SOURCES
+++ b/generic/lib/SOURCES
@@ -7,5 +7,11 @@ integer/abs.cl
integer/add_sat.cl
integer/add_sat.ll
integer/add_sat_impl.ll
+integer/sub_sat.cl
+integer/sub_sat.ll
+integer/sub_sat_impl.ll
math/hypot.cl
math/mad.cl
+relational/any.cl
+workitem/get_global_id.cl
+workitem/get_global_size.cl
diff --git a/generic/lib/integer/add_sat.cl b/generic/lib/integer/add_sat.cl
index aae2e7f..7eb4e39 100644
--- a/generic/lib/integer/add_sat.cl
+++ b/generic/lib/integer/add_sat.cl
@@ -1,14 +1,14 @@
#include <clc/clc.h>
// From add_sat.ll
-_CLC_DECL char __clc_add_sat_s8(char, char);
-_CLC_DECL char __clc_add_sat_u8(uchar, uchar);
-_CLC_DECL char __clc_add_sat_s16(short, short);
-_CLC_DECL char __clc_add_sat_u16(ushort, ushort);
-_CLC_DECL char __clc_add_sat_s32(int, int);
-_CLC_DECL char __clc_add_sat_u32(uint, uint);
-_CLC_DECL char __clc_add_sat_s64(long, long);
-_CLC_DECL char __clc_add_sat_u64(ulong, ulong);
+_CLC_DECL char __clc_add_sat_s8(char, char);
+_CLC_DECL uchar __clc_add_sat_u8(uchar, uchar);
+_CLC_DECL short __clc_add_sat_s16(short, short);
+_CLC_DECL ushort __clc_add_sat_u16(ushort, ushort);
+_CLC_DECL int __clc_add_sat_s32(int, int);
+_CLC_DECL uint __clc_add_sat_u32(uint, uint);
+_CLC_DECL long __clc_add_sat_s64(long, long);
+_CLC_DECL ulong __clc_add_sat_u64(ulong, ulong);
_CLC_OVERLOAD _CLC_DEF char add_sat(char x, char y) {
return __clc_add_sat_s8(x, y);
diff --git a/generic/lib/integer/add_sat.ll b/generic/lib/integer/add_sat.ll
index d6814c3..bcbe4c0 100644
--- a/generic/lib/integer/add_sat.ll
+++ b/generic/lib/integer/add_sat.ll
@@ -1,55 +1,55 @@
declare i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y)
-define linkonce_odr i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+define i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
%call = call i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y)
ret i8 %call
}
declare i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y)
-define linkonce_odr i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+define i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
%call = call i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y)
ret i8 %call
}
declare i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y)
-define linkonce_odr i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+define i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
%call = call i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y)
ret i16 %call
}
declare i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y)
-define linkonce_odr i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+define i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
%call = call i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y)
ret i16 %call
}
declare i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y)
-define linkonce_odr i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+define i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
%call = call i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y)
ret i32 %call
}
declare i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y)
-define linkonce_odr i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+define i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
%call = call i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y)
ret i32 %call
}
declare i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y)
-define linkonce_odr i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+define i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
%call = call i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y)
ret i64 %call
}
declare i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y)
-define linkonce_odr i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+define i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
%call = call i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y)
ret i64 %call
}
diff --git a/generic/lib/integer/add_sat_impl.ll b/generic/lib/integer/add_sat_impl.ll
index 92f4c53..c150ecb 100644
--- a/generic/lib/integer/add_sat_impl.ll
+++ b/generic/lib/integer/add_sat_impl.ll
@@ -1,7 +1,7 @@
declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8)
declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, i8)
-define linkonce_odr i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+define i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
%call = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 %x, i8 %y)
%res = extractvalue {i8, i1} %call, 0
%over = extractvalue {i8, i1} %call, 1
@@ -11,7 +11,7 @@ define linkonce_odr i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) nounwind readnone al
ret i8 %sat
}
-define linkonce_odr i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+define i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
%call = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %x, i8 %y)
%res = extractvalue {i8, i1} %call, 0
%over = extractvalue {i8, i1} %call, 1
@@ -22,7 +22,7 @@ define linkonce_odr i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) nounwind readnone al
declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16)
declare {i16, i1} @llvm.uadd.with.overflow.i16(i16, i16)
-define linkonce_odr i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+define i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
%call = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 %x, i16 %y)
%res = extractvalue {i16, i1} %call, 0
%over = extractvalue {i16, i1} %call, 1
@@ -32,7 +32,7 @@ define linkonce_odr i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) nounwind readnon
ret i16 %sat
}
-define linkonce_odr i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+define i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
%call = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 %x, i16 %y)
%res = extractvalue {i16, i1} %call, 0
%over = extractvalue {i16, i1} %call, 1
@@ -43,7 +43,7 @@ define linkonce_odr i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) nounwind readnon
declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32)
declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32)
-define linkonce_odr i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+define i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
%call = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
%res = extractvalue {i32, i1} %call, 0
%over = extractvalue {i32, i1} %call, 1
@@ -53,7 +53,7 @@ define linkonce_odr i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) nounwind readnon
ret i32 %sat
}
-define linkonce_odr i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+define i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
%call = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
%res = extractvalue {i32, i1} %call, 0
%over = extractvalue {i32, i1} %call, 1
@@ -64,7 +64,7 @@ define linkonce_odr i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) nounwind readnon
declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64)
declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64)
-define linkonce_odr i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+define i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
%call = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %x, i64 %y)
%res = extractvalue {i64, i1} %call, 0
%over = extractvalue {i64, i1} %call, 1
@@ -74,7 +74,7 @@ define linkonce_odr i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) nounwind readnon
ret i64 %sat
}
-define linkonce_odr i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+define i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
%call = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %x, i64 %y)
%res = extractvalue {i64, i1} %call, 0
%over = extractvalue {i64, i1} %call, 1
diff --git a/generic/lib/integer/sub_sat.cl b/generic/lib/integer/sub_sat.cl
new file mode 100644
index 0000000..9555b6d
--- /dev/null
+++ b/generic/lib/integer/sub_sat.cl
@@ -0,0 +1,52 @@
+#include <clc/clc.h>
+
+// From sub_sat.ll
+_CLC_DECL char __clc_sub_sat_s8(char, char);
+_CLC_DECL uchar __clc_sub_sat_u8(uchar, uchar);
+_CLC_DECL short __clc_sub_sat_s16(short, short);
+_CLC_DECL ushort __clc_sub_sat_u16(ushort, ushort);
+_CLC_DECL int __clc_sub_sat_s32(int, int);
+_CLC_DECL uint __clc_sub_sat_u32(uint, uint);
+_CLC_DECL long __clc_sub_sat_s64(long, long);
+_CLC_DECL ulong __clc_sub_sat_u64(ulong, ulong);
+
+_CLC_OVERLOAD _CLC_DEF char sub_sat(char x, char y) {
+ return __clc_sub_sat_s8(x, y);
+}
+
+_CLC_OVERLOAD _CLC_DEF uchar sub_sat(uchar x, uchar y) {
+ return __clc_sub_sat_u8(x, y);
+}
+
+_CLC_OVERLOAD _CLC_DEF short sub_sat(short x, short y) {
+ return __clc_sub_sat_s16(x, y);
+}
+
+_CLC_OVERLOAD _CLC_DEF ushort sub_sat(ushort x, ushort y) {
+ return __clc_sub_sat_u16(x, y);
+}
+
+_CLC_OVERLOAD _CLC_DEF int sub_sat(int x, int y) {
+ return __clc_sub_sat_s32(x, y);
+}
+
+_CLC_OVERLOAD _CLC_DEF uint sub_sat(uint x, uint y) {
+ return __clc_sub_sat_u32(x, y);
+}
+
+_CLC_OVERLOAD _CLC_DEF long sub_sat(long x, long y) {
+ return __clc_sub_sat_s64(x, y);
+}
+
+_CLC_OVERLOAD _CLC_DEF ulong sub_sat(ulong x, ulong y) {
+ return __clc_sub_sat_u64(x, y);
+}
+
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, sub_sat, char, char)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, sub_sat, uchar, uchar)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, sub_sat, short, short)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, sub_sat, ushort, ushort)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, sub_sat, int, int)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, sub_sat, uint, uint)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, sub_sat, long, long)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, sub_sat, ulong, ulong)
diff --git a/generic/lib/integer/sub_sat.ll b/generic/lib/integer/sub_sat.ll
new file mode 100644
index 0000000..7252574
--- /dev/null
+++ b/generic/lib/integer/sub_sat.ll
@@ -0,0 +1,55 @@
+declare i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y)
+
+define i8 @__clc_sub_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+ %call = call i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y)
+ ret i8 %call
+}
+
+declare i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y)
+
+define i8 @__clc_sub_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+ %call = call i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y)
+ ret i8 %call
+}
+
+declare i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y)
+
+define i16 @__clc_sub_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+ %call = call i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y)
+ ret i16 %call
+}
+
+declare i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y)
+
+define i16 @__clc_sub_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+ %call = call i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y)
+ ret i16 %call
+}
+
+declare i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y)
+
+define i32 @__clc_sub_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+ %call = call i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y)
+ ret i32 %call
+}
+
+declare i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y)
+
+define i32 @__clc_sub_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+ %call = call i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y)
+ ret i32 %call
+}
+
+declare i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y)
+
+define i64 @__clc_sub_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+ %call = call i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y)
+ ret i64 %call
+}
+
+declare i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y)
+
+define i64 @__clc_sub_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+ %call = call i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y)
+ ret i64 %call
+}
diff --git a/generic/lib/integer/sub_sat_impl.ll b/generic/lib/integer/sub_sat_impl.ll
new file mode 100644
index 0000000..e82b632
--- /dev/null
+++ b/generic/lib/integer/sub_sat_impl.ll
@@ -0,0 +1,83 @@
+declare {i8, i1} @llvm.ssub.with.overflow.i8(i8, i8)
+declare {i8, i1} @llvm.usub.with.overflow.i8(i8, i8)
+
+define i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+ %call = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 %x, i8 %y)
+ %res = extractvalue {i8, i1} %call, 0
+ %over = extractvalue {i8, i1} %call, 1
+ %x.msb = ashr i8 %x, 7
+ %x.limit = xor i8 %x.msb, 127
+ %sat = select i1 %over, i8 %x.limit, i8 %res
+ ret i8 %sat
+}
+
+define i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+ %call = call {i8, i1} @llvm.usub.with.overflow.i8(i8 %x, i8 %y)
+ %res = extractvalue {i8, i1} %call, 0
+ %over = extractvalue {i8, i1} %call, 1
+ %sat = select i1 %over, i8 0, i8 %res
+ ret i8 %sat
+}
+
+declare {i16, i1} @llvm.ssub.with.overflow.i16(i16, i16)
+declare {i16, i1} @llvm.usub.with.overflow.i16(i16, i16)
+
+define i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+ %call = call {i16, i1} @llvm.ssub.with.overflow.i16(i16 %x, i16 %y)
+ %res = extractvalue {i16, i1} %call, 0
+ %over = extractvalue {i16, i1} %call, 1
+ %x.msb = ashr i16 %x, 15
+ %x.limit = xor i16 %x.msb, 32767
+ %sat = select i1 %over, i16 %x.limit, i16 %res
+ ret i16 %sat
+}
+
+define i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+ %call = call {i16, i1} @llvm.usub.with.overflow.i16(i16 %x, i16 %y)
+ %res = extractvalue {i16, i1} %call, 0
+ %over = extractvalue {i16, i1} %call, 1
+ %sat = select i1 %over, i16 0, i16 %res
+ ret i16 %sat
+}
+
+declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32)
+declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32)
+
+define i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+ %call = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %x, i32 %y)
+ %res = extractvalue {i32, i1} %call, 0
+ %over = extractvalue {i32, i1} %call, 1
+ %x.msb = ashr i32 %x, 31
+ %x.limit = xor i32 %x.msb, 2147483647
+ %sat = select i1 %over, i32 %x.limit, i32 %res
+ ret i32 %sat
+}
+
+define i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+ %call = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %x, i32 %y)
+ %res = extractvalue {i32, i1} %call, 0
+ %over = extractvalue {i32, i1} %call, 1
+ %sat = select i1 %over, i32 0, i32 %res
+ ret i32 %sat
+}
+
+declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64)
+declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64)
+
+define i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+ %call = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %x, i64 %y)
+ %res = extractvalue {i64, i1} %call, 0
+ %over = extractvalue {i64, i1} %call, 1
+ %x.msb = ashr i64 %x, 63
+ %x.limit = xor i64 %x.msb, 9223372036854775807
+ %sat = select i1 %over, i64 %x.limit, i64 %res
+ ret i64 %sat
+}
+
+define i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+ %call = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %x, i64 %y)
+ %res = extractvalue {i64, i1} %call, 0
+ %over = extractvalue {i64, i1} %call, 1
+ %sat = select i1 %over, i64 0, i64 %res
+ ret i64 %sat
+}
diff --git a/generic/lib/relational/any.cl b/generic/lib/relational/any.cl
new file mode 100644
index 0000000..4d37210
--- /dev/null
+++ b/generic/lib/relational/any.cl
@@ -0,0 +1,30 @@
+#include <clc/clc.h>
+
+#define _CLC_ANY(v) (((v) >> ((sizeof(v) * 8) - 1)) & 0x1)
+#define _CLC_ANY2(v) (_CLC_ANY((v).s0) | _CLC_ANY((v).s1))
+#define _CLC_ANY3(v) (_CLC_ANY2((v)) | _CLC_ANY((v).s2))
+#define _CLC_ANY4(v) (_CLC_ANY3((v)) | _CLC_ANY((v).s3))
+#define _CLC_ANY8(v) (_CLC_ANY4((v)) | _CLC_ANY((v).s4) | _CLC_ANY((v).s5) \
+ | _CLC_ANY((v).s6) | _CLC_ANY((v).s7))
+#define _CLC_ANY16(v) (_CLC_ANY8((v)) | _CLC_ANY((v).s8) | _CLC_ANY((v).s9) \
+ | _CLC_ANY((v).sA) | _CLC_ANY((v).sB) \
+ | _CLC_ANY((v).sC) | _CLC_ANY((v).sD) \
+ | _CLC_ANY((v).sE) | _CLC_ANY((v).sf))
+
+
+#define ANY_ID(TYPE) \
+ _CLC_OVERLOAD _CLC_DEF int any(TYPE v)
+
+#define ANY_VECTORIZE(TYPE) \
+ ANY_ID(TYPE) { return _CLC_ANY(v); } \
+ ANY_ID(TYPE##2) { return _CLC_ANY2(v); } \
+ ANY_ID(TYPE##3) { return _CLC_ANY3(v); } \
+ ANY_ID(TYPE##4) { return _CLC_ANY4(v); } \
+ ANY_ID(TYPE##8) { return _CLC_ANY8(v); } \
+ ANY_ID(TYPE##16) { return _CLC_ANY16(v); }
+
+ANY_VECTORIZE(char)
+ANY_VECTORIZE(short)
+ANY_VECTORIZE(int)
+ANY_VECTORIZE(long)
+
diff --git a/generic/lib/workitem/get_global_id.cl b/generic/lib/workitem/get_global_id.cl
new file mode 100644
index 0000000..fdd83d2
--- /dev/null
+++ b/generic/lib/workitem/get_global_id.cl
@@ -0,0 +1,5 @@
+#include <clc/clc.h>
+
+_CLC_DEF size_t get_global_id(uint dim) {
+ return get_group_id(dim)*get_local_size(dim) + get_local_id(dim);
+}
diff --git a/generic/lib/workitem/get_global_size.cl b/generic/lib/workitem/get_global_size.cl
new file mode 100644
index 0000000..5ae649e
--- /dev/null
+++ b/generic/lib/workitem/get_global_size.cl
@@ -0,0 +1,5 @@
+#include <clc/clc.h>
+
+_CLC_DEF size_t get_global_size(uint dim) {
+ return get_num_groups(dim)*get_local_size(dim);
+}
diff --git a/ptx-nvidiacl/include/clc/workitem/get_global_id.h b/ptx-nvidiacl/include/clc/workitem/get_global_id.h
deleted file mode 100644
index 026d2fe..0000000
--- a/ptx-nvidiacl/include/clc/workitem/get_global_id.h
+++ /dev/null
@@ -1,8 +0,0 @@
-_CLC_INLINE size_t get_global_id(uint dim) {
- switch (dim) {
- case 0: return __builtin_ptx_read_ctaid_x()*__builtin_ptx_read_ntid_x()+__builtin_ptx_read_tid_x();
- case 1: return __builtin_ptx_read_ctaid_y()*__builtin_ptx_read_ntid_y()+__builtin_ptx_read_tid_y();
- case 2: return __builtin_ptx_read_ctaid_z()*__builtin_ptx_read_ntid_z()+__builtin_ptx_read_tid_z();
- default: return 0;
- }
-}
diff --git a/ptx-nvidiacl/include/clc/workitem/get_global_size.h b/ptx-nvidiacl/include/clc/workitem/get_global_size.h
deleted file mode 100644
index 5cd4222..0000000
--- a/ptx-nvidiacl/include/clc/workitem/get_global_size.h
+++ /dev/null
@@ -1,8 +0,0 @@
-_CLC_INLINE size_t get_global_size(uint dim) {
- switch (dim) {
- case 0: return __builtin_ptx_read_nctaid_x()*__builtin_ptx_read_ntid_x();
- case 1: return __builtin_ptx_read_nctaid_y()*__builtin_ptx_read_ntid_y();
- case 2: return __builtin_ptx_read_nctaid_z()*__builtin_ptx_read_ntid_z();
- default: return 0;
- }
-}
diff --git a/ptx-nvidiacl/lib/SOURCES b/ptx-nvidiacl/lib/SOURCES
index e69de29..7cdbd85 100644
--- a/ptx-nvidiacl/lib/SOURCES
+++ b/ptx-nvidiacl/lib/SOURCES
@@ -0,0 +1,5 @@
+synchronization/barrier.cl
+workitem/get_group_id.cl
+workitem/get_local_id.cl
+workitem/get_local_size.cl
+workitem/get_num_groups.cl
diff --git a/ptx-nvidiacl/include/clc/synchronization/barrier.h b/ptx-nvidiacl/lib/synchronization/barrier.cl
index cd9f327..fb36c26 100644
--- a/ptx-nvidiacl/include/clc/synchronization/barrier.h
+++ b/ptx-nvidiacl/lib/synchronization/barrier.cl
@@ -1,4 +1,6 @@
-_CLC_INLINE void barrier(cl_mem_fence_flags flags) {
+#include <clc/clc.h>
+
+_CLC_DEF void barrier(cl_mem_fence_flags flags) {
if (flags & CLK_LOCAL_MEM_FENCE) {
__builtin_ptx_bar_sync(0);
}
diff --git a/ptx-nvidiacl/include/clc/workitem/get_group_id.h b/ptx-nvidiacl/lib/workitem/get_group_id.cl
index 18b1bd4..2b35b4e 100644
--- a/ptx-nvidiacl/include/clc/workitem/get_group_id.h
+++ b/ptx-nvidiacl/lib/workitem/get_group_id.cl
@@ -1,4 +1,6 @@
-_CLC_INLINE size_t get_group_id(uint dim) {
+#include <clc/clc.h>
+
+_CLC_DEF size_t get_group_id(uint dim) {
switch (dim) {
case 0: return __builtin_ptx_read_ctaid_x();
case 1: return __builtin_ptx_read_ctaid_y();
diff --git a/ptx-nvidiacl/include/clc/workitem/get_local_id.h b/ptx-nvidiacl/lib/workitem/get_local_id.cl
index 1b8c776..f0cfdc0 100644
--- a/ptx-nvidiacl/include/clc/workitem/get_local_id.h
+++ b/ptx-nvidiacl/lib/workitem/get_local_id.cl
@@ -1,4 +1,6 @@
-_CLC_INLINE size_t get_local_id(uint dim) {
+#include <clc/clc.h>
+
+_CLC_DEF size_t get_local_id(uint dim) {
switch (dim) {
case 0: return __builtin_ptx_read_tid_x();
case 1: return __builtin_ptx_read_tid_y();
diff --git a/ptx-nvidiacl/include/clc/workitem/get_local_size.h b/ptx-nvidiacl/lib/workitem/get_local_size.cl
index cbc1f6e..c3f5425 100644
--- a/ptx-nvidiacl/include/clc/workitem/get_local_size.h
+++ b/ptx-nvidiacl/lib/workitem/get_local_size.cl
@@ -1,4 +1,6 @@
-_CLC_INLINE size_t get_local_size(uint dim) {
+#include <clc/clc.h>
+
+_CLC_DEF size_t get_local_size(uint dim) {
switch (dim) {
case 0: return __builtin_ptx_read_ntid_x();
case 1: return __builtin_ptx_read_ntid_y();
diff --git a/ptx-nvidiacl/include/clc/workitem/get_num_groups.h b/ptx-nvidiacl/lib/workitem/get_num_groups.cl
index 36ee849..90bdc2e 100644
--- a/ptx-nvidiacl/include/clc/workitem/get_num_groups.h
+++ b/ptx-nvidiacl/lib/workitem/get_num_groups.cl
@@ -1,4 +1,6 @@
-_CLC_INLINE size_t get_num_groups(uint dim) {
+#include <clc/clc.h>
+
+_CLC_DEF size_t get_num_groups(uint dim) {
switch (dim) {
case 0: return __builtin_ptx_read_nctaid_x();
case 1: return __builtin_ptx_read_nctaid_y();
diff --git a/ptx/lib/SOURCES b/ptx/lib/SOURCES
index aab8e3f..fb6e17f 100644
--- a/ptx/lib/SOURCES
+++ b/ptx/lib/SOURCES
@@ -1 +1,2 @@
integer/add_sat.ll
+integer/sub_sat.ll \ No newline at end of file
diff --git a/ptx/lib/integer/add_sat.ll b/ptx/lib/integer/add_sat.ll
index 9b8311c..f887962 100644
--- a/ptx/lib/integer/add_sat.ll
+++ b/ptx/lib/integer/add_sat.ll
@@ -1,55 +1,55 @@
declare i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y)
-define linkonce_odr ptx_device i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+define ptx_device i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
%call = call i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y)
ret i8 %call
}
declare i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y)
-define linkonce_odr ptx_device i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+define ptx_device i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
%call = call i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y)
ret i8 %call
}
declare i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y)
-define linkonce_odr ptx_device i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+define ptx_device i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
%call = call i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y)
ret i16 %call
}
declare i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y)
-define linkonce_odr ptx_device i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+define ptx_device i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
%call = call i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y)
ret i16 %call
}
declare i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y)
-define linkonce_odr ptx_device i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+define ptx_device i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
%call = call i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y)
ret i32 %call
}
declare i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y)
-define linkonce_odr ptx_device i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+define ptx_device i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
%call = call i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y)
ret i32 %call
}
declare i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y)
-define linkonce_odr ptx_device i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+define ptx_device i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
%call = call i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y)
ret i64 %call
}
declare i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y)
-define linkonce_odr ptx_device i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+define ptx_device i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
%call = call i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y)
ret i64 %call
}
diff --git a/ptx/lib/integer/sub_sat.ll b/ptx/lib/integer/sub_sat.ll
new file mode 100644
index 0000000..1a66eb5
--- /dev/null
+++ b/ptx/lib/integer/sub_sat.ll
@@ -0,0 +1,55 @@
+declare i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y)
+
+define ptx_device i8 @__clc_sub_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+ %call = call i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y)
+ ret i8 %call
+}
+
+declare i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y)
+
+define ptx_device i8 @__clc_sub_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+ %call = call i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y)
+ ret i8 %call
+}
+
+declare i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y)
+
+define ptx_device i16 @__clc_sub_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+ %call = call i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y)
+ ret i16 %call
+}
+
+declare i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y)
+
+define ptx_device i16 @__clc_sub_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+ %call = call i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y)
+ ret i16 %call
+}
+
+declare i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y)
+
+define ptx_device i32 @__clc_sub_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+ %call = call i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y)
+ ret i32 %call
+}
+
+declare i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y)
+
+define ptx_device i32 @__clc_sub_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+ %call = call i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y)
+ ret i32 %call
+}
+
+declare i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y)
+
+define ptx_device i64 @__clc_sub_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+ %call = call i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y)
+ ret i64 %call
+}
+
+declare i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y)
+
+define ptx_device i64 @__clc_sub_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+ %call = call i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y)
+ ret i64 %call
+}
diff --git a/test/rsqrt.cl b/test/rsqrt.cl
new file mode 100644
index 0000000..13ad216
--- /dev/null
+++ b/test/rsqrt.cl
@@ -0,0 +1,6 @@
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+__kernel void foo(float4 *x, double4 *y) {
+ x[1] = rsqrt(x[0]);
+ y[1] = rsqrt(y[0]);
+}
diff --git a/test/subsat.cl b/test/subsat.cl
new file mode 100644
index 0000000..a83414b
--- /dev/null
+++ b/test/subsat.cl
@@ -0,0 +1,19 @@
+__kernel void test_subsat_char(char *a, char x, char y) {
+ *a = sub_sat(x, y);
+ return;
+}
+
+__kernel void test_subsat_uchar(uchar *a, uchar x, uchar y) {
+ *a = sub_sat(x, y);
+ return;
+}
+
+__kernel void test_subsat_long(long *a, long x, long y) {
+ *a = sub_sat(x, y);
+ return;
+}
+
+__kernel void test_subsat_ulong(ulong *a, ulong x, ulong y) {
+ *a = sub_sat(x, y);
+ return;
+} \ No newline at end of file