summaryrefslogtreecommitdiff
path: root/backend/kernels
diff options
context:
space:
mode:
authorBenjamin Segovia <segovia.benjamin@gmail.com>2012-02-29 20:37:17 +0000
committerKeith Packard <keithp@keithp.com>2012-08-10 16:15:30 -0700
commitca43f8273cf5aa82b8cf77e26ada33846d772325 (patch)
tree630808a9bdbebe365d8bc1c76d42113d1ed70901 /backend/kernels
parented6ae6a4408e2faa1349f105cd4967737e693de2 (diff)
Added first support for immediates Started to add support for builtin functions
Diffstat (limited to 'backend/kernels')
-rw-r--r--backend/kernels/add2.cl2
-rw-r--r--backend/kernels/add2.ll5
-rw-r--r--backend/kernels/add2.obin488 -> 496 bytes
-rw-r--r--backend/kernels/get_global_id.cbe.c162
-rw-r--r--backend/kernels/get_global_id.cl17
-rw-r--r--backend/kernels/get_global_id.ll22
-rw-r--r--backend/kernels/get_global_id.obin0 -> 596 bytes
7 files changed, 205 insertions, 3 deletions
diff --git a/backend/kernels/add2.cl b/backend/kernels/add2.cl
index c43e2c34..80705768 100644
--- a/backend/kernels/add2.cl
+++ b/backend/kernels/add2.cl
@@ -6,7 +6,7 @@ __kernel struct big add(unsigned int x, unsigned int y)
{
struct big p;
p.a = x + y;
- p.b = x - y;
+ p.b = x - y + 10;
return p;
}
diff --git a/backend/kernels/add2.ll b/backend/kernels/add2.ll
index 37cf7a3e..4ca11259 100644
--- a/backend/kernels/add2.ll
+++ b/backend/kernels/add2.ll
@@ -7,11 +7,12 @@ target triple = "ptx32--"
define ptx_kernel void @add(%struct.big* noalias nocapture sret %agg.result, i32 %x, i32 %y) nounwind noinline {
entry:
%add = add i32 %y, %x
- %sub = sub i32 %x, %y
+ %sub = add i32 %x, 10
+ %add1 = sub i32 %sub, %y
%agg.result.0 = getelementptr inbounds %struct.big* %agg.result, i32 0, i32 0
store i32 %add, i32* %agg.result.0, align 4
%agg.result.1 = getelementptr inbounds %struct.big* %agg.result, i32 0, i32 1
- store i32 %sub, i32* %agg.result.1, align 4
+ store i32 %add1, i32* %agg.result.1, align 4
ret void
}
diff --git a/backend/kernels/add2.o b/backend/kernels/add2.o
index 1feb0355..8b5ebb42 100644
--- a/backend/kernels/add2.o
+++ b/backend/kernels/add2.o
Binary files differ
diff --git a/backend/kernels/get_global_id.cbe.c b/backend/kernels/get_global_id.cbe.c
new file mode 100644
index 00000000..4dbae41c
--- /dev/null
+++ b/backend/kernels/get_global_id.cbe.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+/* Provide Declarations */
+#include <stdarg.h>
+#include <setjmp.h>
+#include <limits.h>
+/* get a declaration for alloca */
+#if defined(__CYGWIN__) || defined(__MINGW32__)
+#define alloca(x) __builtin_alloca((x))
+#define _alloca(x) __builtin_alloca((x))
+#elif defined(__APPLE__)
+extern void *__builtin_alloca(unsigned long);
+#define alloca(x) __builtin_alloca(x)
+#define longjmp _longjmp
+#define setjmp _setjmp
+#elif defined(__sun__)
+#if defined(__sparcv9)
+extern void *__builtin_alloca(unsigned long);
+#else
+extern void *__builtin_alloca(unsigned int);
+#endif
+#define alloca(x) __builtin_alloca(x)
+#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__)
+#define alloca(x) __builtin_alloca(x)
+#elif defined(_MSC_VER)
+#define inline _inline
+#define alloca(x) _alloca(x)
+#else
+#include <alloca.h>
+#endif
+
+#ifndef __GNUC__ /* Can only support "linkonce" vars with GCC */
+#define __attribute__(X)
+#endif
+
+#if defined(__GNUC__) && defined(__APPLE_CC__)
+#define __EXTERNAL_WEAK__ __attribute__((weak_import))
+#elif defined(__GNUC__)
+#define __EXTERNAL_WEAK__ __attribute__((weak))
+#else
+#define __EXTERNAL_WEAK__
+#endif
+
+#if defined(__GNUC__) && defined(__APPLE_CC__)
+#define __ATTRIBUTE_WEAK__
+#elif defined(__GNUC__)
+#define __ATTRIBUTE_WEAK__ __attribute__((weak))
+#else
+#define __ATTRIBUTE_WEAK__
+#endif
+
+#if defined(__GNUC__)
+#define __HIDDEN__ __attribute__((visibility("hidden")))
+#endif
+
+#ifdef __GNUC__
+#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */
+#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */
+#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */
+#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */
+#define LLVM_INF __builtin_inf() /* Double */
+#define LLVM_INFF __builtin_inff() /* Float */
+#define LLVM_PREFETCH(addr,rw,locality) __builtin_prefetch(addr,rw,locality)
+#define __ATTRIBUTE_CTOR__ __attribute__((constructor))
+#define __ATTRIBUTE_DTOR__ __attribute__((destructor))
+#define LLVM_ASM __asm__
+#else
+#define LLVM_NAN(NanStr) ((double)0.0) /* Double */
+#define LLVM_NANF(NanStr) 0.0F /* Float */
+#define LLVM_NANS(NanStr) ((double)0.0) /* Double */
+#define LLVM_NANSF(NanStr) 0.0F /* Float */
+#define LLVM_INF ((double)0.0) /* Double */
+#define LLVM_INFF 0.0F /* Float */
+#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */
+#define __ATTRIBUTE_CTOR__
+#define __ATTRIBUTE_DTOR__
+#define LLVM_ASM(X)
+#endif
+
+#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */
+#define __builtin_stack_save() 0 /* not implemented */
+#define __builtin_stack_restore(X) /* noop */
+#endif
+
+#if __GNUC__ && __LP64__ /* 128-bit integer types */
+typedef int __attribute__((mode(TI))) llvmInt128;
+typedef unsigned __attribute__((mode(TI))) llvmUInt128;
+#endif
+
+#define CODE_FOR_MAIN() /* Any target-specific code for main()*/
+
+#ifndef __cplusplus
+typedef unsigned char bool;
+#endif
+
+
+/* Support for floating point constants */
+typedef unsigned long long ConstantDoubleTy;
+typedef unsigned int ConstantFloatTy;
+typedef struct { unsigned long long f1; unsigned short f2; unsigned short pad[3]; } ConstantFP80Ty;
+typedef struct { unsigned long long f1; unsigned long long f2; } ConstantFP128Ty;
+
+
+/* Global Declarations */
+/* Helper union for bitcasts */
+typedef union {
+ unsigned int Int32;
+ unsigned long long Int64;
+ float Float;
+ double Double;
+} llvmBitCastUnion;
+
+/* Function Declarations */
+double fmod(double, double);
+float fmodf(float, float);
+long double fmodl(long double, long double);
+void test_global_id(unsigned int *llvm_cbe_dst);
+unsigned int __gen_get_global_id0(void);
+void abort(void);
+
+
+/* Function Bodies */
+static inline int llvm_fcmp_ord(double X, double Y) { return X == X && Y == Y; }
+static inline int llvm_fcmp_uno(double X, double Y) { return X != X || Y != Y; }
+static inline int llvm_fcmp_ueq(double X, double Y) { return X == Y || llvm_fcmp_uno(X, Y); }
+static inline int llvm_fcmp_une(double X, double Y) { return X != Y; }
+static inline int llvm_fcmp_ult(double X, double Y) { return X < Y || llvm_fcmp_uno(X, Y); }
+static inline int llvm_fcmp_ugt(double X, double Y) { return X > Y || llvm_fcmp_uno(X, Y); }
+static inline int llvm_fcmp_ule(double X, double Y) { return X <= Y || llvm_fcmp_uno(X, Y); }
+static inline int llvm_fcmp_uge(double X, double Y) { return X >= Y || llvm_fcmp_uno(X, Y); }
+static inline int llvm_fcmp_oeq(double X, double Y) { return X == Y ; }
+static inline int llvm_fcmp_one(double X, double Y) { return X != Y && llvm_fcmp_ord(X, Y); }
+static inline int llvm_fcmp_olt(double X, double Y) { return X < Y ; }
+static inline int llvm_fcmp_ogt(double X, double Y) { return X > Y ; }
+static inline int llvm_fcmp_ole(double X, double Y) { return X <= Y ; }
+static inline int llvm_fcmp_oge(double X, double Y) { return X >= Y ; }
+
+void test_global_id(unsigned int *llvm_cbe_dst) {
+ unsigned int llvm_cbe_call_2e_i;
+
+ llvm_cbe_call_2e_i = /*tail*/ __gen_get_global_id0();
+ *((&llvm_cbe_dst[((signed int )llvm_cbe_call_2e_i)])) = 1u;
+ return;
+}
+
diff --git a/backend/kernels/get_global_id.cl b/backend/kernels/get_global_id.cl
new file mode 100644
index 00000000..86500ada
--- /dev/null
+++ b/backend/kernels/get_global_id.cl
@@ -0,0 +1,17 @@
+__attribute__((pure)) unsigned int __gen_get_global_id0(void);
+__attribute__((pure)) unsigned int __gen_get_global_id1(void);
+__attribute__((pure)) unsigned int __gen_get_global_id2(void);
+
+inline unsigned get_global_id(unsigned int dim) {
+ if (dim == 0) return __gen_get_global_id0();
+ else if (dim == 1) return __gen_get_global_id1();
+ else if (dim == 2) return __gen_get_global_id2();
+ else return 0;
+}
+
+__kernel void test_global_id(__global int *dst)
+{
+ short hop = get_global_id(0);
+ dst[get_global_id(0)] = hop;
+}
+
diff --git a/backend/kernels/get_global_id.ll b/backend/kernels/get_global_id.ll
new file mode 100644
index 00000000..965739a1
--- /dev/null
+++ b/backend/kernels/get_global_id.ll
@@ -0,0 +1,22 @@
+; ModuleID = 'get_global_id.o'
+target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64"
+target triple = "ptx32--"
+
+define ptx_kernel void @test_global_id(i32* nocapture %dst) nounwind noinline {
+get_global_id.exit5:
+ %call.i = tail call ptx_device i32 @__gen_get_global_id0() nounwind readonly
+ %sext = shl i32 %call.i, 16
+ %conv1 = ashr exact i32 %sext, 16
+ %arrayidx = getelementptr inbounds i32* %dst, i32 %call.i
+ store i32 %conv1, i32* %arrayidx, align 4, !tbaa !1
+ ret void
+}
+
+declare ptx_device i32 @__gen_get_global_id0() nounwind readonly
+
+!opencl.kernels = !{!0}
+
+!0 = metadata !{void (i32*)* @test_global_id}
+!1 = metadata !{metadata !"int", metadata !2}
+!2 = metadata !{metadata !"omnipotent char", metadata !3}
+!3 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/backend/kernels/get_global_id.o b/backend/kernels/get_global_id.o
new file mode 100644
index 00000000..e21b2e1f
--- /dev/null
+++ b/backend/kernels/get_global_id.o
Binary files differ