diff options
author | Benjamin Segovia <segovia.benjamin@gmail.com> | 2012-02-29 20:37:17 +0000 |
---|---|---|
committer | Keith Packard <keithp@keithp.com> | 2012-08-10 16:15:30 -0700 |
commit | ca43f8273cf5aa82b8cf77e26ada33846d772325 (patch) | |
tree | 630808a9bdbebe365d8bc1c76d42113d1ed70901 /backend/kernels | |
parent | ed6ae6a4408e2faa1349f105cd4967737e693de2 (diff) |
Added first support for immediates Started to add support for builtin functions
Diffstat (limited to 'backend/kernels')
-rw-r--r-- | backend/kernels/add2.cl | 2 | ||||
-rw-r--r-- | backend/kernels/add2.ll | 5 | ||||
-rw-r--r-- | backend/kernels/add2.o | bin | 488 -> 496 bytes | |||
-rw-r--r-- | backend/kernels/get_global_id.cbe.c | 162 | ||||
-rw-r--r-- | backend/kernels/get_global_id.cl | 17 | ||||
-rw-r--r-- | backend/kernels/get_global_id.ll | 22 | ||||
-rw-r--r-- | backend/kernels/get_global_id.o | bin | 0 -> 596 bytes |
7 files changed, 205 insertions, 3 deletions
diff --git a/backend/kernels/add2.cl b/backend/kernels/add2.cl index c43e2c34..80705768 100644 --- a/backend/kernels/add2.cl +++ b/backend/kernels/add2.cl @@ -6,7 +6,7 @@ __kernel struct big add(unsigned int x, unsigned int y) { struct big p; p.a = x + y; - p.b = x - y; + p.b = x - y + 10; return p; } diff --git a/backend/kernels/add2.ll b/backend/kernels/add2.ll index 37cf7a3e..4ca11259 100644 --- a/backend/kernels/add2.ll +++ b/backend/kernels/add2.ll @@ -7,11 +7,12 @@ target triple = "ptx32--" define ptx_kernel void @add(%struct.big* noalias nocapture sret %agg.result, i32 %x, i32 %y) nounwind noinline { entry: %add = add i32 %y, %x - %sub = sub i32 %x, %y + %sub = add i32 %x, 10 + %add1 = sub i32 %sub, %y %agg.result.0 = getelementptr inbounds %struct.big* %agg.result, i32 0, i32 0 store i32 %add, i32* %agg.result.0, align 4 %agg.result.1 = getelementptr inbounds %struct.big* %agg.result, i32 0, i32 1 - store i32 %sub, i32* %agg.result.1, align 4 + store i32 %add1, i32* %agg.result.1, align 4 ret void } diff --git a/backend/kernels/add2.o b/backend/kernels/add2.o Binary files differindex 1feb0355..8b5ebb42 100644 --- a/backend/kernels/add2.o +++ b/backend/kernels/add2.o diff --git a/backend/kernels/get_global_id.cbe.c b/backend/kernels/get_global_id.cbe.c new file mode 100644 index 00000000..4dbae41c --- /dev/null +++ b/backend/kernels/get_global_id.cbe.c @@ -0,0 +1,162 @@ +/* + * Copyright © 2012 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Benjamin Segovia <benjamin.segovia@intel.com> + */ + +/* Provide Declarations */ +#include <stdarg.h> +#include <setjmp.h> +#include <limits.h> +/* get a declaration for alloca */ +#if defined(__CYGWIN__) || defined(__MINGW32__) +#define alloca(x) __builtin_alloca((x)) +#define _alloca(x) __builtin_alloca((x)) +#elif defined(__APPLE__) +extern void *__builtin_alloca(unsigned long); +#define alloca(x) __builtin_alloca(x) +#define longjmp _longjmp +#define setjmp _setjmp +#elif defined(__sun__) +#if defined(__sparcv9) +extern void *__builtin_alloca(unsigned long); +#else +extern void *__builtin_alloca(unsigned int); +#endif +#define alloca(x) __builtin_alloca(x) +#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__) +#define alloca(x) __builtin_alloca(x) +#elif defined(_MSC_VER) +#define inline _inline +#define alloca(x) _alloca(x) +#else +#include <alloca.h> +#endif + +#ifndef __GNUC__ /* Can only support "linkonce" vars with GCC */ +#define __attribute__(X) +#endif + +#if defined(__GNUC__) && defined(__APPLE_CC__) +#define __EXTERNAL_WEAK__ __attribute__((weak_import)) +#elif defined(__GNUC__) +#define __EXTERNAL_WEAK__ __attribute__((weak)) +#else +#define __EXTERNAL_WEAK__ +#endif + +#if defined(__GNUC__) && defined(__APPLE_CC__) +#define __ATTRIBUTE_WEAK__ +#elif defined(__GNUC__) +#define __ATTRIBUTE_WEAK__ __attribute__((weak)) +#else +#define __ATTRIBUTE_WEAK__ +#endif + +#if defined(__GNUC__) +#define __HIDDEN__ __attribute__((visibility("hidden"))) +#endif + +#ifdef __GNUC__ +#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */ +#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */ +#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */ +#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */ +#define LLVM_INF __builtin_inf() /* Double */ +#define LLVM_INFF __builtin_inff() /* Float */ +#define LLVM_PREFETCH(addr,rw,locality) __builtin_prefetch(addr,rw,locality) +#define __ATTRIBUTE_CTOR__ __attribute__((constructor)) +#define __ATTRIBUTE_DTOR__ __attribute__((destructor)) +#define LLVM_ASM __asm__ +#else +#define LLVM_NAN(NanStr) ((double)0.0) /* Double */ +#define LLVM_NANF(NanStr) 0.0F /* Float */ +#define LLVM_NANS(NanStr) ((double)0.0) /* Double */ +#define LLVM_NANSF(NanStr) 0.0F /* Float */ +#define LLVM_INF ((double)0.0) /* Double */ +#define LLVM_INFF 0.0F /* Float */ +#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */ +#define __ATTRIBUTE_CTOR__ +#define __ATTRIBUTE_DTOR__ +#define LLVM_ASM(X) +#endif + +#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ +#define __builtin_stack_save() 0 /* not implemented */ +#define __builtin_stack_restore(X) /* noop */ +#endif + +#if __GNUC__ && __LP64__ /* 128-bit integer types */ +typedef int __attribute__((mode(TI))) llvmInt128; +typedef unsigned __attribute__((mode(TI))) llvmUInt128; +#endif + +#define CODE_FOR_MAIN() /* Any target-specific code for main()*/ + +#ifndef __cplusplus +typedef unsigned char bool; +#endif + + +/* Support for floating point constants */ +typedef unsigned long long ConstantDoubleTy; +typedef unsigned int ConstantFloatTy; +typedef struct { unsigned long long f1; unsigned short f2; unsigned short pad[3]; } ConstantFP80Ty; +typedef struct { unsigned long long f1; unsigned long long f2; } ConstantFP128Ty; + + +/* Global Declarations */ +/* Helper union for bitcasts */ +typedef union { + unsigned int Int32; + unsigned long long Int64; + float Float; + double Double; +} llvmBitCastUnion; + +/* Function Declarations */ +double fmod(double, double); +float fmodf(float, float); +long double fmodl(long double, long double); +void test_global_id(unsigned int *llvm_cbe_dst); +unsigned int __gen_get_global_id0(void); +void abort(void); + + +/* Function Bodies */ +static inline int llvm_fcmp_ord(double X, double Y) { return X == X && Y == Y; } +static inline int llvm_fcmp_uno(double X, double Y) { return X != X || Y != Y; } +static inline int llvm_fcmp_ueq(double X, double Y) { return X == Y || llvm_fcmp_uno(X, Y); } +static inline int llvm_fcmp_une(double X, double Y) { return X != Y; } +static inline int llvm_fcmp_ult(double X, double Y) { return X < Y || llvm_fcmp_uno(X, Y); } +static inline int llvm_fcmp_ugt(double X, double Y) { return X > Y || llvm_fcmp_uno(X, Y); } +static inline int llvm_fcmp_ule(double X, double Y) { return X <= Y || llvm_fcmp_uno(X, Y); } +static inline int llvm_fcmp_uge(double X, double Y) { return X >= Y || llvm_fcmp_uno(X, Y); } +static inline int llvm_fcmp_oeq(double X, double Y) { return X == Y ; } +static inline int llvm_fcmp_one(double X, double Y) { return X != Y && llvm_fcmp_ord(X, Y); } +static inline int llvm_fcmp_olt(double X, double Y) { return X < Y ; } +static inline int llvm_fcmp_ogt(double X, double Y) { return X > Y ; } +static inline int llvm_fcmp_ole(double X, double Y) { return X <= Y ; } +static inline int llvm_fcmp_oge(double X, double Y) { return X >= Y ; } + +void test_global_id(unsigned int *llvm_cbe_dst) { + unsigned int llvm_cbe_call_2e_i; + + llvm_cbe_call_2e_i = /*tail*/ __gen_get_global_id0(); + *((&llvm_cbe_dst[((signed int )llvm_cbe_call_2e_i)])) = 1u; + return; +} + diff --git a/backend/kernels/get_global_id.cl b/backend/kernels/get_global_id.cl new file mode 100644 index 00000000..86500ada --- /dev/null +++ b/backend/kernels/get_global_id.cl @@ -0,0 +1,17 @@ +__attribute__((pure)) unsigned int __gen_get_global_id0(void); +__attribute__((pure)) unsigned int __gen_get_global_id1(void); +__attribute__((pure)) unsigned int __gen_get_global_id2(void); + +inline unsigned get_global_id(unsigned int dim) { + if (dim == 0) return __gen_get_global_id0(); + else if (dim == 1) return __gen_get_global_id1(); + else if (dim == 2) return __gen_get_global_id2(); + else return 0; +} + +__kernel void test_global_id(__global int *dst) +{ + short hop = get_global_id(0); + dst[get_global_id(0)] = hop; +} + diff --git a/backend/kernels/get_global_id.ll b/backend/kernels/get_global_id.ll new file mode 100644 index 00000000..965739a1 --- /dev/null +++ b/backend/kernels/get_global_id.ll @@ -0,0 +1,22 @@ +; ModuleID = 'get_global_id.o' +target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" +target triple = "ptx32--" + +define ptx_kernel void @test_global_id(i32* nocapture %dst) nounwind noinline { +get_global_id.exit5: + %call.i = tail call ptx_device i32 @__gen_get_global_id0() nounwind readonly + %sext = shl i32 %call.i, 16 + %conv1 = ashr exact i32 %sext, 16 + %arrayidx = getelementptr inbounds i32* %dst, i32 %call.i + store i32 %conv1, i32* %arrayidx, align 4, !tbaa !1 + ret void +} + +declare ptx_device i32 @__gen_get_global_id0() nounwind readonly + +!opencl.kernels = !{!0} + +!0 = metadata !{void (i32*)* @test_global_id} +!1 = metadata !{metadata !"int", metadata !2} +!2 = metadata !{metadata !"omnipotent char", metadata !3} +!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/get_global_id.o b/backend/kernels/get_global_id.o Binary files differnew file mode 100644 index 00000000..e21b2e1f --- /dev/null +++ b/backend/kernels/get_global_id.o |