/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see .
*
* Author: Benjamin Segovia
*/
#define DECL_INTERNAL_WORK_ITEM_FN(NAME) \
__attribute__((pure,const)) unsigned int __gen_ocl_##NAME##0(void); \
__attribute__((pure,const)) unsigned int __gen_ocl_##NAME##1(void); \
__attribute__((pure,const)) unsigned int __gen_ocl_##NAME##2(void);
DECL_INTERNAL_WORK_ITEM_FN(get_group_id)
DECL_INTERNAL_WORK_ITEM_FN(get_local_id)
DECL_INTERNAL_WORK_ITEM_FN(get_local_size)
DECL_INTERNAL_WORK_ITEM_FN(get_global_size)
DECL_INTERNAL_WORK_ITEM_FN(get_num_groups)
#undef DECL_INTERNAL_WORK_ITEM_FN
#define DECL_PUBLIC_WORK_ITEM_FN(NAME) \
inline unsigned NAME(unsigned int dim) { \
if (dim == 0) return __gen_ocl_##NAME##0(); \
else if (dim == 1) return __gen_ocl_##NAME##1(); \
else if (dim == 2) return __gen_ocl_##NAME##2(); \
else return 0; \
}
DECL_PUBLIC_WORK_ITEM_FN(get_group_id)
DECL_PUBLIC_WORK_ITEM_FN(get_local_id)
DECL_PUBLIC_WORK_ITEM_FN(get_local_size)
DECL_PUBLIC_WORK_ITEM_FN(get_global_size)
DECL_PUBLIC_WORK_ITEM_FN(get_num_groups)
#undef DECL_PUBLIC_WORK_ITEM_FN
inline unsigned int get_global_id(unsigned int dim) {
return get_local_id(dim) + get_local_size(dim) * get_num_groups(dim);
}
__attribute__ ((pure,const,overloadable)) float mad(float a, float b, float c);
__attribute__((overloadable)) inline unsigned select(unsigned src0, unsigned src1, unsigned cond) {
return cond ? src0 : src1;
}
__attribute__((overloadable)) inline int select(int src0, int src1, int cond) {
return cond ? src0 : src1;
}
typedef float float2 __attribute__((ext_vector_type(2)));
typedef float float3 __attribute__((ext_vector_type(3)));
typedef float float4 __attribute__((ext_vector_type(4)));
typedef int int2 __attribute__((ext_vector_type(2)));
typedef int int3 __attribute__((ext_vector_type(3)));
typedef int int4 __attribute__((ext_vector_type(4)));
typedef int uint2 __attribute__((ext_vector_type(2)));
typedef unsigned uint3 __attribute__((ext_vector_type(3)));
typedef unsigned uint4 __attribute__((ext_vector_type(4)));
typedef bool bool2 __attribute__((ext_vector_type(2)));
typedef bool bool3 __attribute__((ext_vector_type(3)));
typedef bool bool4 __attribute__((ext_vector_type(4)));
// This will be optimized out by LLVM and will output LLVM select instructions
#define DECL_SELECT4(TYPE4, TYPE, COND_TYPE4, MASK) \
__attribute__((overloadable)) \
inline TYPE4 select(TYPE4 src0, TYPE4 src1, COND_TYPE4 cond) { \
TYPE4 dst; \
const TYPE x0 = src0.x; /* Fix performance issue with CLANG */ \
const TYPE x1 = src1.x; \
const TYPE y0 = src0.y; \
const TYPE y1 = src1.y; \
const TYPE z0 = src0.z; \
const TYPE z1 = src1.z; \
const TYPE w0 = src0.w; \
const TYPE w1 = src1.w; \
\
dst.x = (cond.x & MASK) ? x1 : x0; \
dst.y = (cond.y & MASK) ? y1 : y0; \
dst.z = (cond.z & MASK) ? z1 : z0; \
dst.w = (cond.w & MASK) ? w1 : w0; \
return dst; \
}
DECL_SELECT4(int4, int, int4, 0x80000000)
DECL_SELECT4(float4, float, int4, 0x80000000)
#undef DECL_SELECT4
__attribute__((overloadable,always_inline)) inline float2 mad(float2 a, float2 b, float2 c) {
return (float2)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y));
}
__attribute__((overloadable,always_inline)) inline float3 mad(float3 a, float3 b, float3 c) {
return (float3)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y), mad(a.z,b.z,c.z));
}
__attribute__((overloadable,always_inline)) inline float4 mad(float4 a, float4 b, float4 c) {
return (float4)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y),
mad(a.z,b.z,c.z), mad(a.w,b.w,c.w));
}
#define __private __attribute__((address_space(0)))
#define __global __attribute__((address_space(1)))
#define __constant __attribute__((address_space(2)))
//#define __local __attribute__((address_space(3)))
#define global __global
//#define local __local
#define constant __constant
#define private __private
#define NULL ((void*)0)