diff options
author | Junyan He <junyan.he@linux.intel.com> | 2014-09-01 10:08:34 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2014-09-04 12:07:42 +0800 |
commit | 359714e40e9c8f16c8b9bf1262885abd3c699957 (patch) | |
tree | 2a2e0f7b3a356563f3d719c2d67e8eaf52821815 /backend | |
parent | 9cefeade4ccebca81fbc0a69bd5ab290897c52e5 (diff) |
Add the misc module into the libocl
Signed-off-by: Junyan He <junyan.he@linux.intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@intel.com>
Diffstat (limited to 'backend')
-rw-r--r-- | backend/src/libocl/include/ocl_misc.h | 122 | ||||
-rw-r--r-- | backend/src/libocl/src/ocl_misc.cl | 201 |
2 files changed, 323 insertions, 0 deletions
diff --git a/backend/src/libocl/include/ocl_misc.h b/backend/src/libocl/include/ocl_misc.h
new file mode 100644
index 0000000..da08d10
--- /dev/null
+++ b/backend/src/libocl/include/ocl_misc.h
@@ -0,0 +1,122 @@
+#ifndef __OCL_MISC_H__
+#define __OCL_MISC_H__
+
+#include "ocl_types.h"
+/* shuffle(): DECn declares the overload returning TYPEn from a source vector of type XTYPE. */
+#define DEC2(TYPE, XTYPE, MASKTYPE) \
+  OVERLOADABLE TYPE##2 shuffle(XTYPE x, MASKTYPE##2 mask);
+
+#define DEC4(TYPE, XTYPE, MASKTYPE) \
+  OVERLOADABLE TYPE##4 shuffle(XTYPE x, MASKTYPE##4 mask);
+
+#define DEC8(TYPE, XTYPE, MASKTYPE) \
+  OVERLOADABLE TYPE##8 shuffle(XTYPE x, MASKTYPE##8 mask);
+
+#define DEC16(TYPE, XTYPE, MASKTYPE) \
+  OVERLOADABLE TYPE##16 shuffle(XTYPE x, MASKTYPE##16 mask);
+/* DEFMASK: each result width crossed with each source width (2/4/8/16). NOTE(review): reconstructed from the DECn signatures - confirm against upstream. */
+#define DEFMASK(TYPE, MASKTYPE) \
+  DEC2(TYPE, TYPE##2, MASKTYPE) DEC2(TYPE, TYPE##4, MASKTYPE) DEC2(TYPE, TYPE##8, MASKTYPE) DEC2(TYPE, TYPE##16, MASKTYPE) \
+  DEC4(TYPE, TYPE##2, MASKTYPE) DEC4(TYPE, TYPE##4, MASKTYPE) DEC4(TYPE, TYPE##8, MASKTYPE) DEC4(TYPE, TYPE##16, MASKTYPE) \
+  DEC8(TYPE, TYPE##2, MASKTYPE) DEC8(TYPE, TYPE##4, MASKTYPE) DEC8(TYPE, TYPE##8, MASKTYPE) DEC8(TYPE, TYPE##16, MASKTYPE) \
+  DEC16(TYPE, TYPE##2, MASKTYPE) DEC16(TYPE, TYPE##4, MASKTYPE) DEC16(TYPE, TYPE##8, MASKTYPE) DEC16(TYPE, TYPE##16, MASKTYPE)
+/* DEF: instantiate DEFMASK once per unsigned mask element type. */
+#define DEF(TYPE) \
+  DEFMASK(TYPE, uchar) \
+  DEFMASK(TYPE, ushort) \
+  DEFMASK(TYPE, uint) \
+  DEFMASK(TYPE, ulong)
+
+DEF(char)
+DEF(uchar)
+DEF(short)
+DEF(ushort)
+DEF(int)
+DEF(uint)
+DEF(float)
+DEF(long)
+DEF(ulong)
+#undef DEF
+#undef DEFMASK
+#undef DEC2
+#undef DEC4
+#undef DEC8
+#undef DEC16
+/* shuffle2(): DECn takes two ARGTYPE sources (TEMPTYPE is unused in the declarations); DECnX is the TYPE16 source case. */
+#define DEC2(TYPE, ARGTYPE, TEMPTYPE, MASKTYPE) \
+  OVERLOADABLE TYPE##2 shuffle2(ARGTYPE x, ARGTYPE y, MASKTYPE##2 mask);
+
+#define DEC2X(TYPE, MASKTYPE) \
+  OVERLOADABLE TYPE##2 shuffle2(TYPE##16 x, TYPE##16 y, MASKTYPE##2 mask);
+
+#define DEC4(TYPE, ARGTYPE, TEMPTYPE, MASKTYPE) \
+  OVERLOADABLE TYPE##4 shuffle2(ARGTYPE x, ARGTYPE y, MASKTYPE##4 mask);
+
+#define DEC4X(TYPE, MASKTYPE) \
+  OVERLOADABLE TYPE##4 shuffle2(TYPE##16 x, TYPE##16 y, MASKTYPE##4 mask);
+
+#define DEC8(TYPE, ARGTYPE, TEMPTYPE, MASKTYPE) \
+  OVERLOADABLE TYPE##8 shuffle2(ARGTYPE x, ARGTYPE y, MASKTYPE##8 mask);
+
+#define DEC8X(TYPE, MASKTYPE) \
+  OVERLOADABLE TYPE##8 shuffle2(TYPE##16 x, TYPE##16 y, MASKTYPE##8 mask);
+
+#define DEC16(TYPE, ARGTYPE, TEMPTYPE, MASKTYPE) \
+  OVERLOADABLE TYPE##16 shuffle2(ARGTYPE x, ARGTYPE y, MASKTYPE##16 mask);
+
+#define DEC16X(TYPE, MASKTYPE) \
+  OVERLOADABLE TYPE##16 shuffle2(TYPE##16 x, TYPE##16 y, MASKTYPE##16 mask);
+/* DEFMASK: source widths 2/4/8 pair with a double-width TEMPTYPE; width 16 uses DECnX. NOTE(review): reconstructed - confirm against upstream. */
+#define DEFMASK(TYPE, MASKTYPE) \
+  DEC2(TYPE, TYPE##2, TYPE##4, MASKTYPE) \
+  DEC2(TYPE, TYPE##4, TYPE##8, MASKTYPE) \
+  DEC2(TYPE, TYPE##8, TYPE##16, MASKTYPE) \
+  DEC2X(TYPE, MASKTYPE) \
+  DEC4(TYPE, TYPE##2, TYPE##4, MASKTYPE) \
+  DEC4(TYPE, TYPE##4, TYPE##8, MASKTYPE) \
+  DEC4(TYPE, TYPE##8, TYPE##16, MASKTYPE) \
+  DEC4X(TYPE, MASKTYPE) \
+  DEC8(TYPE, TYPE##2, TYPE##4, MASKTYPE) \
+  DEC8(TYPE, TYPE##4, TYPE##8, MASKTYPE) \
+  DEC8(TYPE, TYPE##8, TYPE##16, MASKTYPE) \
+  DEC8X(TYPE, MASKTYPE) \
+  DEC16(TYPE, TYPE##2, TYPE##4, MASKTYPE) \
+  DEC16(TYPE, TYPE##4, TYPE##8, MASKTYPE) \
+  DEC16(TYPE, TYPE##8, TYPE##16, MASKTYPE) \
+  DEC16X(TYPE, MASKTYPE)
+
+#define DEF(TYPE) \
+  DEFMASK(TYPE, uchar) \
+  DEFMASK(TYPE, ushort) \
+  DEFMASK(TYPE, uint) \
+  DEFMASK(TYPE, ulong)
+
+DEF(char)
+DEF(uchar)
+DEF(short)
+DEF(ushort)
+DEF(int)
+DEF(uint)
+DEF(float)
+DEF(long)
+DEF(ulong)
+#undef DEF
+#undef DEFMASK
+#undef DEC2
+#undef DEC2X
+#undef DEC4
+#undef DEC4X
+#undef DEC8
+#undef DEC8X
+#undef DEC16
+#undef DEC16X
+
+
+/* Temp to add the SIMD functions here. */
+/////////////////////////////////////////////////////////////////////////////
+// SIMD level function
+/////////////////////////////////////////////////////////////////////////////
+short __gen_ocl_simd_any(short);
+short __gen_ocl_simd_all(short);
+
+#endif
diff --git a/backend/src/libocl/src/ocl_misc.cl b/backend/src/libocl/src/ocl_misc.cl
new file mode 100644
index 0000000..fbcc94a
--- /dev/null
+++ b/backend/src/libocl/src/ocl_misc.cl
@@ -0,0 +1,201 @@
+#include "ocl_misc.h"
+/* shuffle(): lane i of the result is element (mask.si & (vec_step(x) - 1)) of x, so the mask index wraps to the source width. */
+#define DEC2(TYPE, XTYPE, MASKTYPE) \
+  OVERLOADABLE TYPE##2 shuffle(XTYPE x, MASKTYPE##2 mask) { \
+    TYPE##2 y; \
+    y.s0 = ((TYPE *) &x)[mask.s0 & (vec_step(x) - 1)]; \
+    y.s1 = ((TYPE *) &x)[mask.s1 & (vec_step(x) - 1)]; \
+    return y; \
+  }
+
+#define DEC4(TYPE, XTYPE, MASKTYPE) \
+  OVERLOADABLE TYPE##4 shuffle(XTYPE x, MASKTYPE##4 mask) { \
+    TYPE##4 y; \
+    y.s0 = ((TYPE *) &x)[mask.s0 & (vec_step(x) - 1)]; \
+    y.s1 = ((TYPE *) &x)[mask.s1 & (vec_step(x) - 1)]; \
+    y.s2 = ((TYPE *) &x)[mask.s2 & (vec_step(x) - 1)]; \
+    y.s3 = ((TYPE *) &x)[mask.s3 & (vec_step(x) - 1)]; \
+    return y; \
+  }
+
+#define DEC8(TYPE, XTYPE, MASKTYPE) \
+  OVERLOADABLE TYPE##8 shuffle(XTYPE x, MASKTYPE##8 mask) { \
+    TYPE##8 y; \
+    y.s0 = ((TYPE *) &x)[mask.s0 & (vec_step(x) - 1)]; \
+    y.s1 = ((TYPE *) &x)[mask.s1 & (vec_step(x) - 1)]; \
+    y.s2 = ((TYPE *) &x)[mask.s2 & (vec_step(x) - 1)]; \
+    y.s3 = ((TYPE *) &x)[mask.s3 & (vec_step(x) - 1)]; \
+    y.s4 = ((TYPE *) &x)[mask.s4 & (vec_step(x) - 1)]; \
+    y.s5 = ((TYPE *) &x)[mask.s5 & (vec_step(x) - 1)]; \
+    y.s6 = ((TYPE *) &x)[mask.s6 & (vec_step(x) - 1)]; \
+    y.s7 = ((TYPE *) &x)[mask.s7 & (vec_step(x) - 1)]; \
+    return y; \
+  }
+
+#define DEC16(TYPE, XTYPE, MASKTYPE) \
+  OVERLOADABLE TYPE##16 shuffle(XTYPE x, MASKTYPE##16 mask) { \
+    TYPE##16 y; \
+    y.s0 = ((TYPE *) &x)[mask.s0 & (vec_step(x) - 1)]; \
+    y.s1 = ((TYPE *) &x)[mask.s1 & (vec_step(x) - 1)]; \
+    y.s2 = ((TYPE *) &x)[mask.s2 & (vec_step(x) - 1)]; \
+    y.s3 = ((TYPE *) &x)[mask.s3 & (vec_step(x) - 1)]; \
+    y.s4 = ((TYPE *) &x)[mask.s4 & (vec_step(x) - 1)]; \
+    y.s5 = ((TYPE *) &x)[mask.s5 & (vec_step(x) - 1)]; \
+    y.s6 = ((TYPE *) &x)[mask.s6 & (vec_step(x) - 1)]; \
+    y.s7 = ((TYPE *) &x)[mask.s7 & (vec_step(x) - 1)]; \
+    y.s8 = ((TYPE *) &x)[mask.s8 & (vec_step(x) - 1)]; \
+    y.s9 = ((TYPE *) &x)[mask.s9 & (vec_step(x) - 1)]; \
+    y.sa = ((TYPE *) &x)[mask.sa & (vec_step(x) - 1)]; \
+    y.sb = ((TYPE *) &x)[mask.sb & (vec_step(x) - 1)]; \
+    y.sc = ((TYPE *) &x)[mask.sc & (vec_step(x) - 1)]; \
+    y.sd = ((TYPE *) &x)[mask.sd & (vec_step(x) - 1)]; \
+    y.se = ((TYPE *) &x)[mask.se & (vec_step(x) - 1)]; \
+    y.sf = ((TYPE *) &x)[mask.sf & (vec_step(x) - 1)]; \
+    return y; \
+  }
+/* DEFMASK: define each result width for each source width (2/4/8/16). NOTE(review): reconstructed from the DECn signatures - confirm against upstream. */
+#define DEFMASK(TYPE, MASKTYPE) \
+  DEC2(TYPE, TYPE##2, MASKTYPE) DEC2(TYPE, TYPE##4, MASKTYPE) DEC2(TYPE, TYPE##8, MASKTYPE) DEC2(TYPE, TYPE##16, MASKTYPE) \
+  DEC4(TYPE, TYPE##2, MASKTYPE) DEC4(TYPE, TYPE##4, MASKTYPE) DEC4(TYPE, TYPE##8, MASKTYPE) DEC4(TYPE, TYPE##16, MASKTYPE) \
+  DEC8(TYPE, TYPE##2, MASKTYPE) DEC8(TYPE, TYPE##4, MASKTYPE) DEC8(TYPE, TYPE##8, MASKTYPE) DEC8(TYPE, TYPE##16, MASKTYPE) \
+  DEC16(TYPE, TYPE##2, MASKTYPE) DEC16(TYPE, TYPE##4, MASKTYPE) DEC16(TYPE, TYPE##8, MASKTYPE) DEC16(TYPE, TYPE##16, MASKTYPE)
+
+#define DEF(TYPE) \
+  DEFMASK(TYPE, uchar) \
+  DEFMASK(TYPE, ushort) \
+  DEFMASK(TYPE, uint) \
+  DEFMASK(TYPE, ulong)
+
+DEF(char)
+DEF(uchar)
+DEF(short)
+DEF(ushort)
+DEF(int)
+DEF(uint)
+DEF(float)
+DEF(long)
+DEF(ulong)
+#undef DEF
+#undef DEFMASK
+#undef DEC2
+#undef DEC4
+#undef DEC8
+#undef DEC16
+/* shuffle2(): for sources narrower than 16 lanes, build TEMPTYPE from (x, y) and reuse shuffle() on it. */
+#define DEC2(TYPE, ARGTYPE, TEMPTYPE, MASKTYPE) \
+  OVERLOADABLE TYPE##2 shuffle2(ARGTYPE x, ARGTYPE y, MASKTYPE##2 mask) { \
+    return shuffle((TEMPTYPE)(x, y), mask); \
+  }
+/* 16-lane sources: mask bit 4 selects x (0) or y (1), bits 0-3 select the lane; higher mask bits are ignored. */
+#define DEC2X(TYPE, MASKTYPE) \
+  OVERLOADABLE TYPE##2 shuffle2(TYPE##16 x, TYPE##16 y, MASKTYPE##2 mask) { \
+    TYPE##2 z; \
+    z.s0 = (mask.s0 & 16) ? ((TYPE *)&y)[mask.s0 & 15] : ((TYPE *)&x)[mask.s0 & 15]; \
+    z.s1 = (mask.s1 & 16) ? ((TYPE *)&y)[mask.s1 & 15] : ((TYPE *)&x)[mask.s1 & 15]; \
+    return z; \
+  }
+
+#define DEC4(TYPE, ARGTYPE, TEMPTYPE, MASKTYPE) \
+  OVERLOADABLE TYPE##4 shuffle2(ARGTYPE x, ARGTYPE y, MASKTYPE##4 mask) { \
+    return shuffle((TEMPTYPE)(x, y), mask); \
+  }
+
+#define DEC4X(TYPE, MASKTYPE) \
+  OVERLOADABLE TYPE##4 shuffle2(TYPE##16 x, TYPE##16 y, MASKTYPE##4 mask) { \
+    TYPE##4 z; \
+    z.s0 = (mask.s0 & 16) ? ((TYPE *)&y)[mask.s0 & 15] : ((TYPE *)&x)[mask.s0 & 15]; \
+    z.s1 = (mask.s1 & 16) ? ((TYPE *)&y)[mask.s1 & 15] : ((TYPE *)&x)[mask.s1 & 15]; \
+    z.s2 = (mask.s2 & 16) ? ((TYPE *)&y)[mask.s2 & 15] : ((TYPE *)&x)[mask.s2 & 15]; \
+    z.s3 = (mask.s3 & 16) ? ((TYPE *)&y)[mask.s3 & 15] : ((TYPE *)&x)[mask.s3 & 15]; \
+    return z; \
+  }
+
+#define DEC8(TYPE, ARGTYPE, TEMPTYPE, MASKTYPE) \
+  OVERLOADABLE TYPE##8 shuffle2(ARGTYPE x, ARGTYPE y, MASKTYPE##8 mask) { \
+    return shuffle((TEMPTYPE)(x, y), mask); \
+  }
+
+#define DEC8X(TYPE, MASKTYPE) \
+  OVERLOADABLE TYPE##8 shuffle2(TYPE##16 x, TYPE##16 y, MASKTYPE##8 mask) { \
+    TYPE##8 z; \
+    z.s0 = (mask.s0 & 16) ? ((TYPE *)&y)[mask.s0 & 15] : ((TYPE *)&x)[mask.s0 & 15]; \
+    z.s1 = (mask.s1 & 16) ? ((TYPE *)&y)[mask.s1 & 15] : ((TYPE *)&x)[mask.s1 & 15]; \
+    z.s2 = (mask.s2 & 16) ? ((TYPE *)&y)[mask.s2 & 15] : ((TYPE *)&x)[mask.s2 & 15]; \
+    z.s3 = (mask.s3 & 16) ? ((TYPE *)&y)[mask.s3 & 15] : ((TYPE *)&x)[mask.s3 & 15]; \
+    z.s4 = (mask.s4 & 16) ? ((TYPE *)&y)[mask.s4 & 15] : ((TYPE *)&x)[mask.s4 & 15]; \
+    z.s5 = (mask.s5 & 16) ? ((TYPE *)&y)[mask.s5 & 15] : ((TYPE *)&x)[mask.s5 & 15]; \
+    z.s6 = (mask.s6 & 16) ? ((TYPE *)&y)[mask.s6 & 15] : ((TYPE *)&x)[mask.s6 & 15]; \
+    z.s7 = (mask.s7 & 16) ? ((TYPE *)&y)[mask.s7 & 15] : ((TYPE *)&x)[mask.s7 & 15]; \
+    return z; \
+  }
+
+#define DEC16(TYPE, ARGTYPE, TEMPTYPE, MASKTYPE) \
+  OVERLOADABLE TYPE##16 shuffle2(ARGTYPE x, ARGTYPE y, MASKTYPE##16 mask) { \
+    return shuffle((TEMPTYPE)(x, y), mask); \
+  }
+
+#define DEC16X(TYPE, MASKTYPE) \
+  OVERLOADABLE TYPE##16 shuffle2(TYPE##16 x, TYPE##16 y, MASKTYPE##16 mask) { \
+    TYPE##16 z; \
+    z.s0 = (mask.s0 & 16) ? ((TYPE *)&y)[mask.s0 & 15] : ((TYPE *)&x)[mask.s0 & 15]; \
+    z.s1 = (mask.s1 & 16) ? ((TYPE *)&y)[mask.s1 & 15] : ((TYPE *)&x)[mask.s1 & 15]; \
+    z.s2 = (mask.s2 & 16) ? ((TYPE *)&y)[mask.s2 & 15] : ((TYPE *)&x)[mask.s2 & 15]; \
+    z.s3 = (mask.s3 & 16) ? ((TYPE *)&y)[mask.s3 & 15] : ((TYPE *)&x)[mask.s3 & 15]; \
+    z.s4 = (mask.s4 & 16) ? ((TYPE *)&y)[mask.s4 & 15] : ((TYPE *)&x)[mask.s4 & 15]; \
+    z.s5 = (mask.s5 & 16) ? ((TYPE *)&y)[mask.s5 & 15] : ((TYPE *)&x)[mask.s5 & 15]; \
+    z.s6 = (mask.s6 & 16) ? ((TYPE *)&y)[mask.s6 & 15] : ((TYPE *)&x)[mask.s6 & 15]; \
+    z.s7 = (mask.s7 & 16) ? ((TYPE *)&y)[mask.s7 & 15] : ((TYPE *)&x)[mask.s7 & 15]; \
+    z.s8 = (mask.s8 & 16) ? ((TYPE *)&y)[mask.s8 & 15] : ((TYPE *)&x)[mask.s8 & 15]; \
+    z.s9 = (mask.s9 & 16) ? ((TYPE *)&y)[mask.s9 & 15] : ((TYPE *)&x)[mask.s9 & 15]; \
+    z.sa = (mask.sa & 16) ? ((TYPE *)&y)[mask.sa & 15] : ((TYPE *)&x)[mask.sa & 15]; \
+    z.sb = (mask.sb & 16) ? ((TYPE *)&y)[mask.sb & 15] : ((TYPE *)&x)[mask.sb & 15]; \
+    z.sc = (mask.sc & 16) ? ((TYPE *)&y)[mask.sc & 15] : ((TYPE *)&x)[mask.sc & 15]; \
+    z.sd = (mask.sd & 16) ? ((TYPE *)&y)[mask.sd & 15] : ((TYPE *)&x)[mask.sd & 15]; \
+    z.se = (mask.se & 16) ? ((TYPE *)&y)[mask.se & 15] : ((TYPE *)&x)[mask.se & 15]; \
+    z.sf = (mask.sf & 16) ? ((TYPE *)&y)[mask.sf & 15] : ((TYPE *)&x)[mask.sf & 15]; \
+    return z; \
+  }
+/* DEFMASK: source widths 2/4/8 pair with a double-width TEMPTYPE; width 16 uses DECnX. NOTE(review): reconstructed - confirm against upstream. */
+#define DEFMASK(TYPE, MASKTYPE) \
+  DEC2(TYPE, TYPE##2, TYPE##4, MASKTYPE) \
+  DEC2(TYPE, TYPE##4, TYPE##8, MASKTYPE) \
+  DEC2(TYPE, TYPE##8, TYPE##16, MASKTYPE) \
+  DEC2X(TYPE, MASKTYPE) \
+  DEC4(TYPE, TYPE##2, TYPE##4, MASKTYPE) \
+  DEC4(TYPE, TYPE##4, TYPE##8, MASKTYPE) \
+  DEC4(TYPE, TYPE##8, TYPE##16, MASKTYPE) \
+  DEC4X(TYPE, MASKTYPE) \
+  DEC8(TYPE, TYPE##2, TYPE##4, MASKTYPE) \
+  DEC8(TYPE, TYPE##4, TYPE##8, MASKTYPE) \
+  DEC8(TYPE, TYPE##8, TYPE##16, MASKTYPE) \
+  DEC8X(TYPE, MASKTYPE) \
+  DEC16(TYPE, TYPE##2, TYPE##4, MASKTYPE) \
+  DEC16(TYPE, TYPE##4, TYPE##8, MASKTYPE) \
+  DEC16(TYPE, TYPE##8, TYPE##16, MASKTYPE) \
+  DEC16X(TYPE, MASKTYPE)
+
+#define DEF(TYPE) \
+  DEFMASK(TYPE, uchar) \
+  DEFMASK(TYPE, ushort) \
+  DEFMASK(TYPE, uint) \
+  DEFMASK(TYPE, ulong)
+
+DEF(char)
+DEF(uchar)
+DEF(short)
+DEF(ushort)
+DEF(int)
+DEF(uint)
+DEF(float)
+DEF(long)
+DEF(ulong)
+#undef DEF
+#undef DEFMASK
+#undef DEC2
+#undef DEC2X
+#undef DEC4
+#undef DEC4X
+#undef DEC8
+#undef DEC8X
+#undef DEC16
+#undef DEC16X