diff options
author | Jan Vesely <jan.vesely@rutgers.edu> | 2016-08-17 20:02:11 +0000 |
---|---|---|
committer | Jan Vesely <jan.vesely@rutgers.edu> | 2016-08-17 20:02:11 +0000 |
commit | 1b4b5532e5d0911ed6581cb8b93a8c286931773a (patch) | |
tree | 164ab8b6dd7e49cfaa289ed9b8f69360762ba6b3 | |
parent | 693817b954658d02ccaea50a9e5afc1f4504d5d0 (diff) |
Implement vstore_half{,n}
Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
git-svn-id: https://llvm.org/svn/llvm-project/libclc/trunk@278962 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | generic/include/clc/shared/vstore.h | 45 | ||||
-rw-r--r-- | generic/lib/shared/vstore.cl | 32 | ||||
-rw-r--r-- | generic/lib/shared/vstore_half.inc | 10 |
3 files changed, 68 insertions, 19 deletions
diff --git a/generic/include/clc/shared/vstore.h b/generic/include/clc/shared/vstore.h
index 1f784f8..ea8d4cf 100644
--- a/generic/include/clc/shared/vstore.h
+++ b/generic/include/clc/shared/vstore.h
@@ -1,17 +1,20 @@
-#define _CLC_VSTORE_DECL(PRIM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \
-  _CLC_OVERLOAD _CLC_DECL void vstore##WIDTH(VEC_TYPE vec, size_t offset, ADDR_SPACE PRIM_TYPE *out);
+#define _CLC_VSTORE_DECL(SUFFIX, PRIM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \
+  _CLC_OVERLOAD _CLC_DECL void vstore##SUFFIX##WIDTH(VEC_TYPE vec, size_t offset, ADDR_SPACE PRIM_TYPE *out);
 
-#define _CLC_VECTOR_VSTORE_DECL(PRIM_TYPE, ADDR_SPACE) \
-  _CLC_VSTORE_DECL(PRIM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE) \
-  _CLC_VSTORE_DECL(PRIM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE) \
-  _CLC_VSTORE_DECL(PRIM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE) \
-  _CLC_VSTORE_DECL(PRIM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \
-  _CLC_VSTORE_DECL(PRIM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE)
+#define _CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, ADDR_SPACE) \
+  _CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE) \
+  _CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE) \
+  _CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE) \
+  _CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \
+  _CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE)
+
+#define _CLC_VECTOR_VSTORE_PRIM3(SUFFIX, MEM_TYPE, PRIM_TYPE) \
+  _CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private) \
+  _CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local) \
+  _CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global) \
 
 #define _CLC_VECTOR_VSTORE_PRIM1(PRIM_TYPE) \
-  _CLC_VECTOR_VSTORE_DECL(PRIM_TYPE, __private) \
-  _CLC_VECTOR_VSTORE_DECL(PRIM_TYPE, __local) \
-  _CLC_VECTOR_VSTORE_DECL(PRIM_TYPE, __global) \
+  _CLC_VECTOR_VSTORE_PRIM3(,PRIM_TYPE, PRIM_TYPE) \
 
 #define _CLC_VECTOR_VSTORE_PRIM() \
     _CLC_VECTOR_VSTORE_PRIM1(char) \
@@ -23,14 +26,18 @@
     _CLC_VECTOR_VSTORE_PRIM1(long) \
     _CLC_VECTOR_VSTORE_PRIM1(ulong) \
     _CLC_VECTOR_VSTORE_PRIM1(float) \
-
+    _CLC_VECTOR_VSTORE_PRIM3(_half, half, float)
+
 #ifdef cl_khr_fp64
-#define _CLC_VECTOR_VSTORE() \
-  _CLC_VECTOR_VSTORE_PRIM1(double) \
-  _CLC_VECTOR_VSTORE_PRIM()
-#else
-#define _CLC_VECTOR_VSTORE() \
-  _CLC_VECTOR_VSTORE_PRIM()
+#pragma cl_khr_fp64: enable
+  _CLC_VECTOR_VSTORE_PRIM1(double)
+  _CLC_VECTOR_VSTORE_PRIM3(_half, half, double)
+  _CLC_VSTORE_DECL(_half, half, double, , __private)
+  _CLC_VSTORE_DECL(_half, half, double, , __local)
+  _CLC_VSTORE_DECL(_half, half, double, , __global)
 #endif
 
-_CLC_VECTOR_VSTORE()
+_CLC_VECTOR_VSTORE_PRIM()
+_CLC_VSTORE_DECL(_half, half, float, , __private)
+_CLC_VSTORE_DECL(_half, half, float, , __local)
+_CLC_VSTORE_DECL(_half, half, float, , __global)
diff --git a/generic/lib/shared/vstore.cl b/generic/lib/shared/vstore.cl
index 4777b7e..ebc9446 100644
--- a/generic/lib/shared/vstore.cl
+++ b/generic/lib/shared/vstore.cl
@@ -50,3 +50,35 @@ VSTORE_TYPES()
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
     VSTORE_ADDR_SPACES(double)
 #endif
+
+/* vstore_half are legal even without cl_khr_fp16 */
+
+#define VEC_STORE1(val) mem[offset++] = val;
+#define VEC_STORE2(val) \
+	VEC_STORE1(val.lo) \
+	VEC_STORE1(val.hi)
+#define VEC_STORE3(val) \
+	VEC_STORE1(val.s0) \
+	VEC_STORE1(val.s1) \
+	VEC_STORE1(val.s2)
+#define VEC_STORE4(val) \
+	VEC_STORE2(val.lo) \
+	VEC_STORE2(val.hi)
+#define VEC_STORE8(val) \
+	VEC_STORE4(val.lo) \
+	VEC_STORE4(val.hi)
+#define VEC_STORE16(val) \
+	VEC_STORE8(val.lo) \
+	VEC_STORE8(val.hi)
+
+#define __FUNC(SUFFIX, VEC_SIZE, TYPE, AS) \
+  _CLC_OVERLOAD _CLC_DEF void vstore_half##SUFFIX(TYPE vec, size_t offset, AS half *mem) { \
+    offset *= VEC_SIZE; \
+    VEC_STORE##VEC_SIZE(vec) \
+  }
+
+#define FUNC(SUFFIX, VEC_SIZE, TYPE, AS) __FUNC(SUFFIX, VEC_SIZE, TYPE, AS)
+
+#define __CLC_BODY "vstore_half.inc"
+#include <clc/math/gentype.inc>
+
diff --git a/generic/lib/shared/vstore_half.inc b/generic/lib/shared/vstore_half.inc
new file mode 100644
index 0000000..8ed03a0
--- /dev/null
+++ b/generic/lib/shared/vstore_half.inc
@@ -0,0 +1,10 @@
+
+#ifdef __CLC_VECSIZE
+  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __private);
+  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __local);
+  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __global);
+#else
+  FUNC(, 1, __CLC_GENTYPE, __private);
+  FUNC(, 1, __CLC_GENTYPE, __local);
+  FUNC(, 1, __CLC_GENTYPE, __global);
+#endif