From 8337635527f1f33eba60d02555774f978c2a11ff Mon Sep 17 00:00:00 2001 From: Aaron Watry Date: Wed, 3 Jul 2013 11:53:35 -0500 Subject: From: Tom Stellard Implement mad24 and mul24 builtins --- generic/include/clc/integer/abs.h | 2 +- generic/include/clc/integer/abs_diff.h | 2 +- generic/include/clc/integer/add_sat.h | 2 +- generic/include/clc/integer/clz.h | 2 +- generic/include/clc/integer/gentype.inc | 435 ------------------------ generic/include/clc/integer/integer-gentype.inc | 435 ++++++++++++++++++++++++ generic/include/clc/integer/mad24.h | 28 +- generic/include/clc/integer/mad24.inc | 1 + generic/include/clc/integer/mul24.h | 28 +- generic/include/clc/integer/mul24.inc | 1 + generic/include/clc/integer/rotate.h | 2 +- generic/include/clc/integer/sub_sat.h | 2 +- generic/include/clc/shared/clamp.h | 2 +- generic/include/clc/shared/max.h | 2 +- generic/include/clc/shared/min.h | 2 +- generic/lib/integer/abs.cl | 2 +- generic/lib/integer/abs_diff.cl | 2 +- generic/lib/integer/mad24.cl | 55 +-- generic/lib/integer/mad24.inc | 6 +- generic/lib/integer/mul24.cl | 55 +-- generic/lib/integer/mul24.inc | 12 +- generic/lib/integer/rotate.cl | 2 +- generic/lib/shared/clamp.cl | 2 +- generic/lib/shared/max.cl | 2 +- generic/lib/shared/min.cl | 2 +- 25 files changed, 475 insertions(+), 611 deletions(-) delete mode 100644 generic/include/clc/integer/gentype.inc create mode 100644 generic/include/clc/integer/integer-gentype.inc create mode 100644 generic/include/clc/integer/mad24.inc create mode 100644 generic/include/clc/integer/mul24.inc diff --git a/generic/include/clc/integer/abs.h b/generic/include/clc/integer/abs.h index 77a4cbe..717a48b 100644 --- a/generic/include/clc/integer/abs.h +++ b/generic/include/clc/integer/abs.h @@ -1,2 +1,2 @@ #define __CLC_BODY -#include +#include diff --git a/generic/include/clc/integer/abs_diff.h b/generic/include/clc/integer/abs_diff.h index 3f3b4b4..1c1da33 100644 --- a/generic/include/clc/integer/abs_diff.h +++ 
b/generic/include/clc/integer/abs_diff.h @@ -1,2 +1,2 @@ #define __CLC_BODY -#include +#include diff --git a/generic/include/clc/integer/add_sat.h b/generic/include/clc/integer/add_sat.h index 2e5e698..6f6998e 100644 --- a/generic/include/clc/integer/add_sat.h +++ b/generic/include/clc/integer/add_sat.h @@ -1,2 +1,2 @@ #define __CLC_BODY -#include +#include diff --git a/generic/include/clc/integer/clz.h b/generic/include/clc/integer/clz.h index f7cdbf7..3ccb5e2 100644 --- a/generic/include/clc/integer/clz.h +++ b/generic/include/clc/integer/clz.h @@ -1,2 +1,2 @@ #define __CLC_BODY -#include +#include diff --git a/generic/include/clc/integer/gentype.inc b/generic/include/clc/integer/gentype.inc deleted file mode 100644 index 6f4d699..0000000 --- a/generic/include/clc/integer/gentype.inc +++ /dev/null @@ -1,435 +0,0 @@ -//These 2 defines only change when switching between data sizes or base types to -//keep this file manageable. -#define __CLC_GENSIZE 8 -#define __CLC_SCALAR_GENTYPE char - -#define __CLC_GENTYPE char -#define __CLC_U_GENTYPE uchar -#define __CLC_S_GENTYPE char -#define __CLC_SCALAR 1 -#include __CLC_BODY -#undef __CLC_SCALAR -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE char2 -#define __CLC_U_GENTYPE uchar2 -#define __CLC_S_GENTYPE char2 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE char3 -#define __CLC_U_GENTYPE uchar3 -#define __CLC_S_GENTYPE char3 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE char4 -#define __CLC_U_GENTYPE uchar4 -#define __CLC_S_GENTYPE char4 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE char8 -#define __CLC_U_GENTYPE uchar8 -#define __CLC_S_GENTYPE char8 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE char16 
-#define __CLC_U_GENTYPE uchar16 -#define __CLC_S_GENTYPE char16 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#undef __CLC_SCALAR_GENTYPE -#define __CLC_SCALAR_GENTYPE uchar - -#define __CLC_GENTYPE uchar -#define __CLC_U_GENTYPE uchar -#define __CLC_S_GENTYPE char -#define __CLC_SCALAR 1 -#include __CLC_BODY -#undef __CLC_SCALAR -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE uchar2 -#define __CLC_U_GENTYPE uchar2 -#define __CLC_S_GENTYPE char2 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE uchar3 -#define __CLC_U_GENTYPE uchar3 -#define __CLC_S_GENTYPE char3 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE uchar4 -#define __CLC_U_GENTYPE uchar4 -#define __CLC_S_GENTYPE char4 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE uchar8 -#define __CLC_U_GENTYPE uchar8 -#define __CLC_S_GENTYPE char8 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE uchar16 -#define __CLC_U_GENTYPE uchar16 -#define __CLC_S_GENTYPE char16 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#undef __CLC_GENSIZE -#define __CLC_GENSIZE 16 -#undef __CLC_SCALAR_GENTYPE -#define __CLC_SCALAR_GENTYPE short - -#define __CLC_GENTYPE short -#define __CLC_U_GENTYPE ushort -#define __CLC_S_GENTYPE short -#define __CLC_SCALAR 1 -#include __CLC_BODY -#undef __CLC_SCALAR -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE short2 -#define __CLC_U_GENTYPE ushort2 -#define __CLC_S_GENTYPE short2 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE short3 -#define __CLC_U_GENTYPE ushort3 -#define 
__CLC_S_GENTYPE short3 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE short4 -#define __CLC_U_GENTYPE ushort4 -#define __CLC_S_GENTYPE short4 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE short8 -#define __CLC_U_GENTYPE ushort8 -#define __CLC_S_GENTYPE short8 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE short16 -#define __CLC_U_GENTYPE ushort16 -#define __CLC_S_GENTYPE short16 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#undef __CLC_SCALAR_GENTYPE -#define __CLC_SCALAR_GENTYPE ushort - -#define __CLC_GENTYPE ushort -#define __CLC_U_GENTYPE ushort -#define __CLC_S_GENTYPE short -#define __CLC_SCALAR 1 -#include __CLC_BODY -#undef __CLC_SCALAR -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE ushort2 -#define __CLC_U_GENTYPE ushort2 -#define __CLC_S_GENTYPE short2 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE ushort3 -#define __CLC_U_GENTYPE ushort3 -#define __CLC_S_GENTYPE short3 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE ushort4 -#define __CLC_U_GENTYPE ushort4 -#define __CLC_S_GENTYPE short4 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE ushort8 -#define __CLC_U_GENTYPE ushort8 -#define __CLC_S_GENTYPE short8 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE ushort16 -#define __CLC_U_GENTYPE ushort16 -#define __CLC_S_GENTYPE short16 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#undef __CLC_GENSIZE -#define __CLC_GENSIZE 32 -#undef 
__CLC_SCALAR_GENTYPE -#define __CLC_SCALAR_GENTYPE int - -#define __CLC_GENTYPE int -#define __CLC_U_GENTYPE uint -#define __CLC_S_GENTYPE int -#define __CLC_SCALAR 1 -#include __CLC_BODY -#undef __CLC_SCALAR -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE int2 -#define __CLC_U_GENTYPE uint2 -#define __CLC_S_GENTYPE int2 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE int3 -#define __CLC_U_GENTYPE uint3 -#define __CLC_S_GENTYPE int3 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE int4 -#define __CLC_U_GENTYPE uint4 -#define __CLC_S_GENTYPE int4 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE int8 -#define __CLC_U_GENTYPE uint8 -#define __CLC_S_GENTYPE int8 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE int16 -#define __CLC_U_GENTYPE uint16 -#define __CLC_S_GENTYPE int16 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#undef __CLC_SCALAR_GENTYPE -#define __CLC_SCALAR_GENTYPE uint - -#define __CLC_GENTYPE uint -#define __CLC_U_GENTYPE uint -#define __CLC_S_GENTYPE int -#define __CLC_SCALAR 1 -#include __CLC_BODY -#undef __CLC_SCALAR -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE uint2 -#define __CLC_U_GENTYPE uint2 -#define __CLC_S_GENTYPE int2 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE uint3 -#define __CLC_U_GENTYPE uint3 -#define __CLC_S_GENTYPE int3 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE uint4 -#define __CLC_U_GENTYPE uint4 -#define __CLC_S_GENTYPE int4 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef 
__CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE uint8 -#define __CLC_U_GENTYPE uint8 -#define __CLC_S_GENTYPE int8 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE uint16 -#define __CLC_U_GENTYPE uint16 -#define __CLC_S_GENTYPE int16 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#undef __CLC_GENSIZE -#define __CLC_GENSIZE 64 -#undef __CLC_SCALAR_GENTYPE -#define __CLC_SCALAR_GENTYPE long - -#define __CLC_GENTYPE long -#define __CLC_U_GENTYPE ulong -#define __CLC_S_GENTYPE long -#define __CLC_SCALAR 1 -#include __CLC_BODY -#undef __CLC_SCALAR -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE long2 -#define __CLC_U_GENTYPE ulong2 -#define __CLC_S_GENTYPE long2 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE long3 -#define __CLC_U_GENTYPE ulong3 -#define __CLC_S_GENTYPE long3 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE long4 -#define __CLC_U_GENTYPE ulong4 -#define __CLC_S_GENTYPE long4 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE long8 -#define __CLC_U_GENTYPE ulong8 -#define __CLC_S_GENTYPE long8 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE long16 -#define __CLC_U_GENTYPE ulong16 -#define __CLC_S_GENTYPE long16 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#undef __CLC_SCALAR_GENTYPE -#define __CLC_SCALAR_GENTYPE ulong - -#define __CLC_GENTYPE ulong -#define __CLC_U_GENTYPE ulong -#define __CLC_S_GENTYPE long -#define __CLC_SCALAR 1 -#include __CLC_BODY -#undef __CLC_SCALAR -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE 
ulong2 -#define __CLC_U_GENTYPE ulong2 -#define __CLC_S_GENTYPE long2 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE ulong3 -#define __CLC_U_GENTYPE ulong3 -#define __CLC_S_GENTYPE long3 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE ulong4 -#define __CLC_U_GENTYPE ulong4 -#define __CLC_S_GENTYPE long4 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE ulong8 -#define __CLC_U_GENTYPE ulong8 -#define __CLC_S_GENTYPE long8 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#define __CLC_GENTYPE ulong16 -#define __CLC_U_GENTYPE ulong16 -#define __CLC_S_GENTYPE long16 -#include __CLC_BODY -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE - -#undef __CLC_GENSIZE -#undef __CLC_SCALAR_GENTYPE -#undef __CLC_BODY diff --git a/generic/include/clc/integer/integer-gentype.inc b/generic/include/clc/integer/integer-gentype.inc new file mode 100644 index 0000000..6f4d699 --- /dev/null +++ b/generic/include/clc/integer/integer-gentype.inc @@ -0,0 +1,435 @@ +//These 2 defines only change when switching between data sizes or base types to +//keep this file manageable. 
+#define __CLC_GENSIZE 8 +#define __CLC_SCALAR_GENTYPE char + +#define __CLC_GENTYPE char +#define __CLC_U_GENTYPE uchar +#define __CLC_S_GENTYPE char +#define __CLC_SCALAR 1 +#include __CLC_BODY +#undef __CLC_SCALAR +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE char2 +#define __CLC_U_GENTYPE uchar2 +#define __CLC_S_GENTYPE char2 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE char3 +#define __CLC_U_GENTYPE uchar3 +#define __CLC_S_GENTYPE char3 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE char4 +#define __CLC_U_GENTYPE uchar4 +#define __CLC_S_GENTYPE char4 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE char8 +#define __CLC_U_GENTYPE uchar8 +#define __CLC_S_GENTYPE char8 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE char16 +#define __CLC_U_GENTYPE uchar16 +#define __CLC_S_GENTYPE char16 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#undef __CLC_SCALAR_GENTYPE +#define __CLC_SCALAR_GENTYPE uchar + +#define __CLC_GENTYPE uchar +#define __CLC_U_GENTYPE uchar +#define __CLC_S_GENTYPE char +#define __CLC_SCALAR 1 +#include __CLC_BODY +#undef __CLC_SCALAR +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE uchar2 +#define __CLC_U_GENTYPE uchar2 +#define __CLC_S_GENTYPE char2 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE uchar3 +#define __CLC_U_GENTYPE uchar3 +#define __CLC_S_GENTYPE char3 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE uchar4 +#define __CLC_U_GENTYPE uchar4 +#define __CLC_S_GENTYPE char4 +#include __CLC_BODY 
+#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE uchar8 +#define __CLC_U_GENTYPE uchar8 +#define __CLC_S_GENTYPE char8 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE uchar16 +#define __CLC_U_GENTYPE uchar16 +#define __CLC_S_GENTYPE char16 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#undef __CLC_GENSIZE +#define __CLC_GENSIZE 16 +#undef __CLC_SCALAR_GENTYPE +#define __CLC_SCALAR_GENTYPE short + +#define __CLC_GENTYPE short +#define __CLC_U_GENTYPE ushort +#define __CLC_S_GENTYPE short +#define __CLC_SCALAR 1 +#include __CLC_BODY +#undef __CLC_SCALAR +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE short2 +#define __CLC_U_GENTYPE ushort2 +#define __CLC_S_GENTYPE short2 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE short3 +#define __CLC_U_GENTYPE ushort3 +#define __CLC_S_GENTYPE short3 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE short4 +#define __CLC_U_GENTYPE ushort4 +#define __CLC_S_GENTYPE short4 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE short8 +#define __CLC_U_GENTYPE ushort8 +#define __CLC_S_GENTYPE short8 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE short16 +#define __CLC_U_GENTYPE ushort16 +#define __CLC_S_GENTYPE short16 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#undef __CLC_SCALAR_GENTYPE +#define __CLC_SCALAR_GENTYPE ushort + +#define __CLC_GENTYPE ushort +#define __CLC_U_GENTYPE ushort +#define __CLC_S_GENTYPE short +#define __CLC_SCALAR 1 +#include __CLC_BODY +#undef __CLC_SCALAR +#undef __CLC_GENTYPE +#undef 
__CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE ushort2 +#define __CLC_U_GENTYPE ushort2 +#define __CLC_S_GENTYPE short2 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE ushort3 +#define __CLC_U_GENTYPE ushort3 +#define __CLC_S_GENTYPE short3 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE ushort4 +#define __CLC_U_GENTYPE ushort4 +#define __CLC_S_GENTYPE short4 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE ushort8 +#define __CLC_U_GENTYPE ushort8 +#define __CLC_S_GENTYPE short8 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE ushort16 +#define __CLC_U_GENTYPE ushort16 +#define __CLC_S_GENTYPE short16 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#undef __CLC_GENSIZE +#define __CLC_GENSIZE 32 +#undef __CLC_SCALAR_GENTYPE +#define __CLC_SCALAR_GENTYPE int + +#define __CLC_GENTYPE int +#define __CLC_U_GENTYPE uint +#define __CLC_S_GENTYPE int +#define __CLC_SCALAR 1 +#include __CLC_BODY +#undef __CLC_SCALAR +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE int2 +#define __CLC_U_GENTYPE uint2 +#define __CLC_S_GENTYPE int2 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE int3 +#define __CLC_U_GENTYPE uint3 +#define __CLC_S_GENTYPE int3 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE int4 +#define __CLC_U_GENTYPE uint4 +#define __CLC_S_GENTYPE int4 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE int8 +#define __CLC_U_GENTYPE uint8 +#define __CLC_S_GENTYPE int8 +#include __CLC_BODY +#undef 
__CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE int16 +#define __CLC_U_GENTYPE uint16 +#define __CLC_S_GENTYPE int16 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#undef __CLC_SCALAR_GENTYPE +#define __CLC_SCALAR_GENTYPE uint + +#define __CLC_GENTYPE uint +#define __CLC_U_GENTYPE uint +#define __CLC_S_GENTYPE int +#define __CLC_SCALAR 1 +#include __CLC_BODY +#undef __CLC_SCALAR +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE uint2 +#define __CLC_U_GENTYPE uint2 +#define __CLC_S_GENTYPE int2 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE uint3 +#define __CLC_U_GENTYPE uint3 +#define __CLC_S_GENTYPE int3 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE uint4 +#define __CLC_U_GENTYPE uint4 +#define __CLC_S_GENTYPE int4 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE uint8 +#define __CLC_U_GENTYPE uint8 +#define __CLC_S_GENTYPE int8 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE uint16 +#define __CLC_U_GENTYPE uint16 +#define __CLC_S_GENTYPE int16 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#undef __CLC_GENSIZE +#define __CLC_GENSIZE 64 +#undef __CLC_SCALAR_GENTYPE +#define __CLC_SCALAR_GENTYPE long + +#define __CLC_GENTYPE long +#define __CLC_U_GENTYPE ulong +#define __CLC_S_GENTYPE long +#define __CLC_SCALAR 1 +#include __CLC_BODY +#undef __CLC_SCALAR +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE long2 +#define __CLC_U_GENTYPE ulong2 +#define __CLC_S_GENTYPE long2 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define 
__CLC_GENTYPE long3 +#define __CLC_U_GENTYPE ulong3 +#define __CLC_S_GENTYPE long3 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE long4 +#define __CLC_U_GENTYPE ulong4 +#define __CLC_S_GENTYPE long4 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE long8 +#define __CLC_U_GENTYPE ulong8 +#define __CLC_S_GENTYPE long8 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE long16 +#define __CLC_U_GENTYPE ulong16 +#define __CLC_S_GENTYPE long16 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#undef __CLC_SCALAR_GENTYPE +#define __CLC_SCALAR_GENTYPE ulong + +#define __CLC_GENTYPE ulong +#define __CLC_U_GENTYPE ulong +#define __CLC_S_GENTYPE long +#define __CLC_SCALAR 1 +#include __CLC_BODY +#undef __CLC_SCALAR +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE ulong2 +#define __CLC_U_GENTYPE ulong2 +#define __CLC_S_GENTYPE long2 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE ulong3 +#define __CLC_U_GENTYPE ulong3 +#define __CLC_S_GENTYPE long3 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE ulong4 +#define __CLC_U_GENTYPE ulong4 +#define __CLC_S_GENTYPE long4 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE ulong8 +#define __CLC_U_GENTYPE ulong8 +#define __CLC_S_GENTYPE long8 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE ulong16 +#define __CLC_U_GENTYPE ulong16 +#define __CLC_S_GENTYPE long16 +#include __CLC_BODY +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#undef __CLC_GENSIZE +#undef 
__CLC_SCALAR_GENTYPE +#undef __CLC_BODY diff --git a/generic/include/clc/integer/mad24.h b/generic/include/clc/integer/mad24.h index 05ec86b..1fde569 100644 --- a/generic/include/clc/integer/mad24.h +++ b/generic/include/clc/integer/mad24.h @@ -1,25 +1,3 @@ - -_CLC_OVERLOAD _CLC_DECL int mad24(int x, int y, int z); - -_CLC_OVERLOAD _CLC_DECL int2 mad24(int2 x, int2 y, int2 z); - -_CLC_OVERLOAD _CLC_DECL int3 mad24(int3 x, int3 y, int3 z); - -_CLC_OVERLOAD _CLC_DECL int4 mad24(int4 x, int4 y, int4 z); - -_CLC_OVERLOAD _CLC_DECL int8 mad24(int8 x, int8 y, int8 z); - -_CLC_OVERLOAD _CLC_DECL int16 mad24(int16 x, int16 y, int16 z); - -_CLC_OVERLOAD _CLC_DECL uint mad24(uint x, uint y, uint z); - -_CLC_OVERLOAD _CLC_DECL uint2 mad24(uint2 x, uint2 y, uint2 z); - -_CLC_OVERLOAD _CLC_DECL uint3 mad24(uint3 x, uint3 y, uint3 z); - -_CLC_OVERLOAD _CLC_DECL uint4 mad24(uint4 x, uint4 y, uint4 z); - -_CLC_OVERLOAD _CLC_DECL uint8 mad24(uint8 x, uint8 y, uint8 z); - -_CLC_OVERLOAD _CLC_DECL uint16 mad24(uint16 x, uint16 y, uint16 z); - +#define __CLC_BODY +#include +#undef __CLC_BODY \ No newline at end of file diff --git a/generic/include/clc/integer/mad24.inc b/generic/include/clc/integer/mad24.inc new file mode 100644 index 0000000..26eaf48 --- /dev/null +++ b/generic/include/clc/integer/mad24.inc @@ -0,0 +1 @@ +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mad24(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z); \ No newline at end of file diff --git a/generic/include/clc/integer/mul24.h b/generic/include/clc/integer/mul24.h index e025d77..8700aef 100644 --- a/generic/include/clc/integer/mul24.h +++ b/generic/include/clc/integer/mul24.h @@ -1,25 +1,3 @@ - -_CLC_OVERLOAD _CLC_DECL int mul24(int x, int y); - -_CLC_OVERLOAD _CLC_DECL int2 mul24(int2 x, int2 y); - -_CLC_OVERLOAD _CLC_DECL int3 mul24(int3 x, int3 y); - -_CLC_OVERLOAD _CLC_DECL int4 mul24(int4 x, int4 y); - -_CLC_OVERLOAD _CLC_DECL int8 mul24(int8 x, int8 y); - -_CLC_OVERLOAD _CLC_DECL int16 mul24(int16 x, int16 y); - 
-_CLC_OVERLOAD _CLC_DECL uint mul24(uint x, uint y); - -_CLC_OVERLOAD _CLC_DECL uint2 mul24(uint2 x, uint2 y); - -_CLC_OVERLOAD _CLC_DECL uint3 mul24(uint3 x, uint3 y); - -_CLC_OVERLOAD _CLC_DECL uint4 mul24(uint4 x, uint4 y); - -_CLC_OVERLOAD _CLC_DECL uint8 mul24(uint8 x, uint8 y); - -_CLC_OVERLOAD _CLC_DECL uint16 mul24(uint16 x, uint16 y); - +#define __CLC_BODY +#include +#undef __CLC_BODY \ No newline at end of file diff --git a/generic/include/clc/integer/mul24.inc b/generic/include/clc/integer/mul24.inc new file mode 100644 index 0000000..4154997 --- /dev/null +++ b/generic/include/clc/integer/mul24.inc @@ -0,0 +1 @@ +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mul24(__CLC_GENTYPE x, __CLC_GENTYPE y); \ No newline at end of file diff --git a/generic/include/clc/integer/rotate.h b/generic/include/clc/integer/rotate.h index 6320223..42037f9 100644 --- a/generic/include/clc/integer/rotate.h +++ b/generic/include/clc/integer/rotate.h @@ -1,2 +1,2 @@ #define __CLC_BODY -#include +#include diff --git a/generic/include/clc/integer/sub_sat.h b/generic/include/clc/integer/sub_sat.h index f841529..cce4d2c 100644 --- a/generic/include/clc/integer/sub_sat.h +++ b/generic/include/clc/integer/sub_sat.h @@ -1,2 +1,2 @@ #define __CLC_BODY -#include +#include diff --git a/generic/include/clc/shared/clamp.h b/generic/include/clc/shared/clamp.h index a389b85..97611d8 100644 --- a/generic/include/clc/shared/clamp.h +++ b/generic/include/clc/shared/clamp.h @@ -1,5 +1,5 @@ #define __CLC_BODY -#include +#include #define __CLC_BODY #include diff --git a/generic/include/clc/shared/max.h b/generic/include/clc/shared/max.h index ee20b9e..684d2e1 100644 --- a/generic/include/clc/shared/max.h +++ b/generic/include/clc/shared/max.h @@ -1,5 +1,5 @@ #define __CLC_BODY -#include +#include #define __CLC_BODY #include diff --git a/generic/include/clc/shared/min.h b/generic/include/clc/shared/min.h index e11d9f9..b4b2d41 100644 --- a/generic/include/clc/shared/min.h +++ 
b/generic/include/clc/shared/min.h @@ -1,5 +1,5 @@ #define __CLC_BODY -#include +#include #define __CLC_BODY #include diff --git a/generic/lib/integer/abs.cl b/generic/lib/integer/abs.cl index faff8d0..94ea113 100644 --- a/generic/lib/integer/abs.cl +++ b/generic/lib/integer/abs.cl @@ -1,4 +1,4 @@ #include #define __CLC_BODY -#include +#include diff --git a/generic/lib/integer/abs_diff.cl b/generic/lib/integer/abs_diff.cl index 3d75105..1ce5d34 100644 --- a/generic/lib/integer/abs_diff.cl +++ b/generic/lib/integer/abs_diff.cl @@ -1,4 +1,4 @@ #include #define __CLC_BODY -#include +#include diff --git a/generic/lib/integer/mad24.cl b/generic/lib/integer/mad24.cl index bed5b79..94dd06a 100644 --- a/generic/lib/integer/mad24.cl +++ b/generic/lib/integer/mad24.cl @@ -1,55 +1,4 @@ #include - -#define BODY - -#define GENTYPE int -#include BODY -#undef GENTYPE - -#define GENTYPE int2 -#include BODY -#undef GENTYPE - -#define GENTYPE int3 -#include BODY -#undef GENTYPE - -#define GENTYPE int4 -#include BODY -#undef GENTYPE - -#define GENTYPE int8 -#include BODY -#undef GENTYPE - -#define GENTYPE int16 -#include BODY -#undef GENTYPE - -#define GENTYPE uint -#include BODY -#undef GENTYPE - -#define GENTYPE uint2 -#include BODY -#undef GENTYPE - -#define GENTYPE uint3 -#include BODY -#undef GENTYPE - -#define GENTYPE uint4 -#include BODY -#undef GENTYPE - -#define GENTYPE uint8 -#include BODY -#undef GENTYPE - -#define GENTYPE uint16 -#include BODY -#undef GENTYPE - - -#undef BODY +#define __CLC_BODY +#include \ No newline at end of file diff --git a/generic/lib/integer/mad24.inc b/generic/lib/integer/mad24.inc index f2bf91e..174af49 100644 --- a/generic/lib/integer/mad24.inc +++ b/generic/lib/integer/mad24.inc @@ -1,3 +1,3 @@ -_CLC_OVERLOAD _CLC_DEF GENTYPE mad24(GENTYPE x, GENTYPE y, GENTYPE z) { - return x * y + z; -} +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mad24(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z){ + return mul24(x, y) + z; +} \ No newline at end of file diff 
--git a/generic/lib/integer/mul24.cl b/generic/lib/integer/mul24.cl index a2cad96..99db0b2 100644 --- a/generic/lib/integer/mul24.cl +++ b/generic/lib/integer/mul24.cl @@ -1,55 +1,4 @@ #include - -#define BODY - -#define GENTYPE int -#include BODY -#undef GENTYPE - -#define GENTYPE int2 -#include BODY -#undef GENTYPE - -#define GENTYPE int3 -#include BODY -#undef GENTYPE - -#define GENTYPE int4 -#include BODY -#undef GENTYPE - -#define GENTYPE int8 -#include BODY -#undef GENTYPE - -#define GENTYPE int16 -#include BODY -#undef GENTYPE - -#define GENTYPE uint -#include BODY -#undef GENTYPE - -#define GENTYPE uint2 -#include BODY -#undef GENTYPE - -#define GENTYPE uint3 -#include BODY -#undef GENTYPE - -#define GENTYPE uint4 -#include BODY -#undef GENTYPE - -#define GENTYPE uint8 -#include BODY -#undef GENTYPE - -#define GENTYPE uint16 -#include BODY -#undef GENTYPE - - -#undef BODY +#define __CLC_BODY +#include \ No newline at end of file diff --git a/generic/lib/integer/mul24.inc b/generic/lib/integer/mul24.inc index b568ca8..1496410 100644 --- a/generic/lib/integer/mul24.inc +++ b/generic/lib/integer/mul24.inc @@ -1,3 +1,11 @@ -_CLC_OVERLOAD _CLC_DEF GENTYPE mul24(GENTYPE x, GENTYPE y) { - return x * y; + +// We need to use shifts here in order to maintain the sign bit for signed +// integers. The compiler should optimize this to (x & 0x00FFFFFF) for +// unsigned integers. 
+#define CONVERT_TO_24BIT(x) (((x) << (__CLC_GENTYPE)8) >> (__CLC_GENTYPE)8) + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mul24(__CLC_GENTYPE x, __CLC_GENTYPE y){ + return CONVERT_TO_24BIT(x) * CONVERT_TO_24BIT(y); } + +#undef CONVERT_TO_24BIT \ No newline at end of file diff --git a/generic/lib/integer/rotate.cl b/generic/lib/integer/rotate.cl index 27ce515..c6d063e 100644 --- a/generic/lib/integer/rotate.cl +++ b/generic/lib/integer/rotate.cl @@ -1,4 +1,4 @@ #include #define __CLC_BODY -#include +#include diff --git a/generic/lib/shared/clamp.cl b/generic/lib/shared/clamp.cl index c79a358..b566538 100644 --- a/generic/lib/shared/clamp.cl +++ b/generic/lib/shared/clamp.cl @@ -1,7 +1,7 @@ #include #define __CLC_BODY -#include +#include #ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64 : enable diff --git a/generic/lib/shared/max.cl b/generic/lib/shared/max.cl index 1c4457c..54d8457 100644 --- a/generic/lib/shared/max.cl +++ b/generic/lib/shared/max.cl @@ -1,7 +1,7 @@ #include #define __CLC_BODY -#include +#include #ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64 : enable diff --git a/generic/lib/shared/min.cl b/generic/lib/shared/min.cl index 433087a..cc821b2 100644 --- a/generic/lib/shared/min.cl +++ b/generic/lib/shared/min.cl @@ -1,7 +1,7 @@ #include #define __CLC_BODY -#include +#include #ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64 : enable -- cgit v1.2.3