diff options
-rw-r--r-- | generic/include/clc/clc.h | 1 | ||||
-rw-r--r-- | generic/include/clc/integer/gentype.inc | 11 | ||||
-rw-r--r-- | generic/include/clc/integer/rotate.h | 2 | ||||
-rw-r--r-- | generic/include/clc/integer/rotate.inc | 1 | ||||
-rw-r--r-- | generic/lib/SOURCES | 1 | ||||
-rw-r--r-- | generic/lib/integer/rotate.cl | 4 | ||||
-rw-r--r-- | generic/lib/integer/rotate.inc | 35 |
7 files changed, 55 insertions, 0 deletions
diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
index c3d7d59..72f518a 100644
--- a/generic/include/clc/clc.h
+++ b/generic/include/clc/clc.h
@@ -63,6 +63,7 @@
 #include <clc/integer/abs.h>
 #include <clc/integer/abs_diff.h>
 #include <clc/integer/add_sat.h>
+#include <clc/integer/rotate.h>
 #include <clc/integer/sub_sat.h>
 
 /* 6.11.2 and 6.11.3 Shared Integer/Math Functions */
diff --git a/generic/include/clc/integer/gentype.inc b/generic/include/clc/integer/gentype.inc
index 0b32efd..005b9af 100644
--- a/generic/include/clc/integer/gentype.inc
+++ b/generic/include/clc/integer/gentype.inc
@@ -1,3 +1,4 @@
+#define GENSIZE 8
 #define GENTYPE char
 #define UGENTYPE uchar
 #define SGENTYPE char
@@ -94,6 +95,9 @@
 #undef UGENTYPE
 #undef SGENTYPE
 
+#undef GENSIZE
+#define GENSIZE 16
+
 #define GENTYPE short
 #define UGENTYPE ushort
 #define SGENTYPE short
@@ -190,6 +194,9 @@
 #undef UGENTYPE
 #undef SGENTYPE
 
+#undef GENSIZE
+#define GENSIZE 32
+
 #define GENTYPE int
 #define UGENTYPE uint
 #define SGENTYPE int
@@ -286,6 +293,9 @@
 #undef UGENTYPE
 #undef SGENTYPE
 
+#undef GENSIZE
+#define GENSIZE 64
+
 #define GENTYPE long
 #define UGENTYPE ulong
 #define SGENTYPE long
@@ -382,4 +392,5 @@
 #undef UGENTYPE
 #undef SGENTYPE
 
+#undef GENSIZE
 #undef BODY
diff --git a/generic/include/clc/integer/rotate.h b/generic/include/clc/integer/rotate.h
new file mode 100644
index 0000000..e163bc8
--- /dev/null
+++ b/generic/include/clc/integer/rotate.h
@@ -0,0 +1,2 @@
+#define BODY <clc/integer/rotate.inc>
+#include <clc/integer/gentype.inc>
diff --git a/generic/include/clc/integer/rotate.inc b/generic/include/clc/integer/rotate.inc
new file mode 100644
index 0000000..5720e1c
--- /dev/null
+++ b/generic/include/clc/integer/rotate.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL GENTYPE rotate(GENTYPE x, GENTYPE y);
diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
index f639c83..495b3e7 100644
--- a/generic/lib/SOURCES
+++ b/generic/lib/SOURCES
@@ -8,6 +8,7 @@ integer/abs_diff.cl
 integer/add_sat.cl
 integer/add_sat.ll
 integer/add_sat_impl.ll
+integer/rotate.cl
 integer/sub_sat.cl
 integer/sub_sat.ll
 integer/sub_sat_impl.ll
diff --git a/generic/lib/integer/rotate.cl b/generic/lib/integer/rotate.cl
new file mode 100644
index 0000000..d7eff2b
--- /dev/null
+++ b/generic/lib/integer/rotate.cl
@@ -0,0 +1,4 @@
+#include <clc/clc.h>
+
+#define BODY <rotate.inc>
+#include <clc/integer/gentype.inc>
diff --git a/generic/lib/integer/rotate.inc b/generic/lib/integer/rotate.inc
new file mode 100644
index 0000000..e83dd51
--- /dev/null
+++ b/generic/lib/integer/rotate.inc
@@ -0,0 +1,35 @@
+/**
+ * rotate(x, n): rotate each component of x left by n bits; a negative n
+ * rotates right. Bits shifted off one end re-enter at the other.
+ * Not necessarily optimal. If we're lucky, LLVM will recognize the pattern and
+ * produce rotate instructions:
+ * http://llvm.1065342.n5.nabble.com/rotate-td47679.html
+ * Eventually, someone should feel free to implement an llvm-specific version
+ */
+
+_CLC_OVERLOAD _CLC_DEF GENTYPE rotate(GENTYPE x, GENTYPE n){
+    //Try to avoid extra work if someone's spinning the value through multiple
+    //full rotations
+    n = n % (GENTYPE)GENSIZE;
+
+    //The actual shift algorithm is based on a rotate right, so convert n to a
+    //right-rotate amount in [0, GENSIZE)
+    //e.g. a rotate of int by 5 bits becomes rotate right by 27 bits
+    //     and a rotate of int by -4 bits becomes rotate right by 4
+    GENTYPE amt = (n > (GENTYPE)0 ? (GENTYPE)GENSIZE - n : (GENTYPE)0 - n );
+
+    //The low amt bits of x wrap around to the top. The mask is built as
+    //~(~0 << k) rather than (1 << k) - 1, which can overflow a signed GENTYPE
+    //when k == GENSIZE - 1
+    GENTYPE mask = ~( ~((GENTYPE)0) << amt );
+    GENTYPE wrapped_bits = x & mask;
+
+    //Shift right, then mask off the sign-extension bits of a signed x
+    //(keep the low GENSIZE - amt bits); if amt is 0, keep everything
+    GENTYPE se_mask = !amt ? ~((GENTYPE)0) :
+        ~( ~((GENTYPE)0) << ((GENTYPE)GENSIZE - amt) );
+    GENTYPE unwrapped_bits = (x >> amt) & se_mask;
+
+    //Combine, moving the wrapped bits into the vacated high positions
+    return unwrapped_bits | (wrapped_bits << ( (GENTYPE)GENSIZE - amt ) );
+}
\ No newline at end of file