summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Aaron Watry <awatry@gmail.com> 2013-03-27 20:17:03 -0500
committer: Tom Stellard <thomas.stellard@amd.com> 2013-04-08 07:12:55 -0700
commit: 825f94136c4a083a860a7ae7f76527aea8845e35 (patch)
tree: 6b1f1ae7891ac859d71df4d752f698b9dc9d6c12
parent: e2f2f9498455eb3130b6f4b4732574771553b7e1 (diff)
Simplify rotate implementation a bit.
Much more understandable/readable as a result, and probably more efficient.
-rw-r--r-- generic/include/clc/integer/gentype.inc | 16
-rw-r--r-- generic/lib/integer/rotate.inc | 42
2 files changed, 37 insertions, 21 deletions
diff --git a/generic/include/clc/integer/gentype.inc b/generic/include/clc/integer/gentype.inc
index 005b9af..dd7d061 100644
--- a/generic/include/clc/integer/gentype.inc
+++ b/generic/include/clc/integer/gentype.inc
@@ -2,7 +2,9 @@
#define GENTYPE char
#define UGENTYPE uchar
#define SGENTYPE char
+#define SCALAR 1
#include BODY
+#undef SCALAR
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
@@ -50,7 +52,9 @@
#define GENTYPE uchar
#define UGENTYPE uchar
#define SGENTYPE char
+#define SCALAR 1
#include BODY
+#undef SCALAR
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
@@ -101,7 +105,9 @@
#define GENTYPE short
#define UGENTYPE ushort
#define SGENTYPE short
+#define SCALAR 1
#include BODY
+#undef SCALAR
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
@@ -149,7 +155,9 @@
#define GENTYPE ushort
#define UGENTYPE ushort
#define SGENTYPE short
+#define SCALAR 1
#include BODY
+#undef SCALAR
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
@@ -200,7 +208,9 @@
#define GENTYPE int
#define UGENTYPE uint
#define SGENTYPE int
+#define SCALAR 1
#include BODY
+#undef SCALAR
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
@@ -248,7 +258,9 @@
#define GENTYPE uint
#define UGENTYPE uint
#define SGENTYPE int
+#define SCALAR 1
#include BODY
+#undef SCALAR
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
@@ -299,7 +311,9 @@
#define GENTYPE long
#define UGENTYPE ulong
#define SGENTYPE long
+#define SCALAR 1
#include BODY
+#undef SCALAR
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
@@ -347,7 +361,9 @@
#define GENTYPE ulong
#define UGENTYPE ulong
#define SGENTYPE long
+#define SCALAR 1
#include BODY
+#undef SCALAR
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
diff --git a/generic/lib/integer/rotate.inc b/generic/lib/integer/rotate.inc
index e83dd51..7792a97 100644
--- a/generic/lib/integer/rotate.inc
+++ b/generic/lib/integer/rotate.inc
@@ -11,25 +11,25 @@ _CLC_OVERLOAD _CLC_DEF GENTYPE rotate(GENTYPE x, GENTYPE n){
//Try to avoid extra work if someone's spinning the value through multiple
//full rotations
n = n % (GENTYPE)GENSIZE;
-
- //Determine if we're doing a right or left shift on each component
- //The actual shift algorithm is based on a rotate right
- //e.g. a rotate of int by 5 bits becomes rotate right by 26 bits
- // and a rotate of int by -4 bits becomes rotate right by 4
- GENTYPE amt = (n > (GENTYPE)0 ? (GENTYPE)GENSIZE - n : (GENTYPE)0 - n );
-
- //Calculate the bits that will wrap
- GENTYPE mask = ( (GENTYPE)1 << amt ) - (GENTYPE)1;
- GENTYPE wrapped_bits = x & mask;
-
- //Shift the input value right and then AND a mask that eliminates
- //sign-extension interference
- //if the rotate amount is 0, just use ~0 for a mask
- GENTYPE se_mask = !amt ? ~((GENTYPE)0) :
- ( ( (GENTYPE)1 << ((GENTYPE)GENSIZE - amt) ) - (GENTYPE)1 );
- GENTYPE unwrapped_bits = x >> amt;
- unwrapped_bits &= se_mask;
-
- //Finally shift the input right after moving the wrapped bits into position
- return unwrapped_bits | (wrapped_bits << ( (GENTYPE)GENSIZE - amt ) );
+
+#ifdef SCALAR
+ if (n > 0){
+ return (x << n) | (((UGENTYPE)x) >> (GENSIZE - n));
+ } else if (n == 0){
+ return x;
+ } else {
+ return ( (((UGENTYPE)x) >> -n) | (x << (GENSIZE + n)) );
+ }
+#else
+ UGENTYPE x_1 = __builtin_astype(x, UGENTYPE);
+
+ UGENTYPE amt;
+ amt = (n < (GENTYPE)0 ? __builtin_astype((GENTYPE)0-n, UGENTYPE) : (UGENTYPE)0);
+ x_1 = (x_1 >> amt) | (x_1 << ((UGENTYPE)GENSIZE - amt));
+
+ amt = (n < (GENTYPE)0 ? (UGENTYPE)0 : __builtin_astype(n, UGENTYPE));
+ x_1 = (x_1 << amt) | (x_1 >> ((UGENTYPE)GENSIZE - amt));
+
+ return __builtin_astype(x_1, GENTYPE);
+#endif
} \ No newline at end of file