summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--generic/include/clc/clc.h1
-rw-r--r--generic/include/clc/integer/gentype.inc11
-rw-r--r--generic/include/clc/integer/rotate.h2
-rw-r--r--generic/include/clc/integer/rotate.inc1
-rw-r--r--generic/lib/SOURCES1
-rw-r--r--generic/lib/integer/rotate.cl4
-rw-r--r--generic/lib/integer/rotate.inc35
7 files changed, 55 insertions, 0 deletions
diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
index c3d7d59..72f518a 100644
--- a/generic/include/clc/clc.h
+++ b/generic/include/clc/clc.h
@@ -63,6 +63,7 @@
#include <clc/integer/abs.h>
#include <clc/integer/abs_diff.h>
#include <clc/integer/add_sat.h>
+#include <clc/integer/rotate.h>
#include <clc/integer/sub_sat.h>
/* 6.11.2 and 6.11.3 Shared Integer/Math Functions */
diff --git a/generic/include/clc/integer/gentype.inc b/generic/include/clc/integer/gentype.inc
index 0b32efd..005b9af 100644
--- a/generic/include/clc/integer/gentype.inc
+++ b/generic/include/clc/integer/gentype.inc
@@ -1,3 +1,4 @@
+#define GENSIZE 8
#define GENTYPE char
#define UGENTYPE uchar
#define SGENTYPE char
@@ -94,6 +95,9 @@
#undef UGENTYPE
#undef SGENTYPE
+#undef GENSIZE
+#define GENSIZE 16
+
#define GENTYPE short
#define UGENTYPE ushort
#define SGENTYPE short
@@ -190,6 +194,9 @@
#undef UGENTYPE
#undef SGENTYPE
+#undef GENSIZE
+#define GENSIZE 32
+
#define GENTYPE int
#define UGENTYPE uint
#define SGENTYPE int
@@ -286,6 +293,9 @@
#undef UGENTYPE
#undef SGENTYPE
+#undef GENSIZE
+#define GENSIZE 64
+
#define GENTYPE long
#define UGENTYPE ulong
#define SGENTYPE long
@@ -382,4 +392,5 @@
#undef UGENTYPE
#undef SGENTYPE
+#undef GENSIZE
#undef BODY
diff --git a/generic/include/clc/integer/rotate.h b/generic/include/clc/integer/rotate.h
new file mode 100644
index 0000000..e163bc8
--- /dev/null
+++ b/generic/include/clc/integer/rotate.h
@@ -0,0 +1,2 @@
+#define BODY <clc/integer/rotate.inc>
+#include <clc/integer/gentype.inc>
diff --git a/generic/include/clc/integer/rotate.inc b/generic/include/clc/integer/rotate.inc
new file mode 100644
index 0000000..5720e1c
--- /dev/null
+++ b/generic/include/clc/integer/rotate.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL GENTYPE rotate(GENTYPE x, GENTYPE y); /* bitwise rotate of x by y bit positions, declared once per GENTYPE */
diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
index f639c83..495b3e7 100644
--- a/generic/lib/SOURCES
+++ b/generic/lib/SOURCES
@@ -8,6 +8,7 @@ integer/abs_diff.cl
integer/add_sat.cl
integer/add_sat.ll
integer/add_sat_impl.ll
+integer/rotate.cl
integer/sub_sat.cl
integer/sub_sat.ll
integer/sub_sat_impl.ll
diff --git a/generic/lib/integer/rotate.cl b/generic/lib/integer/rotate.cl
new file mode 100644
index 0000000..d7eff2b
--- /dev/null
+++ b/generic/lib/integer/rotate.cl
@@ -0,0 +1,4 @@
+#include <clc/clc.h>
+
+#define BODY <rotate.inc>
+#include <clc/integer/gentype.inc>
diff --git a/generic/lib/integer/rotate.inc b/generic/lib/integer/rotate.inc
new file mode 100644
index 0000000..e83dd51
--- /dev/null
+++ b/generic/lib/integer/rotate.inc
@@ -0,0 +1,35 @@
+/**
+ * rotate(x, n): rotates the bits of x left by n (right by -n when n is negative).
+ * Not necessarily optimal... but it produces correct results (at least for int).
+ * If we're lucky, LLVM will recognize the pattern and produce rotate
+ * instructions: http://llvm.1065342.n5.nabble.com/rotate-td47679.html
+ * NOTE(review): for signed GENTYPE, ((GENTYPE)1 << s) - 1 may overflow when the
+ * shift s is GENSIZE-1; confirm. Eventually, implement an llvm-specific version.
+ */
+
+_CLC_OVERLOAD _CLC_DEF GENTYPE rotate(GENTYPE x, GENTYPE n){
+ //Reduce n modulo the bit width (GENSIZE) so that multiple full rotations
+ //collapse into a single partial rotation
+ n = n % (GENTYPE)GENSIZE;
+
+ //Convert n into a rotate-right amount, since the shifts below implement a
+ //rotate right: a positive n becomes GENSIZE - n, anything else becomes -n
+ //e.g. a rotate of int by 5 bits becomes rotate right by 27 bits
+ // and a rotate of int by -4 bits becomes rotate right by 4
+ GENTYPE amt = (n > (GENTYPE)0 ? (GENTYPE)GENSIZE - n : (GENTYPE)0 - n );
+
+ //Isolate the low amt bits of x: these are the bits that will wrap around
+ GENTYPE mask = ( (GENTYPE)1 << amt ) - (GENTYPE)1;
+ GENTYPE wrapped_bits = x & mask;
+
+ //Shift the input value right and then AND a mask that keeps only the low
+ //GENSIZE - amt bits, which eliminates sign-extension interference
+ //if the rotate amount is 0, just use ~0 for a mask
+ GENTYPE se_mask = !amt ? ~((GENTYPE)0) :
+ ( ( (GENTYPE)1 << ((GENTYPE)GENSIZE - amt) ) - (GENTYPE)1 );
+ GENTYPE unwrapped_bits = x >> amt;
+ unwrapped_bits &= se_mask;
+
+ //Finally OR in the wrapped bits, shifted left by GENSIZE - amt into position
+ return unwrapped_bits | (wrapped_bits << ( (GENTYPE)GENSIZE - amt ) );
+} \ No newline at end of file