summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--kernels/compiler_bsort.cl47
-rw-r--r--utests/CMakeLists.txt3
-rw-r--r--utests/compiler_bsort.cpp45
3 files changed, 94 insertions, 1 deletions
diff --git a/kernels/compiler_bsort.cl b/kernels/compiler_bsort.cl
new file mode 100644
index 00000000..db40da80
--- /dev/null
+++ b/kernels/compiler_bsort.cl
@@ -0,0 +1,47 @@
+#define UP 0 /* direction flag XORed into each comparison result: 0 leaves it unchanged */
+#define DOWN -1 /* direction flag XORed into each comparison result: -1 (all bits set) inverts it */
+
+/* Sort the elements of one 4-wide vector in direction dir (UP or DOWN) with three compare-and-shuffle stages. Uses comp, mask1..mask3 and add1..add3 from the expansion site and expands to bare statements (no do/while wrapper), so use it only as a standalone statement. */
+#define SORT_VECTOR(input, dir) \
+ comp = input < shuffle(input, mask1) ^ dir; /* '<' binds tighter than '^': dir is XORed with the comparison result */ \
+ input = shuffle(input, as_uint4(comp + add1)); /* shuffle indices are computed at run time from comp */ \
+ comp = input < shuffle(input, mask2) ^ dir; \
+ input = shuffle(input, as_uint4(comp * 2 + add2)); \
+ comp = input < shuffle(input, mask3) ^ dir; \
+ input = shuffle(input, as_uint4(comp + add3)); \
+
+/* Lane-wise compare-exchange between two 4-wide vectors in direction dir. Uses temp, comp and add4 from the expansion site and expands to bare statements (no do/while wrapper), so use it only as a standalone statement. */
+#define SWAP_VECTORS(input1, input2, dir) \
+ temp = input1; /* keep the original input1: it is overwritten before input2 is rebuilt */ \
+ comp = (input1 < input2 ^ dir) * 4 + add4; /* assuming comparisons yield -1/0 per lane (OpenCL C), lane i becomes index i or i + 4 */ \
+ input1 = shuffle2(input1, input2, as_uint4(comp)); \
+ input2 = shuffle2(input2, temp, as_uint4(comp)); \
+
+__kernel void compiler_bsort(__global float4 *data) {
+ /* Sorts the 8 floats in data[0] and data[1] in place. No work-item ID is used, so every work-item processes the same two vectors. */
+ float4 input1, input2, temp;
+ int4 comp;
+ /* temp and comp above are scratch variables written by the macros; the masks below select each lane's comparison partner. */
+ uint4 mask1 = (uint4)(1, 0, 3, 2); /* lane i paired with lane i ^ 1 */
+ uint4 mask2 = (uint4)(2, 3, 0, 1); /* lane i paired with lane i ^ 2 */
+ uint4 mask3 = (uint4)(3, 2, 1, 0); /* lane i paired with lane 3 - i */
+ /* Base shuffle indices; the macros add the (scaled) comparison result to these. */
+ int4 add1 = (int4)(1, 1, 3, 3);
+ int4 add2 = (int4)(2, 3, 2, 3);
+ int4 add3 = (int4)(1, 2, 2, 3);
+ int4 add4 = (int4)(4, 5, 6, 7);
+ /* Work on local copies of the two vectors. */
+ input1 = data[0];
+ input2 = data[1];
+ /* Sort the two vectors in opposite directions before merging them. */
+ SORT_VECTOR(input1, UP)
+ SORT_VECTOR(input2, DOWN)
+ /* Lane-wise compare-exchange between the two vectors. */
+ SWAP_VECTORS(input1, input2, UP)
+ /* Final pass: sort each vector UP. */
+ SORT_VECTOR(input1, UP)
+ SORT_VECTOR(input2, UP)
+ /* Store the results back to the __global buffer. */
+ data[0] = input1;
+ data[1] = input2;
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 74189c62..d846b7b7 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -225,7 +225,8 @@ set (utests_sources
compiler_sub_group_shuffle.cpp
builtin_global_linear_id.cpp
builtin_local_linear_id.cpp
- compiler_mix.cpp)
+ compiler_mix.cpp
+ compiler_bsort.cpp)
if (LLVM_VERSION_NODOT VERSION_GREATER 34)
SET(utests_sources
diff --git a/utests/compiler_bsort.cpp b/utests/compiler_bsort.cpp
new file mode 100644
index 00000000..31607aaf
--- /dev/null
+++ b/utests/compiler_bsort.cpp
@@ -0,0 +1,45 @@
+#include "utest_helper.hpp"
+/*
+ * This test covers scalarization of extractelement with a non-constant
+ * index. The bitonic sort kernel exercises that path in statements such as:
+ *
+ * comp = input < shuffle(input, mask1) ^ dir; \
+ * input = shuffle(input, as_uint4(comp + add1)); \
+ *
+ * The original buffer is
+ * {3.0 5.0 4.0 6.0 0.0 7.0 2.0 1.0}
+ * and the expected result is
+ * {0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0}
+ */
+void compiler_bsort(void)
+{
+ const int n = 8;
+ // Host side of the test: fill an n-float buffer, run the kernel, then check that element i equals i.
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_bsort");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ globals[0] = 1; // presumably the global work size (one work-item) - confirm in utest_helper.hpp
+ locals[0] = 1; // presumably the local work size - confirm in utest_helper.hpp
+ // Write the unsorted input {3, 5, 4, 6, 0, 7, 2, 1} into the buffer.
+ OCL_MAP_BUFFER(0);
+ ((float *)(buf_data[0]))[0] = 3.0f;
+ ((float *)(buf_data[0]))[1] = 5.0f;
+ ((float *)(buf_data[0]))[2] = 4.0f;
+ ((float *)(buf_data[0]))[3] = 6.0f;
+ ((float *)(buf_data[0]))[4] = 0.0f;
+ ((float *)(buf_data[0]))[5] = 7.0f;
+ ((float *)(buf_data[0]))[6] = 2.0f;
+ ((float *)(buf_data[0]))[7] = 1.0f;
+ OCL_UNMAP_BUFFER(0);
+ // Run the kernel (the argument is presumably the number of NDRange dimensions - confirm).
+ OCL_NDRANGE(1);
+ // Verify: the buffer must now hold 0.0 .. 7.0 in ascending order.
+ OCL_MAP_BUFFER(0);
+ for (int i = 0; i < n; i ++) {
+ OCL_ASSERT(((float *)(buf_data[0]))[i] == (float)i);
+ }
+ OCL_UNMAP_BUFFER(0);
+}
+// NOTE(review): presumably registers compiler_bsort with the utest framework - confirm in utest_helper.hpp.
+MAKE_UTEST_FROM_FUNCTION(compiler_bsort);