diff options
-rw-r--r-- | kernels/compiler_bsort.cl | 47 | ||||
-rw-r--r-- | utests/CMakeLists.txt | 3 | ||||
-rw-r--r-- | utests/compiler_bsort.cpp | 45 |
3 files changed, 94 insertions, 1 deletions
diff --git a/kernels/compiler_bsort.cl b/kernels/compiler_bsort.cl
new file mode 100644
index 00000000..db40da80
--- /dev/null
+++ b/kernels/compiler_bsort.cl
@@ -0,0 +1,47 @@
+#define UP 0 /* sort direction: XOR with 0 leaves the comparison mask unchanged */
+#define DOWN -1 /* sort direction: XOR with -1 inverts every bit of the comparison mask */
+
+/* Sort the elements of one vector in direction dir (three compare/shuffle steps); uses comp, mask1-mask3 and add1-add3 from the enclosing scope */
+#define SORT_VECTOR(input, dir) \
+  comp = input < shuffle(input, mask1) ^ dir; \
+  input = shuffle(input, as_uint4(comp + add1)); \
+  comp = input < shuffle(input, mask2) ^ dir; \
+  input = shuffle(input, as_uint4(comp * 2 + add2)); \
+  comp = input < shuffle(input, mask3) ^ dir; \
+  input = shuffle(input, as_uint4(comp + add3)); \
+
+/* Sort elements between two vectors: comp is used as a shuffle2 mask choosing each lane from input1 or input2; uses temp, comp and add4 from the enclosing scope */
+#define SWAP_VECTORS(input1, input2, dir) \
+  temp = input1; \
+  comp = (input1 < input2 ^ dir) * 4 + add4; \
+  input1 = shuffle2(input1, input2, as_uint4(comp)); \
+  input2 = shuffle2(input2, temp, as_uint4(comp)); \
+
+__kernel void compiler_bsort(__global float4 *data) { /* reads data[0] and data[1], reorders their 8 floats, writes both back */
+
+  float4 input1, input2, temp;
+  int4 comp; /* scratch comparison result shared by both macros */
+
+  uint4 mask1 = (uint4)(1, 0, 3, 2); /* partner lane: neighbour within each pair */
+  uint4 mask2 = (uint4)(2, 3, 0, 1); /* partner lane: same position in the other half */
+  uint4 mask3 = (uint4)(3, 2, 1, 0); /* partner lane: mirrored position */
+
+  int4 add1 = (int4)(1, 1, 3, 3); /* add1-add3: per-lane base index that comp is added to, giving the shuffle mask */
+  int4 add2 = (int4)(2, 3, 2, 3);
+  int4 add3 = (int4)(1, 2, 2, 3);
+  int4 add4 = (int4)(4, 5, 6, 7); /* shuffle2 base index; SWAP_VECTORS adds comp * 4 to it */
+
+  input1 = data[0];
+  input2 = data[1];
+
+  SORT_VECTOR(input1, UP) /* the two vectors are first sorted in opposite directions */
+  SORT_VECTOR(input2, DOWN)
+
+  SWAP_VECTORS(input1, input2, UP) /* then elements are exchanged between the two vectors */
+
+  SORT_VECTOR(input1, UP) /* finally each vector is sorted again, both in direction UP */
+  SORT_VECTOR(input2, UP)
+
+  data[0] = input1;
+  data[1] = input2;
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 74189c62..d846b7b7 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -225,7 +225,8 @@ set (utests_sources
   compiler_sub_group_shuffle.cpp
   builtin_global_linear_id.cpp
   builtin_local_linear_id.cpp
- compiler_mix.cpp)
+ compiler_mix.cpp
+ compiler_bsort.cpp)
 
 if (LLVM_VERSION_NODOT VERSION_GREATER 34)
   SET(utests_sources
diff --git a/utests/compiler_bsort.cpp b/utests/compiler_bsort.cpp
new file mode 100644
index 00000000..31607aaf
--- /dev/null
+++ b/utests/compiler_bsort.cpp
@@ -0,0 +1,45
@@
+#include "utest_helper.hpp"
+/*
+ * This test exercises scalarization of non-constant extractelement.
+ * The bitonic sort kernel reaches that path through statements such as:
+ *
+ * comp = input < shuffle(input, mask1) ^ dir; \
+ * input = shuffle(input, as_uint4(comp + add1)); \
+ *
+ * The original buffer is
+ * {3.0 5.0 4.0 6.0 0.0 7.0 2.0 1.0}
+ * and the expected result is
+ * {0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0}
+ */
+void compiler_bsort(void)
+{
+  const int n = 8; // number of floats in the buffer
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_bsort");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  globals[0] = 1; // presumably the global work size: one work-item (confirm in utest_helper)
+  locals[0] = 1; // presumably the local work size (confirm in utest_helper)
+
+  OCL_MAP_BUFFER(0); // fill the buffer with the unsorted input values
+  ((float *)(buf_data[0]))[0] = 3.0f;
+  ((float *)(buf_data[0]))[1] = 5.0f;
+  ((float *)(buf_data[0]))[2] = 4.0f;
+  ((float *)(buf_data[0]))[3] = 6.0f;
+  ((float *)(buf_data[0]))[4] = 0.0f;
+  ((float *)(buf_data[0]))[5] = 7.0f;
+  ((float *)(buf_data[0]))[6] = 2.0f;
+  ((float *)(buf_data[0]))[7] = 1.0f;
+  OCL_UNMAP_BUFFER(0);
+
+  OCL_NDRANGE(1); // presumably enqueues the kernel over a 1-D range (confirm in utest_helper)
+
+  OCL_MAP_BUFFER(0); // read the buffer back and check it element by element
+  for (int i = 0; i < n; i ++) {
+    OCL_ASSERT(((float *)(buf_data[0]))[i] == (float)i); // element i must equal i, i.e. 0.0 .. 7.0 in order
+  }
+  OCL_UNMAP_BUFFER(0);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_bsort);
 |