diff options
author | Junyan He <junyan.he@linux.intel.com> | 2015-11-18 14:06:43 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2015-11-25 13:22:03 +0800 |
commit | 7840918e23639fee74e745c7fcf4d4ecbcc6c25d (patch) | |
tree | 6780161a0fe5b8c4eec40ef931d5375ab0b78227 | |
parent | f7672c62024b677774a705c4fc88deb12344fa04 (diff) |
libocl: Add the module for work_group functions.
Signed-off-by: Junyan He <junyan.he@linux.intel.com>
Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- | backend/src/libocl/CMakeLists.txt | 2 | ||||
-rw-r--r-- | backend/src/libocl/include/ocl.h | 1 | ||||
-rw-r--r-- | backend/src/libocl/include/ocl_work_group.h | 118 | ||||
-rw-r--r-- | backend/src/libocl/src/ocl_work_group.cl | 126 |
4 files changed, 246 insertions, 1 deletion
diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt index 0fffd9b1..1d1ec680 100644 --- a/backend/src/libocl/CMakeLists.txt +++ b/backend/src/libocl/CMakeLists.txt @@ -53,7 +53,7 @@ FOREACH(M ${OCL_COPY_HEADERS}) ENDFOREACH(M) SET (OCL_COPY_MODULES ocl_workitem ocl_atom ocl_async ocl_sync ocl_memcpy - ocl_memset ocl_misc ocl_vload ocl_geometric ocl_image) + ocl_memset ocl_misc ocl_vload ocl_geometric ocl_image ocl_work_group) FOREACH(M ${OCL_COPY_MODULES}) COPY_THE_HEADER(${M}) COPY_THE_SOURCE(${M}) diff --git a/backend/src/libocl/include/ocl.h b/backend/src/libocl/include/ocl.h index 3a1f4bfd..abb2bd43 100644 --- a/backend/src/libocl/include/ocl.h +++ b/backend/src/libocl/include/ocl.h @@ -39,6 +39,7 @@ #include "ocl_vload.h" #include "ocl_workitem.h" #include "ocl_simd.h" +#include "ocl_work_group.h" #pragma OPENCL EXTENSION cl_khr_fp64 : disable #pragma OPENCL EXTENSION cl_khr_fp16 : disable #endif diff --git a/backend/src/libocl/include/ocl_work_group.h b/backend/src/libocl/include/ocl_work_group.h new file mode 100644 index 00000000..ebd264fd --- /dev/null +++ b/backend/src/libocl/include/ocl_work_group.h @@ -0,0 +1,118 @@ +/* + * Copyright © 2012 - 2014 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. 
+ * + */ +#ifndef __OCL_WORK_GROUP_H__ +#define __OCL_WORK_GROUP_H__ +#include "ocl_types.h" + +int work_group_all(int predicate); +int work_group_any(int predicate); + +/* broadcast */ +OVERLOADABLE int work_group_broadcast(int a, size_t local_id); +OVERLOADABLE uint work_group_broadcast(uint a, size_t local_id); +OVERLOADABLE long work_group_broadcast(long a, size_t local_id); +OVERLOADABLE ulong work_group_broadcast(ulong a, size_t local_id); +OVERLOADABLE float work_group_broadcast(float a, size_t local_id); +OVERLOADABLE double work_group_broadcast(double a, size_t local_id); + +OVERLOADABLE int work_group_broadcast(int a, size_t local_id_x, size_t local_id_y); +OVERLOADABLE uint work_group_broadcast(uint a, size_t local_id_x, size_t local_id_y); +OVERLOADABLE long work_group_broadcast(long a, size_t local_id_x, size_t local_id_y); +OVERLOADABLE ulong work_group_broadcast(ulong a, size_t local_id_x, size_t local_id_y); +OVERLOADABLE float work_group_broadcast(float a, size_t local_id_x, size_t local_id_y); +OVERLOADABLE double work_group_broadcast(double a, size_t local_id_x, size_t local_id_y); + +OVERLOADABLE int work_group_broadcast(int a, size_t local_id_x, size_t local_id_y, size_t local_id_z); +OVERLOADABLE uint work_group_broadcast(uint a, size_t local_id_x, size_t local_id_y, size_t local_id_z); +OVERLOADABLE long work_group_broadcast(long a, size_t local_id_x, size_t local_id_y, size_t local_id_z); +OVERLOADABLE ulong work_group_broadcast(ulong a, size_t local_id_x, size_t local_id_y, size_t local_id_z); +OVERLOADABLE float work_group_broadcast(float a, size_t local_id_x, size_t local_id_y, size_t local_id_z); +OVERLOADABLE double work_group_broadcast(double a, size_t local_id_x, size_t local_id_y, size_t local_id_z); + +/* reduce add */ +OVERLOADABLE int work_group_reduce_add(int x); +OVERLOADABLE uint work_group_reduce_add(uint x); +OVERLOADABLE long work_group_reduce_add(long x); +OVERLOADABLE ulong work_group_reduce_add(ulong x); +OVERLOADABLE 
float work_group_reduce_add(float x); +OVERLOADABLE double work_group_reduce_add(double x); + +/* reduce min */ +OVERLOADABLE int work_group_reduce_min(int x); +OVERLOADABLE uint work_group_reduce_min(uint x); +OVERLOADABLE long work_group_reduce_min(long x); +OVERLOADABLE ulong work_group_reduce_min(ulong x); +OVERLOADABLE float work_group_reduce_min(float x); +OVERLOADABLE double work_group_reduce_min(double x); + +/* reduce max */ +OVERLOADABLE int work_group_reduce_max(int x); +OVERLOADABLE uint work_group_reduce_max(uint x); +OVERLOADABLE long work_group_reduce_max(long x); +OVERLOADABLE ulong work_group_reduce_max(ulong x); +OVERLOADABLE float work_group_reduce_max(float x); +OVERLOADABLE double work_group_reduce_max(double x); + +/* scan_inclusive add */ +OVERLOADABLE int work_group_scan_inclusive_add(int x); +OVERLOADABLE uint work_group_scan_inclusive_add(uint x); +OVERLOADABLE long work_group_scan_inclusive_add(long x); +OVERLOADABLE ulong work_group_scan_inclusive_add(ulong x); +OVERLOADABLE float work_group_scan_inclusive_add(float x); +OVERLOADABLE double work_group_scan_inclusive_add(double x); + +/* scan_inclusive min */ +OVERLOADABLE int work_group_scan_inclusive_min(int x); +OVERLOADABLE uint work_group_scan_inclusive_min(uint x); +OVERLOADABLE long work_group_scan_inclusive_min(long x); +OVERLOADABLE ulong work_group_scan_inclusive_min(ulong x); +OVERLOADABLE float work_group_scan_inclusive_min(float x); +OVERLOADABLE double work_group_scan_inclusive_min(double x); + +/* scan_inclusive max */ +OVERLOADABLE int work_group_scan_inclusive_max(int x); +OVERLOADABLE uint work_group_scan_inclusive_max(uint x); +OVERLOADABLE long work_group_scan_inclusive_max(long x); +OVERLOADABLE ulong work_group_scan_inclusive_max(ulong x); +OVERLOADABLE float work_group_scan_inclusive_max(float x); +OVERLOADABLE double work_group_scan_inclusive_max(double x); + +/* scan_exclusive add */ +OVERLOADABLE int work_group_scan_exclusive_add(int x); +OVERLOADABLE uint 
work_group_scan_exclusive_add(uint x); +OVERLOADABLE long work_group_scan_exclusive_add(long x); +OVERLOADABLE ulong work_group_scan_exclusive_add(ulong x); +OVERLOADABLE float work_group_scan_exclusive_add(float x); +OVERLOADABLE double work_group_scan_exclusive_add(double x); + +/* scan_exclusive min */ +OVERLOADABLE int work_group_scan_exclusive_min(int x); +OVERLOADABLE uint work_group_scan_exclusive_min(uint x); +OVERLOADABLE long work_group_scan_exclusive_min(long x); +OVERLOADABLE ulong work_group_scan_exclusive_min(ulong x); +OVERLOADABLE float work_group_scan_exclusive_min(float x); +OVERLOADABLE double work_group_scan_exclusive_min(double x); + +/* scan_exclusive max */ +OVERLOADABLE int work_group_scan_exclusive_max(int x); +OVERLOADABLE uint work_group_scan_exclusive_max(uint x); +OVERLOADABLE long work_group_scan_exclusive_max(long x); +OVERLOADABLE ulong work_group_scan_exclusive_max(ulong x); +OVERLOADABLE float work_group_scan_exclusive_max(float x); +OVERLOADABLE double work_group_scan_exclusive_max(double x); +#endif /* __OCL_WORK_GROUP_H__ */ diff --git a/backend/src/libocl/src/ocl_work_group.cl b/backend/src/libocl/src/ocl_work_group.cl new file mode 100644 index 00000000..065b223d --- /dev/null +++ b/backend/src/libocl/src/ocl_work_group.cl @@ -0,0 +1,126 @@ +/* + * Copyright © 2012 - 2014 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. 
If not, see <http://www.gnu.org/licenses/>. + * + */ +#include "ocl_work_group.h" + +int __gen_ocl_work_group_all(int predicate); +int work_group_all(int predicate) { + return __gen_ocl_work_group_all(predicate); +} + +int __gen_ocl_work_group_any(int predicate); +int work_group_any(int predicate) { + return __gen_ocl_work_group_any(predicate); +} + +/* broadcast */ +#define BROADCAST_IMPL(GEN_TYPE) \ + OVERLOADABLE GEN_TYPE __gen_ocl_work_group_broadcast(GEN_TYPE a, size_t local_id); \ + OVERLOADABLE GEN_TYPE work_group_broadcast(GEN_TYPE a, size_t local_id) { \ + return __gen_ocl_work_group_broadcast(a, local_id); \ + } \ + OVERLOADABLE GEN_TYPE __gen_ocl_work_group_broadcast(GEN_TYPE a, size_t local_id_x, size_t local_id_y); \ + OVERLOADABLE GEN_TYPE work_group_broadcast(GEN_TYPE a, size_t local_id_x, size_t local_id_y) { \ + return __gen_ocl_work_group_broadcast(a, local_id_x, local_id_y); \ + } \ + OVERLOADABLE GEN_TYPE __gen_ocl_work_group_broadcast(GEN_TYPE a, size_t local_id_x, size_t local_id_y, size_t local_id_z); \ + OVERLOADABLE GEN_TYPE work_group_broadcast(GEN_TYPE a, size_t local_id_x, size_t local_id_y, size_t local_id_z) { \ + return __gen_ocl_work_group_broadcast(a, local_id_x, local_id_y, local_id_z); \ + } + +BROADCAST_IMPL(int) +BROADCAST_IMPL(uint) +BROADCAST_IMPL(long) +BROADCAST_IMPL(ulong) +BROADCAST_IMPL(float) +BROADCAST_IMPL(double) +#undef BROADCAST_IMPL + + +#define RANGE_OP(RANGE, OP, GEN_TYPE) \ + OVERLOADABLE GEN_TYPE __gen_ocl_work_group_##RANGE##_##OP(GEN_TYPE x); \ + OVERLOADABLE GEN_TYPE work_group_##RANGE##_##OP(GEN_TYPE x) { \ + return __gen_ocl_work_group_##RANGE##_##OP(x); \ + } + +/* reduce add */ +RANGE_OP(reduce, add, int) +RANGE_OP(reduce, add, uint) +RANGE_OP(reduce, add, long) +RANGE_OP(reduce, add, ulong) +RANGE_OP(reduce, add, float) +RANGE_OP(reduce, add, double) +/* reduce min */ +RANGE_OP(reduce, min, int) +RANGE_OP(reduce, min, uint) +RANGE_OP(reduce, min, long) +RANGE_OP(reduce, min, ulong) +RANGE_OP(reduce, 
min, float) +RANGE_OP(reduce, min, double) +/* reduce max */ +RANGE_OP(reduce, max, int) +RANGE_OP(reduce, max, uint) +RANGE_OP(reduce, max, long) +RANGE_OP(reduce, max, ulong) +RANGE_OP(reduce, max, float) +RANGE_OP(reduce, max, double) + +/* scan_inclusive add */ +RANGE_OP(scan_inclusive, add, int) +RANGE_OP(scan_inclusive, add, uint) +RANGE_OP(scan_inclusive, add, long) +RANGE_OP(scan_inclusive, add, ulong) +RANGE_OP(scan_inclusive, add, float) +RANGE_OP(scan_inclusive, add, double) +/* scan_inclusive min */ +RANGE_OP(scan_inclusive, min, int) +RANGE_OP(scan_inclusive, min, uint) +RANGE_OP(scan_inclusive, min, long) +RANGE_OP(scan_inclusive, min, ulong) +RANGE_OP(scan_inclusive, min, float) +RANGE_OP(scan_inclusive, min, double) +/* scan_inclusive max */ +RANGE_OP(scan_inclusive, max, int) +RANGE_OP(scan_inclusive, max, uint) +RANGE_OP(scan_inclusive, max, long) +RANGE_OP(scan_inclusive, max, ulong) +RANGE_OP(scan_inclusive, max, float) +RANGE_OP(scan_inclusive, max, double) + +/* scan_exclusive add */ +RANGE_OP(scan_exclusive, add, int) +RANGE_OP(scan_exclusive, add, uint) +RANGE_OP(scan_exclusive, add, long) +RANGE_OP(scan_exclusive, add, ulong) +RANGE_OP(scan_exclusive, add, float) +RANGE_OP(scan_exclusive, add, double) +/* scan_exclusive min */ +RANGE_OP(scan_exclusive, min, int) +RANGE_OP(scan_exclusive, min, uint) +RANGE_OP(scan_exclusive, min, long) +RANGE_OP(scan_exclusive, min, ulong) +RANGE_OP(scan_exclusive, min, float) +RANGE_OP(scan_exclusive, min, double) +/* scan_exclusive max */ +RANGE_OP(scan_exclusive, max, int) +RANGE_OP(scan_exclusive, max, uint) +RANGE_OP(scan_exclusive, max, long) +RANGE_OP(scan_exclusive, max, ulong) +RANGE_OP(scan_exclusive, max, float) +RANGE_OP(scan_exclusive, max, double) + +#undef RANGE_OP |