summary refs log tree commit diff
diff options
context:
space:
mode:
author Junyan He <junyan.he@linux.intel.com> 2015-11-18 14:06:43 +0800
committer Yang Rong <rong.r.yang@intel.com> 2015-11-25 13:22:03 +0800
commit 7840918e23639fee74e745c7fcf4d4ecbcc6c25d (patch)
tree 6780161a0fe5b8c4eec40ef931d5375ab0b78227
parent f7672c62024b677774a705c4fc88deb12344fa04 (diff)
libocl: Add the module for work_group functions.
Signed-off-by: Junyan He <junyan.he@linux.intel.com>
Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- backend/src/libocl/CMakeLists.txt 2
-rw-r--r-- backend/src/libocl/include/ocl.h 1
-rw-r--r-- backend/src/libocl/include/ocl_work_group.h 118
-rw-r--r-- backend/src/libocl/src/ocl_work_group.cl 126
4 files changed, 246 insertions, 1 deletion
diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt
index 0fffd9b1..1d1ec680 100644
--- a/backend/src/libocl/CMakeLists.txt
+++ b/backend/src/libocl/CMakeLists.txt
@@ -53,7 +53,7 @@ FOREACH(M ${OCL_COPY_HEADERS})
ENDFOREACH(M)
SET (OCL_COPY_MODULES ocl_workitem ocl_atom ocl_async ocl_sync ocl_memcpy
- ocl_memset ocl_misc ocl_vload ocl_geometric ocl_image)
+ ocl_memset ocl_misc ocl_vload ocl_geometric ocl_image ocl_work_group)
FOREACH(M ${OCL_COPY_MODULES})
COPY_THE_HEADER(${M})
COPY_THE_SOURCE(${M})
diff --git a/backend/src/libocl/include/ocl.h b/backend/src/libocl/include/ocl.h
index 3a1f4bfd..abb2bd43 100644
--- a/backend/src/libocl/include/ocl.h
+++ b/backend/src/libocl/include/ocl.h
@@ -39,6 +39,7 @@
#include "ocl_vload.h"
#include "ocl_workitem.h"
#include "ocl_simd.h"
+#include "ocl_work_group.h"
#pragma OPENCL EXTENSION cl_khr_fp64 : disable
#pragma OPENCL EXTENSION cl_khr_fp16 : disable
#endif
diff --git a/backend/src/libocl/include/ocl_work_group.h b/backend/src/libocl/include/ocl_work_group.h
new file mode 100644
index 00000000..ebd264fd
--- /dev/null
+++ b/backend/src/libocl/include/ocl_work_group.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright © 2012 - 2014 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#ifndef __OCL_WORK_GROUP_H__
+#define __OCL_WORK_GROUP_H__
+#include "ocl_types.h"
+
+int work_group_all(int predicate); /* defined in src/ocl_work_group.cl; forwards to __gen_ocl_work_group_all */
+int work_group_any(int predicate); /* defined in src/ocl_work_group.cl; forwards to __gen_ocl_work_group_any */
+
+/* broadcast: work_group_broadcast overloads taking a single local id; one per type int, uint, long, ulong, float, double */
+OVERLOADABLE int work_group_broadcast(int a, size_t local_id);
+OVERLOADABLE uint work_group_broadcast(uint a, size_t local_id);
+OVERLOADABLE long work_group_broadcast(long a, size_t local_id);
+OVERLOADABLE ulong work_group_broadcast(ulong a, size_t local_id);
+OVERLOADABLE float work_group_broadcast(float a, size_t local_id);
+OVERLOADABLE double work_group_broadcast(double a, size_t local_id);
+/* broadcast: overloads taking two local ids (local_id_x, local_id_y), same six types */
+OVERLOADABLE int work_group_broadcast(int a, size_t local_id_x, size_t local_id_y);
+OVERLOADABLE uint work_group_broadcast(uint a, size_t local_id_x, size_t local_id_y);
+OVERLOADABLE long work_group_broadcast(long a, size_t local_id_x, size_t local_id_y);
+OVERLOADABLE ulong work_group_broadcast(ulong a, size_t local_id_x, size_t local_id_y);
+OVERLOADABLE float work_group_broadcast(float a, size_t local_id_x, size_t local_id_y);
+OVERLOADABLE double work_group_broadcast(double a, size_t local_id_x, size_t local_id_y);
+/* broadcast: overloads taking three local ids (local_id_x, local_id_y, local_id_z), same six types */
+OVERLOADABLE int work_group_broadcast(int a, size_t local_id_x, size_t local_id_y, size_t local_id_z);
+OVERLOADABLE uint work_group_broadcast(uint a, size_t local_id_x, size_t local_id_y, size_t local_id_z);
+OVERLOADABLE long work_group_broadcast(long a, size_t local_id_x, size_t local_id_y, size_t local_id_z);
+OVERLOADABLE ulong work_group_broadcast(ulong a, size_t local_id_x, size_t local_id_y, size_t local_id_z);
+OVERLOADABLE float work_group_broadcast(float a, size_t local_id_x, size_t local_id_y, size_t local_id_z);
+OVERLOADABLE double work_group_broadcast(double a, size_t local_id_x, size_t local_id_y, size_t local_id_z);
+
+/* reduce add: work_group_reduce_add, one overload per type int, uint, long, ulong, float, double; each returns its argument type */
+OVERLOADABLE int work_group_reduce_add(int x);
+OVERLOADABLE uint work_group_reduce_add(uint x);
+OVERLOADABLE long work_group_reduce_add(long x);
+OVERLOADABLE ulong work_group_reduce_add(ulong x);
+OVERLOADABLE float work_group_reduce_add(float x);
+OVERLOADABLE double work_group_reduce_add(double x);
+
+/* reduce min: work_group_reduce_min, same six type overloads as reduce add */
+OVERLOADABLE int work_group_reduce_min(int x);
+OVERLOADABLE uint work_group_reduce_min(uint x);
+OVERLOADABLE long work_group_reduce_min(long x);
+OVERLOADABLE ulong work_group_reduce_min(ulong x);
+OVERLOADABLE float work_group_reduce_min(float x);
+OVERLOADABLE double work_group_reduce_min(double x);
+
+/* reduce max: work_group_reduce_max, same six type overloads as reduce add */
+OVERLOADABLE int work_group_reduce_max(int x);
+OVERLOADABLE uint work_group_reduce_max(uint x);
+OVERLOADABLE long work_group_reduce_max(long x);
+OVERLOADABLE ulong work_group_reduce_max(ulong x);
+OVERLOADABLE float work_group_reduce_max(float x);
+OVERLOADABLE double work_group_reduce_max(double x);
+
+/* scan_inclusive add: work_group_scan_inclusive_add, one overload per type int, uint, long, ulong, float, double */
+OVERLOADABLE int work_group_scan_inclusive_add(int x);
+OVERLOADABLE uint work_group_scan_inclusive_add(uint x);
+OVERLOADABLE long work_group_scan_inclusive_add(long x);
+OVERLOADABLE ulong work_group_scan_inclusive_add(ulong x);
+OVERLOADABLE float work_group_scan_inclusive_add(float x);
+OVERLOADABLE double work_group_scan_inclusive_add(double x);
+
+/* scan_inclusive min: work_group_scan_inclusive_min, same six type overloads */
+OVERLOADABLE int work_group_scan_inclusive_min(int x);
+OVERLOADABLE uint work_group_scan_inclusive_min(uint x);
+OVERLOADABLE long work_group_scan_inclusive_min(long x);
+OVERLOADABLE ulong work_group_scan_inclusive_min(ulong x);
+OVERLOADABLE float work_group_scan_inclusive_min(float x);
+OVERLOADABLE double work_group_scan_inclusive_min(double x);
+
+/* scan_inclusive max: work_group_scan_inclusive_max, same six type overloads */
+OVERLOADABLE int work_group_scan_inclusive_max(int x);
+OVERLOADABLE uint work_group_scan_inclusive_max(uint x);
+OVERLOADABLE long work_group_scan_inclusive_max(long x);
+OVERLOADABLE ulong work_group_scan_inclusive_max(ulong x);
+OVERLOADABLE float work_group_scan_inclusive_max(float x);
+OVERLOADABLE double work_group_scan_inclusive_max(double x);
+
+/* scan_exclusive add: work_group_scan_exclusive_add, one overload per type int, uint, long, ulong, float, double */
+OVERLOADABLE int work_group_scan_exclusive_add(int x);
+OVERLOADABLE uint work_group_scan_exclusive_add(uint x);
+OVERLOADABLE long work_group_scan_exclusive_add(long x);
+OVERLOADABLE ulong work_group_scan_exclusive_add(ulong x);
+OVERLOADABLE float work_group_scan_exclusive_add(float x);
+OVERLOADABLE double work_group_scan_exclusive_add(double x);
+
+/* scan_exclusive min: work_group_scan_exclusive_min, same six type overloads */
+OVERLOADABLE int work_group_scan_exclusive_min(int x);
+OVERLOADABLE uint work_group_scan_exclusive_min(uint x);
+OVERLOADABLE long work_group_scan_exclusive_min(long x);
+OVERLOADABLE ulong work_group_scan_exclusive_min(ulong x);
+OVERLOADABLE float work_group_scan_exclusive_min(float x);
+OVERLOADABLE double work_group_scan_exclusive_min(double x);
+
+/* scan_exclusive max: work_group_scan_exclusive_max, same six type overloads */
+OVERLOADABLE int work_group_scan_exclusive_max(int x);
+OVERLOADABLE uint work_group_scan_exclusive_max(uint x);
+OVERLOADABLE long work_group_scan_exclusive_max(long x);
+OVERLOADABLE ulong work_group_scan_exclusive_max(ulong x);
+OVERLOADABLE float work_group_scan_exclusive_max(float x);
+OVERLOADABLE double work_group_scan_exclusive_max(double x);
+#endif /* __OCL_WORK_GROUP_H__ */
diff --git a/backend/src/libocl/src/ocl_work_group.cl b/backend/src/libocl/src/ocl_work_group.cl
new file mode 100644
index 00000000..065b223d
--- /dev/null
+++ b/backend/src/libocl/src/ocl_work_group.cl
@@ -0,0 +1,126 @@
+/*
+ * Copyright © 2012 - 2014 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#include "ocl_work_group.h"
+
+int __gen_ocl_work_group_all(int predicate); /* prototype only; not defined in this file (presumably supplied by the Gen backend -- confirm) */
+int work_group_all(int predicate) { /* thin wrapper: passes predicate through unchanged and returns the callee's result */
+ return __gen_ocl_work_group_all(predicate);
+}
+
+int __gen_ocl_work_group_any(int predicate); /* prototype only; not defined in this file (presumably supplied by the Gen backend -- confirm) */
+int work_group_any(int predicate) { /* thin wrapper: passes predicate through unchanged and returns the callee's result */
+ return __gen_ocl_work_group_any(predicate);
+}
+
+/* broadcast: BROADCAST_IMPL(GEN_TYPE) declares the one-, two- and three-id __gen_ocl_work_group_broadcast overloads for GEN_TYPE and defines the matching work_group_broadcast wrappers, each forwarding its arguments unchanged */
+#define BROADCAST_IMPL(GEN_TYPE) \
+ OVERLOADABLE GEN_TYPE __gen_ocl_work_group_broadcast(GEN_TYPE a, size_t local_id); \
+ OVERLOADABLE GEN_TYPE work_group_broadcast(GEN_TYPE a, size_t local_id) { \
+ return __gen_ocl_work_group_broadcast(a, local_id); \
+ } \
+ OVERLOADABLE GEN_TYPE __gen_ocl_work_group_broadcast(GEN_TYPE a, size_t local_id_x, size_t local_id_y); \
+ OVERLOADABLE GEN_TYPE work_group_broadcast(GEN_TYPE a, size_t local_id_x, size_t local_id_y) { \
+ return __gen_ocl_work_group_broadcast(a, local_id_x, local_id_y); \
+ } \
+ OVERLOADABLE GEN_TYPE __gen_ocl_work_group_broadcast(GEN_TYPE a, size_t local_id_x, size_t local_id_y, size_t local_id_z); \
+ OVERLOADABLE GEN_TYPE work_group_broadcast(GEN_TYPE a, size_t local_id_x, size_t local_id_y, size_t local_id_z) { \
+ return __gen_ocl_work_group_broadcast(a, local_id_x, local_id_y, local_id_z); \
+ }
+/* instantiate the three wrappers for each type declared in ocl_work_group.h */
+BROADCAST_IMPL(int)
+BROADCAST_IMPL(uint)
+BROADCAST_IMPL(long)
+BROADCAST_IMPL(ulong)
+BROADCAST_IMPL(float)
+BROADCAST_IMPL(double)
+#undef BROADCAST_IMPL
+
+/* RANGE_OP(RANGE, OP, GEN_TYPE): declares __gen_ocl_work_group_<RANGE>_<OP>(GEN_TYPE) and defines work_group_<RANGE>_<OP>(GEN_TYPE) as a wrapper that forwards x to it; names are built by token pasting */
+#define RANGE_OP(RANGE, OP, GEN_TYPE) \
+ OVERLOADABLE GEN_TYPE __gen_ocl_work_group_##RANGE##_##OP(GEN_TYPE x); \
+ OVERLOADABLE GEN_TYPE work_group_##RANGE##_##OP(GEN_TYPE x) { \
+ return __gen_ocl_work_group_##RANGE##_##OP(x); \
+ }
+
+/* reduce add: defines work_group_reduce_add wrappers for int, uint, long, ulong, float, double */
+RANGE_OP(reduce, add, int)
+RANGE_OP(reduce, add, uint)
+RANGE_OP(reduce, add, long)
+RANGE_OP(reduce, add, ulong)
+RANGE_OP(reduce, add, float)
+RANGE_OP(reduce, add, double)
+/* reduce min: defines work_group_reduce_min wrappers for the same six types */
+RANGE_OP(reduce, min, int)
+RANGE_OP(reduce, min, uint)
+RANGE_OP(reduce, min, long)
+RANGE_OP(reduce, min, ulong)
+RANGE_OP(reduce, min, float)
+RANGE_OP(reduce, min, double)
+/* reduce max: defines work_group_reduce_max wrappers for the same six types */
+RANGE_OP(reduce, max, int)
+RANGE_OP(reduce, max, uint)
+RANGE_OP(reduce, max, long)
+RANGE_OP(reduce, max, ulong)
+RANGE_OP(reduce, max, float)
+RANGE_OP(reduce, max, double)
+
+/* scan_inclusive add: defines work_group_scan_inclusive_add wrappers for int, uint, long, ulong, float, double */
+RANGE_OP(scan_inclusive, add, int)
+RANGE_OP(scan_inclusive, add, uint)
+RANGE_OP(scan_inclusive, add, long)
+RANGE_OP(scan_inclusive, add, ulong)
+RANGE_OP(scan_inclusive, add, float)
+RANGE_OP(scan_inclusive, add, double)
+/* scan_inclusive min: defines work_group_scan_inclusive_min wrappers for the same six types */
+RANGE_OP(scan_inclusive, min, int)
+RANGE_OP(scan_inclusive, min, uint)
+RANGE_OP(scan_inclusive, min, long)
+RANGE_OP(scan_inclusive, min, ulong)
+RANGE_OP(scan_inclusive, min, float)
+RANGE_OP(scan_inclusive, min, double)
+/* scan_inclusive max: defines work_group_scan_inclusive_max wrappers for the same six types */
+RANGE_OP(scan_inclusive, max, int)
+RANGE_OP(scan_inclusive, max, uint)
+RANGE_OP(scan_inclusive, max, long)
+RANGE_OP(scan_inclusive, max, ulong)
+RANGE_OP(scan_inclusive, max, float)
+RANGE_OP(scan_inclusive, max, double)
+
+/* scan_exclusive add: defines work_group_scan_exclusive_add wrappers for int, uint, long, ulong, float, double */
+RANGE_OP(scan_exclusive, add, int)
+RANGE_OP(scan_exclusive, add, uint)
+RANGE_OP(scan_exclusive, add, long)
+RANGE_OP(scan_exclusive, add, ulong)
+RANGE_OP(scan_exclusive, add, float)
+RANGE_OP(scan_exclusive, add, double)
+/* scan_exclusive min: defines work_group_scan_exclusive_min wrappers for the same six types */
+RANGE_OP(scan_exclusive, min, int)
+RANGE_OP(scan_exclusive, min, uint)
+RANGE_OP(scan_exclusive, min, long)
+RANGE_OP(scan_exclusive, min, ulong)
+RANGE_OP(scan_exclusive, min, float)
+RANGE_OP(scan_exclusive, min, double)
+/* scan_exclusive max: defines work_group_scan_exclusive_max wrappers for the same six types */
+RANGE_OP(scan_exclusive, max, int)
+RANGE_OP(scan_exclusive, max, uint)
+RANGE_OP(scan_exclusive, max, long)
+RANGE_OP(scan_exclusive, max, ulong)
+RANGE_OP(scan_exclusive, max, float)
+RANGE_OP(scan_exclusive, max, double)
+
+#undef RANGE_OP