summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZhigang Gong <zhigang.gong@intel.com>2015-04-15 14:40:39 +0800
committerZhigang Gong <zhigang.gong@intel.com>2015-04-15 14:40:39 +0800
commit73c31662b0a59100decef275f209df537bb0f404 (patch)
treef241e4421c2f092a6733ebeba1e5f38f33192e64
parentd566a8310d2211f2cafaa18212b7695f9be0fbc0 (diff)
draft to introduce enqueue builtin functions.enqueue
Signed-off-by: Zhigang Gong <zhigang.gong@intel.com>
-rw-r--r--backend/src/backend/program.cpp1
-rw-r--r--backend/src/libocl/CMakeLists.txt6
-rw-r--r--backend/src/libocl/include/ocl.h1
-rw-r--r--backend/src/libocl/include/ocl_enqueue.h65
-rw-r--r--backend/src/libocl/include/ocl_types.h4
-rw-r--r--backend/src/libocl/src/ocl_enqueue.cl36
-rw-r--r--backend/src/llvm/llvm_bitcode_link.cpp3
-rw-r--r--backend/src/llvm/llvm_gen_ocl_function.hxx2
-rw-r--r--backend/src/llvm/llvm_to_gen.cpp3
-rw-r--r--kernels/test_copy_buffer.cl13
10 files changed, 125 insertions, 9 deletions
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index eee7c3c5..202d1bd3 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -554,6 +554,7 @@ namespace gbe {
args.push_back(input);
args.push_back("-ffp-contract=off");
+ args.push_back("-fblocks");
// The compiler invocation needs a DiagnosticsEngine so it can report problems
std::string ErrorString;
diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt
index 6b825b04..a62d7e6a 100644
--- a/backend/src/libocl/CMakeLists.txt
+++ b/backend/src/libocl/CMakeLists.txt
@@ -52,7 +52,7 @@ FOREACH(M ${OCL_COPY_HEADERS})
COPY_THE_HEADER(${M})
ENDFOREACH(M)
-SET (OCL_COPY_MODULES ocl_workitem ocl_atom ocl_async ocl_sync ocl_misc ocl_vload ocl_geometric ocl_image)
+SET (OCL_COPY_MODULES ocl_workitem ocl_atom ocl_async ocl_enqueue ocl_sync ocl_misc ocl_vload ocl_geometric ocl_image)
FOREACH(M ${OCL_COPY_MODULES})
COPY_THE_HEADER(${M})
COPY_THE_SOURCE(${M})
@@ -200,14 +200,14 @@ ADD_CUSTOM_COMMAND(OUTPUT ${OCL_OBJECT_DIR}/beignet.bc
ADD_CUSTOM_COMMAND(OUTPUT ${OCL_OBJECT_DIR}/beignet.local.pch
COMMAND mkdir -p ${OCL_OBJECT_DIR}
- COMMAND ${CLANG_EXECUTABLE} -cc1 ${CLANG_OCL_FLAGS} -triple spir -I ${OCL_OBJECT_DIR}/include/ -emit-pch -x cl ${OCL_OBJECT_DIR}/include/ocl.h -o ${OCL_OBJECT_DIR}/beignet.local.pch
+ COMMAND ${CLANG_EXECUTABLE} -cc1 ${CLANG_OCL_FLAGS} -triple spir -I ${OCL_OBJECT_DIR}/include/ -fblocks -emit-pch -x cl ${OCL_OBJECT_DIR}/include/ocl.h -o ${OCL_OBJECT_DIR}/beignet.local.pch
DEPENDS ${OCL_HEADER_FILES}
COMMENT "Generate the pch file: ${OCL_OBJECT_DIR}/beignet.local.pch"
)
ADD_CUSTOM_COMMAND(OUTPUT ${OCL_OBJECT_DIR}/beignet.pch
COMMAND mkdir -p ${OCL_OBJECT_DIR}
- COMMAND ${CLANG_EXECUTABLE} -cc1 ${CLANG_OCL_FLAGS} -triple spir -I ${OCL_OBJECT_DIR}/include/ --relocatable-pch -emit-pch -isysroot ${LIBOCL_BINARY_DIR} -x cl ${OCL_OBJECT_DIR}/include/ocl.h -o ${OCL_OBJECT_DIR}/beignet.pch
+ COMMAND ${CLANG_EXECUTABLE} -cc1 ${CLANG_OCL_FLAGS} -triple spir -I ${OCL_OBJECT_DIR}/include/ -fblocks --relocatable-pch -emit-pch -isysroot ${LIBOCL_BINARY_DIR} -x cl ${OCL_OBJECT_DIR}/include/ocl.h -o ${OCL_OBJECT_DIR}/beignet.pch
DEPENDS ${OCL_HEADER_FILES}
COMMENT "Generate the pch file: ${OCL_OBJECT_DIR}/beignet.pch"
)
diff --git a/backend/src/libocl/include/ocl.h b/backend/src/libocl/include/ocl.h
index e8866702..92e6c4ca 100644
--- a/backend/src/libocl/include/ocl.h
+++ b/backend/src/libocl/include/ocl.h
@@ -34,6 +34,7 @@
#include "ocl_printf.h"
#include "ocl_relational.h"
#include "ocl_sync.h"
+#include "ocl_enqueue.h"
#include "ocl_vload.h"
#include "ocl_workitem.h"
#pragma OPENCL EXTENSION cl_khr_fp64 : disable
diff --git a/backend/src/libocl/include/ocl_enqueue.h b/backend/src/libocl/include/ocl_enqueue.h
new file mode 100644
index 00000000..f296e224
--- /dev/null
+++ b/backend/src/libocl/include/ocl_enqueue.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright © 2015 - 2015 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#ifndef __OCL_ENQUEUE_H__
+#define __OCL_ENQUEUE_H__
+
+#include "ocl_types.h"
+
+/////////////////////////////////////////////////////////////////////////////
+// Synchronization functions
+/////////////////////////////////////////////////////////////////////////////
+typedef enum kernel_enqueue_flags_t{
+ CLK_ENQUEUE_FLAGS_NO_WAIT,
+ CLK_ENQUEUE_FLAGS_WAIT_KERNEL,
+ CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP
+} kernel_enqueue_flags_t;
+
+typedef struct ndrange_t {
+ size_t globalSize[3];
+ size_t localSize[3];
+ size_t globalOffset[3];
+} ndrange_t;
+
+typedef uint queue_t;
+
+int enqueue_kernel(queue_t queue, kernel_enqueue_flags_t flags,
+ const ndrange_t ndrange); //, void (^block)(void));
+
+OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_size);
+OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_size, size_t local_work_size);
+OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_offset,
+ size_t global_work_size,
+ size_t local_work_size);
+OVERLOADABLE ndrange_t ndrange_2D(const size_t global_work_size[2]);
+OVERLOADABLE ndrange_t ndrange_2D(const size_t global_work_size[2],
+ const size_t local_work_size[2]);
+OVERLOADABLE ndrange_t ndrange_2D(const size_t global_work_offset[2],
+ const size_t global_work_size[2],
+ const size_t local_work_size[2]);
+OVERLOADABLE ndrange_t ndrange_2D(const size_t global_work_offset[2],
+ const size_t global_work_size[2],
+ const size_t local_work_size[2]);
+OVERLOADABLE ndrange_t ndrange_3D(const size_t global_work_size[3]);
+OVERLOADABLE ndrange_t ndrange_3D(const size_t global_work_size[3],
+ const size_t local_work_size[3]);
+OVERLOADABLE ndrange_t ndrange_3D(const size_t global_work_offset[3],
+ const size_t global_work_size[3],
+ const size_t local_work_size[3]);
+
+
+#endif /* __OCL_SYNC_H__ */
diff --git a/backend/src/libocl/include/ocl_types.h b/backend/src/libocl/include/ocl_types.h
index ae0236b2..bb2b8919 100644
--- a/backend/src/libocl/include/ocl_types.h
+++ b/backend/src/libocl/include/ocl_types.h
@@ -84,8 +84,4 @@ DEF(double);
/////////////////////////////////////////////////////////////////////////////
// OpenCL built-in event types
/////////////////////////////////////////////////////////////////////////////
-// FIXME:
-// This is a transitional hack to bypass the LLVM 3.3 built-in types.
-// See the Khronos SPIR specification for handling of these types.
-
#endif /* __OCL_TYPES_H__ */
diff --git a/backend/src/libocl/src/ocl_enqueue.cl b/backend/src/libocl/src/ocl_enqueue.cl
new file mode 100644
index 00000000..2dd87307
--- /dev/null
+++ b/backend/src/libocl/src/ocl_enqueue.cl
@@ -0,0 +1,36 @@
+/*
+ * Copyright © 2012 - 2014 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#include "ocl_enqueue.h"
+
+int __gen_ocl_enqueue_kernel(uint, uint);
+
+OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_size) {
+ ndrange_t value;
+ value.globalSize[0] = global_work_size;
+ value.globalSize[1] = value.globalSize[2] = 1;
+ value.localSize[0] = global_work_size > 16 ? 16 : global_work_size;
+ value.localSize[1] = value.localSize[2] = 1;
+ value.globalOffset[0] = value.globalOffset[1] = value.globalOffset[2] = 0;
+ return value;
+}
+
+int enqueue_kernel(queue_t queue, kernel_enqueue_flags_t flags,
+ const ndrange_t ndrange) //, void (^block)(void));
+{
+ return __gen_ocl_enqueue_kernel(ndrange.globalSize[0], ndrange.localSize[0]);
+}
diff --git a/backend/src/llvm/llvm_bitcode_link.cpp b/backend/src/llvm/llvm_bitcode_link.cpp
index ebf43860..9f57a3a7 100644
--- a/backend/src/llvm/llvm_bitcode_link.cpp
+++ b/backend/src/llvm/llvm_bitcode_link.cpp
@@ -108,7 +108,8 @@ namespace gbe
newMF = src.getFunction(fnName);
if (!newMF) {
printf("Can not find the lib: %s\n", fnName.c_str());
- return false;
+ //return false;
+ continue;
}
fromSrc = true;
}
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 9536a3c4..a0aaa4e4 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -151,6 +151,8 @@ DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U32, _Z16convert_uint_satf)
DECL_LLVM_GEN_FUNCTION(CONV_F16_TO_F32, __gen_ocl_f16to32)
DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16)
+DECL_LLVM_GEN_FUNCTION(ENQUEUE_KERNEL, __gen_ocl_enqueue_kernel)
+
// SIMD level function for internal usage
DECL_LLVM_GEN_FUNCTION(SIMD_ANY, __gen_ocl_simd_any)
DECL_LLVM_GEN_FUNCTION(SIMD_ALL, __gen_ocl_simd_all)
diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
index 4ea722af..fe053f3f 100644
--- a/backend/src/llvm/llvm_to_gen.cpp
+++ b/backend/src/llvm/llvm_to_gen.cpp
@@ -225,7 +225,7 @@ namespace gbe
{
std::string errInfo;
std::unique_ptr<llvm::raw_fd_ostream> o = NULL;
- if (OCL_OUTPUT_LLVM_BEFORE_LINK || OCL_OUTPUT_LLVM_AFTER_LINK || OCL_OUTPUT_LLVM_AFTER_GEN)
+ //if (OCL_OUTPUT_LLVM_BEFORE_LINK || OCL_OUTPUT_LLVM_AFTER_LINK || OCL_OUTPUT_LLVM_AFTER_GEN)
o = std::unique_ptr<llvm::raw_fd_ostream>(new llvm::raw_fd_ostream(fileno(stdout), false));
// Get the module from its file
@@ -298,6 +298,7 @@ namespace gbe
passes.add(createCFGSimplificationPass()); // Merge & remove BBs
passes.add(createLowerSwitchPass()); // simplify cfg will generate switch-case instruction
passes.add(createScalarizePass()); // Expand all vector ops
+ passes.add(createPrintModulePass(*o));
if(OCL_OUTPUT_CFG)
passes.add(createCFGPrinterPass());
diff --git a/kernels/test_copy_buffer.cl b/kernels/test_copy_buffer.cl
index 6f2fd22d..5c240556 100644
--- a/kernels/test_copy_buffer.cl
+++ b/kernels/test_copy_buffer.cl
@@ -1,6 +1,19 @@
__kernel void
+foo(__global float* src, __global float *dst)
+{
+ int id = (int)get_global_id(0);
+ dst[id] = src[id];
+}
+
+__kernel void
test_copy_buffer(__global float* src, __global float* dst)
{
int id = (int)get_global_id(0);
dst[id] = src[id];
+
+ //void (^my_block_A)(void) = ^(void) { foo(src, dst); };
+ //my_block_A();
+ queue_t e;
+ ndrange_t ndrange = ndrange_1D(64);
+ int ret = enqueue_kernel(e, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange);
}