diff options
author | Zhigang Gong <zhigang.gong@intel.com> | 2015-04-15 14:40:39 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2015-04-15 14:40:39 +0800 |
commit | 73c31662b0a59100decef275f209df537bb0f404 (patch) | |
tree | f241e4421c2f092a6733ebeba1e5f38f33192e64 | |
parent | d566a8310d2211f2cafaa18212b7695f9be0fbc0 (diff) |
draft to introduce enqueue builtin functions.enqueue
Signed-off-by: Zhigang Gong <zhigang.gong@intel.com>
-rw-r--r-- | backend/src/backend/program.cpp | 1 | ||||
-rw-r--r-- | backend/src/libocl/CMakeLists.txt | 6 | ||||
-rw-r--r-- | backend/src/libocl/include/ocl.h | 1 | ||||
-rw-r--r-- | backend/src/libocl/include/ocl_enqueue.h | 65 | ||||
-rw-r--r-- | backend/src/libocl/include/ocl_types.h | 4 | ||||
-rw-r--r-- | backend/src/libocl/src/ocl_enqueue.cl | 36 | ||||
-rw-r--r-- | backend/src/llvm/llvm_bitcode_link.cpp | 3 | ||||
-rw-r--r-- | backend/src/llvm/llvm_gen_ocl_function.hxx | 2 | ||||
-rw-r--r-- | backend/src/llvm/llvm_to_gen.cpp | 3 | ||||
-rw-r--r-- | kernels/test_copy_buffer.cl | 13 |
10 files changed, 125 insertions, 9 deletions
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index eee7c3c5..202d1bd3 100644 --- a/backend/src/backend/program.cpp +++ b/backend/src/backend/program.cpp @@ -554,6 +554,7 @@ namespace gbe { args.push_back(input); args.push_back("-ffp-contract=off"); + args.push_back("-fblocks"); // The compiler invocation needs a DiagnosticsEngine so it can report problems std::string ErrorString; diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt index 6b825b04..a62d7e6a 100644 --- a/backend/src/libocl/CMakeLists.txt +++ b/backend/src/libocl/CMakeLists.txt @@ -52,7 +52,7 @@ FOREACH(M ${OCL_COPY_HEADERS}) COPY_THE_HEADER(${M}) ENDFOREACH(M) -SET (OCL_COPY_MODULES ocl_workitem ocl_atom ocl_async ocl_sync ocl_misc ocl_vload ocl_geometric ocl_image) +SET (OCL_COPY_MODULES ocl_workitem ocl_atom ocl_async ocl_enqueue ocl_sync ocl_misc ocl_vload ocl_geometric ocl_image) FOREACH(M ${OCL_COPY_MODULES}) COPY_THE_HEADER(${M}) COPY_THE_SOURCE(${M}) @@ -200,14 +200,14 @@ ADD_CUSTOM_COMMAND(OUTPUT ${OCL_OBJECT_DIR}/beignet.bc ADD_CUSTOM_COMMAND(OUTPUT ${OCL_OBJECT_DIR}/beignet.local.pch COMMAND mkdir -p ${OCL_OBJECT_DIR} - COMMAND ${CLANG_EXECUTABLE} -cc1 ${CLANG_OCL_FLAGS} -triple spir -I ${OCL_OBJECT_DIR}/include/ -emit-pch -x cl ${OCL_OBJECT_DIR}/include/ocl.h -o ${OCL_OBJECT_DIR}/beignet.local.pch + COMMAND ${CLANG_EXECUTABLE} -cc1 ${CLANG_OCL_FLAGS} -triple spir -I ${OCL_OBJECT_DIR}/include/ -fblocks -emit-pch -x cl ${OCL_OBJECT_DIR}/include/ocl.h -o ${OCL_OBJECT_DIR}/beignet.local.pch DEPENDS ${OCL_HEADER_FILES} COMMENT "Generate the pch file: ${OCL_OBJECT_DIR}/beignet.local.pch" ) ADD_CUSTOM_COMMAND(OUTPUT ${OCL_OBJECT_DIR}/beignet.pch COMMAND mkdir -p ${OCL_OBJECT_DIR} - COMMAND ${CLANG_EXECUTABLE} -cc1 ${CLANG_OCL_FLAGS} -triple spir -I ${OCL_OBJECT_DIR}/include/ --relocatable-pch -emit-pch -isysroot ${LIBOCL_BINARY_DIR} -x cl ${OCL_OBJECT_DIR}/include/ocl.h -o ${OCL_OBJECT_DIR}/beignet.pch + COMMAND ${CLANG_EXECUTABLE} -cc1 ${CLANG_OCL_FLAGS} -triple spir -I ${OCL_OBJECT_DIR}/include/ -fblocks --relocatable-pch -emit-pch -isysroot ${LIBOCL_BINARY_DIR} -x cl ${OCL_OBJECT_DIR}/include/ocl.h -o ${OCL_OBJECT_DIR}/beignet.pch DEPENDS ${OCL_HEADER_FILES} COMMENT "Generate the pch file: ${OCL_OBJECT_DIR}/beignet.pch" ) diff --git a/backend/src/libocl/include/ocl.h b/backend/src/libocl/include/ocl.h index e8866702..92e6c4ca 100644 --- a/backend/src/libocl/include/ocl.h +++ b/backend/src/libocl/include/ocl.h @@ -34,6 +34,7 @@ #include "ocl_printf.h" #include "ocl_relational.h" #include "ocl_sync.h" +#include "ocl_enqueue.h" #include "ocl_vload.h" #include "ocl_workitem.h" #pragma OPENCL EXTENSION cl_khr_fp64 : disable diff --git a/backend/src/libocl/include/ocl_enqueue.h b/backend/src/libocl/include/ocl_enqueue.h new file mode 100644 index 00000000..f296e224 --- /dev/null +++ b/backend/src/libocl/include/ocl_enqueue.h @@ -0,0 +1,65 @@ +/* + * Copyright © 2015 - 2015 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + * + */ +#ifndef __OCL_ENQUEUE_H__ +#define __OCL_ENQUEUE_H__ + +#include "ocl_types.h" + +///////////////////////////////////////////////////////////////////////////// +// Synchronization functions +///////////////////////////////////////////////////////////////////////////// +typedef enum kernel_enqueue_flags_t{ + CLK_ENQUEUE_FLAGS_NO_WAIT, + CLK_ENQUEUE_FLAGS_WAIT_KERNEL, + CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP +} kernel_enqueue_flags_t; + +typedef struct ndrange_t { + size_t globalSize[3]; + size_t localSize[3]; + size_t globalOffset[3]; +} ndrange_t; + +typedef uint queue_t; + +int enqueue_kernel(queue_t queue, kernel_enqueue_flags_t flags, + const ndrange_t ndrange); //, void (^block)(void)); + +OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_size); +OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_size, size_t local_work_size); +OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_offset, + size_t global_work_size, + size_t local_work_size); +OVERLOADABLE ndrange_t ndrange_2D(const size_t global_work_size[2]); +OVERLOADABLE ndrange_t ndrange_2D(const size_t global_work_size[2], + const size_t local_work_size[2]); +OVERLOADABLE ndrange_t ndrange_2D(const size_t global_work_offset[2], + const size_t global_work_size[2], + const size_t local_work_size[2]); +OVERLOADABLE ndrange_t ndrange_2D(const size_t global_work_offset[2], + const size_t global_work_size[2], + const size_t local_work_size[2]); +OVERLOADABLE ndrange_t ndrange_3D(const size_t global_work_size[3]); +OVERLOADABLE ndrange_t ndrange_3D(const size_t global_work_size[3], + const size_t local_work_size[3]); +OVERLOADABLE ndrange_t ndrange_3D(const size_t global_work_offset[3], + const size_t global_work_size[3], + const size_t local_work_size[3]); + + +#endif /* __OCL_SYNC_H__ */ diff --git a/backend/src/libocl/include/ocl_types.h b/backend/src/libocl/include/ocl_types.h index ae0236b2..bb2b8919 100644 --- a/backend/src/libocl/include/ocl_types.h +++ b/backend/src/libocl/include/ocl_types.h @@ -84,8 +84,4 @@ DEF(double); ///////////////////////////////////////////////////////////////////////////// // OpenCL built-in event types ///////////////////////////////////////////////////////////////////////////// -// FIXME: -// This is a transitional hack to bypass the LLVM 3.3 built-in types. -// See the Khronos SPIR specification for handling of these types. - #endif /* __OCL_TYPES_H__ */ diff --git a/backend/src/libocl/src/ocl_enqueue.cl b/backend/src/libocl/src/ocl_enqueue.cl new file mode 100644 index 00000000..2dd87307 --- /dev/null +++ b/backend/src/libocl/src/ocl_enqueue.cl @@ -0,0 +1,36 @@ +/* + * Copyright © 2012 - 2014 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + * + */ +#include "ocl_enqueue.h" + +int __gen_ocl_enqueue_kernel(uint, uint); + +OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_size) { + ndrange_t value; + value.globalSize[0] = global_work_size; + value.globalSize[1] = value.globalSize[2] = 1; + value.localSize[0] = global_work_size > 16 ? 16 : global_work_size; + value.localSize[1] = value.localSize[2] = 1; + value.globalOffset[0] = value.globalOffset[1] = value.globalOffset[2] = 0; + return value; +} + +int enqueue_kernel(queue_t queue, kernel_enqueue_flags_t flags, + const ndrange_t ndrange) //, void (^block)(void)); +{ + return __gen_ocl_enqueue_kernel(ndrange.globalSize[0], ndrange.localSize[0]); +} diff --git a/backend/src/llvm/llvm_bitcode_link.cpp b/backend/src/llvm/llvm_bitcode_link.cpp index ebf43860..9f57a3a7 100644 --- a/backend/src/llvm/llvm_bitcode_link.cpp +++ b/backend/src/llvm/llvm_bitcode_link.cpp @@ -108,7 +108,8 @@ namespace gbe newMF = src.getFunction(fnName); if (!newMF) { printf("Can not find the lib: %s\n", fnName.c_str()); - return false; + //return false; + continue; } fromSrc = true; } diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 9536a3c4..a0aaa4e4 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -151,6 +151,8 @@ DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U32, _Z16convert_uint_satf) DECL_LLVM_GEN_FUNCTION(CONV_F16_TO_F32, __gen_ocl_f16to32) DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16) +DECL_LLVM_GEN_FUNCTION(ENQUEUE_KERNEL, __gen_ocl_enqueue_kernel) + // SIMD level function for internal usage DECL_LLVM_GEN_FUNCTION(SIMD_ANY, __gen_ocl_simd_any) DECL_LLVM_GEN_FUNCTION(SIMD_ALL, __gen_ocl_simd_all) diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp index 4ea722af..fe053f3f 100644 --- a/backend/src/llvm/llvm_to_gen.cpp +++ b/backend/src/llvm/llvm_to_gen.cpp @@ -225,7 +225,7 @@ namespace gbe { std::string errInfo; std::unique_ptr<llvm::raw_fd_ostream> o = NULL; - if (OCL_OUTPUT_LLVM_BEFORE_LINK || OCL_OUTPUT_LLVM_AFTER_LINK || OCL_OUTPUT_LLVM_AFTER_GEN) + //if (OCL_OUTPUT_LLVM_BEFORE_LINK || OCL_OUTPUT_LLVM_AFTER_LINK || OCL_OUTPUT_LLVM_AFTER_GEN) o = std::unique_ptr<llvm::raw_fd_ostream>(new llvm::raw_fd_ostream(fileno(stdout), false)); // Get the module from its file @@ -298,6 +298,7 @@ namespace gbe passes.add(createCFGSimplificationPass()); // Merge & remove BBs passes.add(createLowerSwitchPass()); // simplify cfg will generate switch-case instruction passes.add(createScalarizePass()); // Expand all vector ops + passes.add(createPrintModulePass(*o)); if(OCL_OUTPUT_CFG) passes.add(createCFGPrinterPass()); diff --git a/kernels/test_copy_buffer.cl b/kernels/test_copy_buffer.cl index 6f2fd22d..5c240556 100644 --- a/kernels/test_copy_buffer.cl +++ b/kernels/test_copy_buffer.cl @@ -1,6 +1,19 @@ __kernel void +foo(__global float* src, __global float *dst) +{ + int id = (int)get_global_id(0); + dst[id] = src[id]; +} + +__kernel void test_copy_buffer(__global float* src, __global float* dst) { int id = (int)get_global_id(0); dst[id] = src[id]; + + //void (^my_block_A)(void) = ^(void) { foo(src, dst); }; + //my_block_A(); + queue_t e; + ndrange_t ndrange = ndrange_1D(64); + int ret = enqueue_kernel(e, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange); } |