diff options
author | Yang Rong <rong.r.yang@intel.com> | 2016-05-20 11:18:11 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2016-06-12 14:14:07 +0800 |
commit | 42d3c73e67a36ed6e7d8aeccab527a8bf6efa9fc (patch) | |
tree | 9de399ce8a906589c01c28b2d9913d269095ed1b | |
parent | e966c66757acb56f4eb98d9df62dbd8b564214d2 (diff) |
OCL20: add device enqueue helper functions in backend.
These functions collect all block information and convert unnamed calls to named
function calls. They also collect the device-enqueue invoke functions, store them in
the unit, and set these functions as OpenCL kernel functions.
Because this changes the module's kernel functions, it must be called before linking;
otherwise, the built-in functions called in the invoke functions may not be materialized.
Signed-off-by: Yang Rong <rong.r.yang@intel.com>
Reviewed-by: Pan Xiuli <xiuli.pan@intel.com>
-rw-r--r-- | backend/src/CMakeLists.txt | 1 | ||||
-rw-r--r-- | backend/src/ir/unit.hpp | 1 | ||||
-rw-r--r-- | backend/src/llvm/llvm_device_enqueue.cpp | 416 | ||||
-rw-r--r-- | backend/src/llvm/llvm_gen_backend.hpp | 1 | ||||
-rw-r--r-- | backend/src/llvm/llvm_to_gen.cpp | 13 |
5 files changed, 431 insertions, 1 deletions
diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt index f26cc8bf..662cce4a 100644 --- a/backend/src/CMakeLists.txt +++ b/backend/src/CMakeLists.txt @@ -89,6 +89,7 @@ set (GBE_SRC llvm/ExpandUtils.cpp llvm/PromoteIntegers.cpp llvm/ExpandLargeIntegers.cpp + llvm/llvm_device_enqueue.cpp llvm/StripAttributes.cpp llvm/llvm_to_gen.cpp llvm/llvm_loadstore_optimization.cpp diff --git a/backend/src/ir/unit.hpp b/backend/src/ir/unit.hpp index 8d1af704..c0b542aa 100644 --- a/backend/src/ir/unit.hpp +++ b/backend/src/ir/unit.hpp @@ -92,6 +92,7 @@ namespace ir { public: typedef map<std::string, Function*> FunctionSet; map<llvm::CallInst*, PrintfSet::PrintfFmt> printfs; + vector<std::string> blockFuncs; /*! Create an empty unit */ Unit(PointerSize pointerSize = POINTER_32_BITS); /*! Release everything (*including* the function pointers) */ diff --git a/backend/src/llvm/llvm_device_enqueue.cpp b/backend/src/llvm/llvm_device_enqueue.cpp new file mode 100644 index 00000000..e867b9d1 --- /dev/null +++ b/backend/src/llvm/llvm_device_enqueue.cpp @@ -0,0 +1,416 @@ +/* + * Copyright © 2014 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. 
+ * + */ + +#include "llvm_includes.hpp" + +#include "ir/unit.hpp" +#include "llvm_gen_backend.hpp" +#include "ocl_common_defines.h" + +using namespace llvm; + +namespace gbe { + BitCastInst *isInvokeBitcast(Instruction *I) { + BitCastInst* bt = dyn_cast<BitCastInst>(I); + if (bt == NULL) + return NULL; + + Type* type = bt->getOperand(0)->getType(); + if(!type->isPointerTy()) + return NULL; + + PointerType *pointerType = dyn_cast<PointerType>(type); + Type *pointed = pointerType->getElementType(); + if(!pointed->isFunctionTy()) + return NULL; + + Function *Fn = dyn_cast<Function>(bt->getOperand(0)); + if(Fn == NULL) + return NULL; + + /* This is a fake, to check the function bitcast is for block or not */ + std::string fnName = Fn->getName(); + if(fnName.find("_invoke") == std::string::npos) + return NULL; + + return bt; + } + + void mutateArgAddressSpace(Argument *arg) + { + std::list<Value *>WorkList; + WorkList.push_back(arg); + + while(!WorkList.empty()) { + Value *v = WorkList.front(); + + for (Value::use_iterator iter = v->use_begin(); iter != v->use_end(); ++iter) { + // After LLVM 3.5, use_iterator points to 'Use' instead of 'User', + // which is more straightforward. +#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR < 5) + User *theUser = *iter; +#else + User *theUser = iter->getUser(); +#endif + // becareful with sub operation + if (isa<StoreInst>(theUser) || isa<LoadInst>(theUser)) + continue; + + WorkList.push_back(theUser); + } + + PointerType *ty = dyn_cast<PointerType>(v->getType()); + if(ty == NULL) continue; //should only one argument, private pointer type + ty = PointerType::get(ty->getPointerElementType(), 1); + v->mutateType(ty); + WorkList.pop_front(); + } + } + + Function* setFunctionAsKernel(Module *mod, Function *Fn) + { + LLVMContext &Context = mod->getContext(); + Type *intTy = IntegerType::get(mod->getContext(), 32); + SmallVector<llvm::Metadata *, 5> kernelMDArgs; + + // MDNode for the kernel argument address space qualifiers. 
+ SmallVector<llvm::Metadata *, 8> addressQuals; + addressQuals.push_back(llvm::MDString::get(Context, "kernel_arg_addr_space")); + + // MDNode for the kernel argument access qualifiers (images only). + SmallVector<llvm::Metadata *, 8> accessQuals; + accessQuals.push_back(llvm::MDString::get(Context, "kernel_arg_access_qual")); + + // MDNode for the kernel argument type names. + SmallVector<llvm::Metadata *, 8> argTypeNames; + argTypeNames.push_back(llvm::MDString::get(Context, "kernel_arg_type")); + + // MDNode for the kernel argument base type names. + SmallVector<llvm::Metadata *, 8> argBaseTypeNames; + argBaseTypeNames.push_back( + llvm::MDString::get(Context, "kernel_arg_base_type")); + + // MDNode for the kernel argument type qualifiers. + SmallVector<llvm::Metadata *, 8> argTypeQuals; + argTypeQuals.push_back(llvm::MDString::get(Context, "kernel_arg_type_qual")); + + // MDNode for the kernel argument names. + SmallVector<llvm::Metadata *, 8> argNames; + argNames.push_back(llvm::MDString::get(Context, "kernel_arg_name")); + + //Because paramter type changed, so must re-create the invoke function and replace the old one + std::vector<Type *> ParamTys; + ValueToValueMapTy VMap; + for (Function::arg_iterator I = Fn->arg_begin(), E = Fn->arg_end(); I != E; ++I) { + PointerType *ty = dyn_cast<PointerType>(I->getType()); + if(ty && ty->getAddressSpace() == 0) //Foce set the address space to global + ty = PointerType::get(ty->getPointerElementType(), 1); + ParamTys.push_back(ty); + } + FunctionType* NewFT = FunctionType::get(Fn->getReturnType(), ParamTys, false); + Function* NewFn = Function::Create(NewFT, Function::ExternalLinkage, Fn->getName()); + SmallVector<ReturnInst*, 8> Returns; + + Function::arg_iterator NewFnArgIt = NewFn->arg_begin(); + for (Function::arg_iterator I = Fn->arg_begin(), E = Fn->arg_end(); I != E; ++I) { + std::string ArgName = I->getName(); + NewFnArgIt->setName(ArgName); + VMap[&*I] = &(*NewFnArgIt++); + } + CloneFunctionInto(NewFn, Fn, 
VMap, /*ModuleLevelChanges=*/true, Returns); + + Fn->setName("__d" + Fn->getName()); + mod->getFunctionList().push_back(NewFn); + //mod->getOrInsertFunction(NewFn->getName(), NewFn->getFunctionType(), + // NewFn->getAttributes()); + + for (Function::arg_iterator I = NewFn->arg_begin(), E = NewFn->arg_end(); I != E; ++I) { + PointerType *ty = dyn_cast<PointerType>(I->getType()); + //mutate the address space of all pointer derive from the argmument from private to global + if(ty && ty->getAddressSpace() == 1) + mutateArgAddressSpace(&*I); + //ty = dyn_cast<PointerType>(I->getType()); + + addressQuals.push_back(llvm::ConstantAsMetadata::get(ConstantInt::get(intTy, ty->getAddressSpace()))); + accessQuals.push_back(llvm::MDString::get(Context, "none")); + argTypeNames.push_back(llvm::MDString::get(Context, "char*")); + argBaseTypeNames.push_back(llvm::MDString::get(Context, "char*")); + argTypeQuals.push_back(llvm::MDString::get(Context, "")); + argNames.push_back(llvm::MDString::get(Context, I->getName())); + } + + kernelMDArgs.push_back(llvm::ConstantAsMetadata::get(NewFn)); + kernelMDArgs.push_back(llvm::MDNode::get(Context, addressQuals)); + kernelMDArgs.push_back(llvm::MDNode::get(Context, accessQuals)); + kernelMDArgs.push_back(llvm::MDNode::get(Context, argTypeNames)); + kernelMDArgs.push_back(llvm::MDNode::get(Context, argBaseTypeNames)); + kernelMDArgs.push_back(llvm::MDNode::get(Context, argTypeQuals)); + kernelMDArgs.push_back(llvm::MDNode::get(Context, argNames)); + + llvm::MDNode *kernelMDNode = llvm::MDNode::get(mod->getContext(), kernelMDArgs); + llvm::NamedMDNode *OpenCLKernelMetadata = mod->getOrInsertNamedMetadata("opencl.kernels"); + OpenCLKernelMetadata->addOperand(kernelMDNode); + + return NewFn; + } + + Instruction* replaceInst(Instruction *I, Value *v) + { + //The bitcast is instruction + if(BitCastInst *bt = dyn_cast<BitCastInst>(&*I)) { + bt->replaceAllUsesWith(v); + return bt; + } + return NULL; + } + + void collectDeviceEnqueueInfo(Module 
*mod, ir::Unit &unit) + { + std::set<Instruction*> deadInsnSet; + std::set<Function*> deadFunctionSet; + std::map<Value*, std::string> blocks; + + for (Module::iterator SF = mod->begin(), E = mod->end(); SF != E; ++SF) { + Function *f = &*SF; + if (f->isDeclaration()) continue; + + for (inst_iterator I = inst_begin(f), E = inst_end(f); I != E; ++I) { + if (BitCastInst* bt = isInvokeBitcast(&*I)) { + /* handle block description, convert the instruction that store block + * invoke pointer to store the index in the unit's block functions index.*/ + Function *Fn = dyn_cast<Function>(bt->getOperand(0)); + + std::string fnName = Fn->getName(); + int index = -1; + for(size_t i=0; i<unit.blockFuncs.size(); i++) { + if(unit.blockFuncs[i] == fnName) { + index = i; + break; + } + } + if(index == -1){ + unit.blockFuncs.push_back(fnName); + index = unit.blockFuncs.size() - 1; + } + + for (Value::use_iterator iter = bt->use_begin(); iter != bt->use_end(); ++iter) { +#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR < 5) + User *theUser = *iter; +#else + User *theUser = iter->getUser(); +#endif + if(StoreInst *st = dyn_cast<StoreInst>(theUser)) { + GetElementPtrInst * gep = dyn_cast<GetElementPtrInst>(st->getPointerOperand()); + if(gep) + blocks[gep->getOperand(0)] = fnName; + } + } + + if(StoreInst* st = dyn_cast<StoreInst>(&*I)) { + GetElementPtrInst * gep = dyn_cast<GetElementPtrInst>(st->getPointerOperand()); + if(gep) + blocks[gep->getOperand(0)] = fnName; + } + + Value *v = Constant::getIntegerValue(bt->getType(), APInt(unit.getPointerSize(), index)); + bt->replaceAllUsesWith(v); + deadInsnSet.insert(bt); + } + + if(CallInst *CI = dyn_cast<CallInst>(&*I)) { + IRBuilder<> builder(CI->getParent(), BasicBlock::iterator(CI)); + if(CI->getCalledFunction() == NULL) { + //unnamed call function, parse the use to find the define of called function + SmallVector<Value*, 16> args(CI->op_begin(), CI->op_end()-1); + + Value *v = CI->getCalledValue(); + BitCastInst* bt = 
dyn_cast<BitCastInst>(v); + if(bt == NULL) + continue; + + LoadInst* ld = dyn_cast<LoadInst>(bt->getOperand(0)); + if(ld == NULL) + continue; + + GetElementPtrInst * gep = dyn_cast<GetElementPtrInst>(ld->getPointerOperand()); + if(gep == NULL) + continue; + + BitCastInst* fnPointer = dyn_cast<BitCastInst>(gep->getOperand(0)); + if(fnPointer == NULL) + continue; + + if(BitCastInst* bt = dyn_cast<BitCastInst>(fnPointer->getOperand(0))) { + std::string fnName = blocks[bt->getOperand(0)]; + Function* f = mod->getFunction(fnName); + CallInst *newCI = builder.CreateCall(f, args); + CI->replaceAllUsesWith(newCI); + deadInsnSet.insert(CI); + continue; + } + + //the function is global variable + if(GlobalVariable* gv = dyn_cast<GlobalVariable>(fnPointer->getOperand(0))) { + Constant *c = gv->getInitializer(); + ConstantExpr *expr = dyn_cast<ConstantExpr>(c->getOperand(3)); + BitCastInst *bt = dyn_cast<BitCastInst>(expr->getAsInstruction()); + Function* f = dyn_cast<Function>(bt->getOperand(0)); + CallInst *newCI = builder.CreateCall(f, args); + CI->replaceAllUsesWith(newCI); + deadInsnSet.insert(CI); + continue; + } + + ld = dyn_cast<LoadInst>(fnPointer->getOperand(0)); + if(ld == NULL) + continue; + + if(GlobalVariable *gv = dyn_cast<GlobalVariable>(ld->getPointerOperand())) { + ConstantExpr *expr = dyn_cast<ConstantExpr>(gv->getInitializer()); + BitCastInst *bt = dyn_cast<BitCastInst>(expr->getAsInstruction()); + GlobalVariable *block_literal = dyn_cast<GlobalVariable>(bt->getOperand(0)); + Constant *v = block_literal->getInitializer(); + expr = dyn_cast<ConstantExpr>(v->getOperand(3)); + bt = dyn_cast<BitCastInst>(expr->getAsInstruction()); + Function* f = dyn_cast<Function>(bt->getOperand(0)); + CallInst *newCI = builder.CreateCall(f, args); + CI->replaceAllUsesWith(newCI); + deadInsnSet.insert(CI); + continue; + } + + if(AllocaInst *ai = dyn_cast<AllocaInst>(ld->getPointerOperand())) { + Value *v = NULL; + for (Value::use_iterator iter = ai->use_begin(); iter != 
ai->use_end(); ++iter) { +#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR < 5) + User *theUser = *iter; +#else + User *theUser = iter->getUser(); +#endif + if(StoreInst *st = dyn_cast<StoreInst>(theUser)) { + bt = dyn_cast<BitCastInst>(st->getValueOperand()); + if(bt) + v = bt->getOperand(0); + } + } + if(blocks.find(v) == blocks.end()) { + if(GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) { + Constant *c = gv->getInitializer(); + ConstantExpr *expr = dyn_cast<ConstantExpr>(c->getOperand(3)); + BitCastInst *bt = dyn_cast<BitCastInst>(expr->getAsInstruction()); + Function* f = dyn_cast<Function>(bt->getOperand(0)); + blocks[v] = f->getName(); + } + } + + std::string fnName = blocks[v]; + Function* f = mod->getFunction(fnName); + CallInst *newCI = builder.CreateCall(f, args); + CI->replaceAllUsesWith(newCI); + deadInsnSet.insert(CI); + continue; + } + //can't find the function's define + assert(0); + } else { + //handle enqueue_kernel function call + Function *fn = CI->getCalledFunction(); + if (fn->getName().find("enqueue_kernel") == std::string::npos) + continue; + + //block parameter's index, 3 or 6 + int block_index = 3; + Type *type = CI->getArgOperand(block_index)->getType(); + if(type->isIntegerTy()) + block_index = 6; + Value *block = CI->getArgOperand(block_index); + while(isa<BitCastInst>(block)) + block = dyn_cast<BitCastInst>(block)->getOperand(0); + LoadInst *ld = dyn_cast<LoadInst>(block); + Value *v = NULL; + if(ld) { + Value *block = ld->getPointerOperand(); + for (Value::use_iterator iter = block->use_begin(); iter != block->use_end(); ++iter) { +#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR < 5) + User *theUser = *iter; +#else + User *theUser = iter->getUser(); +#endif + if(StoreInst *st = dyn_cast<StoreInst>(theUser)) { + BitCastInst *bt = dyn_cast<BitCastInst>(st->getValueOperand()); + if(bt) + v = bt->getOperand(0); + } + } + if(blocks.find(v) == blocks.end()) { + if(GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) { + Constant *c 
= gv->getInitializer(); + ConstantExpr *expr = dyn_cast<ConstantExpr>(c->getOperand(3)); + BitCastInst *bt = dyn_cast<BitCastInst>(expr->getAsInstruction()); + Function* f = dyn_cast<Function>(bt->getOperand(0)); + blocks[v] = f->getName(); + } + } + } else if(isa<AllocaInst>(block)) { + v = block; + } + std::string fnName = blocks[v]; + Function* f = mod->getFunction(fnName); + deadFunctionSet.insert(f); + f = setFunctionAsKernel(mod, f); + + if( fn->isVarArg() ) { + //enqueue function with slm, convert to __gen_enqueue_kernel_slm call + //store the slm information to a alloca address. + int start = block_index + 1; + int count = CI->getNumArgOperands() - start; + Type *intTy = IntegerType::get(mod->getContext(), 32); + + AllocaInst *AI = builder.CreateAlloca(intTy, ConstantInt::get(intTy, count)); + + for(uint32_t i = start; i < CI->getNumArgOperands(); i++) { + Value *ptr = builder.CreateGEP(AI, ConstantInt::get(intTy, i-start)); + builder.CreateStore(CI->getArgOperand(i), ptr); + } + SmallVector<Value*, 16> args(CI->op_begin(), CI->op_begin() + 3); + args.push_back(CI->getArgOperand(block_index)); + args.push_back(ConstantInt::get(intTy, count)); + args.push_back(AI); + + std::vector<Type *> ParamTys; + for (Value** I = args.begin(); I != args.end(); ++I) + ParamTys.push_back((*I)->getType()); + CallInst* newCI = builder.CreateCall(cast<llvm::Function>(mod->getOrInsertFunction( + "__gen_enqueue_kernel_slm", FunctionType::get(intTy, ParamTys, false))), args); + CI->replaceAllUsesWith(newCI); + deadInsnSet.insert(CI); + } + } + } + } + } + + for (auto it: deadInsnSet) { + it->eraseFromParent(); + } + + for (auto it: deadFunctionSet) { + it->eraseFromParent(); + } + } +}; diff --git a/backend/src/llvm/llvm_gen_backend.hpp b/backend/src/llvm/llvm_gen_backend.hpp index 800f78f4..0c47c679 100644 --- a/backend/src/llvm/llvm_gen_backend.hpp +++ b/backend/src/llvm/llvm_gen_backend.hpp @@ -150,6 +150,7 @@ namespace gbe /*! Add all the function call of ocl to our bitcode. 
*/ llvm::Module* runBitCodeLinker(llvm::Module *mod, bool strictMath); + void collectDeviceEnqueueInfo(llvm::Module *mod, ir::Unit &unit); void* getPrintfInfo(llvm::CallInst* inst); } /* namespace gbe */ diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp index aa470ed1..e1955adb 100644 --- a/backend/src/llvm/llvm_to_gen.cpp +++ b/backend/src/llvm/llvm_to_gen.cpp @@ -272,10 +272,21 @@ namespace gbe if (!cl_mod) return false; OUTPUT_BITCODE(BEFORE_LINK, (*cl_mod)); +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7 + legacy::PassManager passes__; +#else + PassManager passes__; +#endif + //run ExpandConstantExprPass before collectDeviceEnqueueInfo + //to simplify the analyze of block. + passes__.add(createExpandConstantExprPass()); // constant prop may generate ConstantExpr + passes__.run(*cl_mod); + /* Must call before materialize when link */ + collectDeviceEnqueueInfo(cl_mod, unit); std::unique_ptr<Module> M; - /* Before do any thing, we first filter in all CL functions in bitcode. */ + /* Before do any thing, we first filter in all CL functions in bitcode. */ M.reset(runBitCodeLinker(cl_mod, strictMath)); if (!module) delete cl_mod; |