diff options
author | Ruiling Song <ruiling.song@intel.com> | 2014-07-30 13:59:30 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2014-09-05 17:47:42 +0800 |
commit | 00a1a882bb5ac3df51484ef3bbfeff5d1fd5de3f (patch) | |
tree | afba7f36ae6dcd5fb26e13eed37d5f5b478d7b06 | |
parent | 21d184b0b21e209d3d2ebcf6baf54b10e0240064 (diff) |
GBE: Handle bti allocation for internal buffer used by printf.
1. Move the bti/Register map from gbe::Context to ir::Function.
2. Use a GlobalVariable instead of a 'call' to get the base address of the internal buffer (used for printf).
Signed-off-by: Ruiling Song <ruiling.song@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
-rw-r--r-- | backend/src/backend/context.cpp | 8 | ||||
-rw-r--r-- | backend/src/backend/program.cpp | 16 | ||||
-rw-r--r-- | backend/src/backend/program.h | 7 | ||||
-rw-r--r-- | backend/src/backend/program.hpp | 9 | ||||
-rw-r--r-- | backend/src/gbe_bin_interpreter.cpp | 2 | ||||
-rw-r--r-- | backend/src/ir/context.hpp | 1 | ||||
-rw-r--r-- | backend/src/ir/function.cpp | 9 | ||||
-rw-r--r-- | backend/src/ir/function.hpp | 4 | ||||
-rw-r--r-- | backend/src/ir/printf.hpp | 9 | ||||
-rw-r--r-- | backend/src/llvm/llvm_gen_backend.cpp | 26 | ||||
-rw-r--r-- | backend/src/llvm/llvm_printf_parser.cpp | 17 | ||||
-rw-r--r-- | src/cl_command_queue_gen7.c | 4 | ||||
-rw-r--r-- | src/cl_driver.h | 2 | ||||
-rw-r--r-- | src/cl_gbe_loader.cpp | 10 | ||||
-rw-r--r-- | src/cl_gbe_loader.h | 2 | ||||
-rw-r--r-- | src/intel/intel_gpgpu.c | 4 |
16 files changed, 96 insertions, 34 deletions
diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp index dcf3f65..e09a309 100644 --- a/backend/src/backend/context.cpp +++ b/backend/src/backend/context.cpp @@ -437,10 +437,7 @@ namespace gbe curbeRegs.insert(std::make_pair(reg, offset)); } ir::Register Context::getSurfaceBaseReg(unsigned char bti) { - map<unsigned char, ir::Register>::iterator iter; - iter = btiRegMap.find(bti); - GBE_ASSERT(iter != btiRegMap.end()); - return iter->second; + return fn.getSurfaceBaseReg(bti); } void Context::buildArgList(void) { @@ -449,8 +446,6 @@ namespace gbe kernel->args = GBE_NEW_ARRAY_NO_ARG(KernelArgument, kernel->argNum); else kernel->args = NULL; - btiRegMap.clear(); - btiRegMap.insert(std::make_pair(1, ir::ocl::stackbuffer)); for (uint32_t argID = 0; argID < kernel->argNum; ++argID) { const auto &arg = fn.getArg(argID); @@ -466,7 +461,6 @@ namespace gbe kernel->args[argID].type = GBE_ARG_GLOBAL_PTR; kernel->args[argID].size = sizeof(void*); kernel->args[argID].bti = arg.bti; - btiRegMap.insert(std::make_pair(arg.bti, arg.reg)); break; case ir::FunctionArgument::CONSTANT_POINTER: kernel->args[argID].type = GBE_ARG_CONSTANT_PTR; diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index 5edb544..553c907 100644 --- a/backend/src/backend/program.cpp +++ b/backend/src/backend/program.cpp @@ -1105,6 +1105,18 @@ namespace gbe { return kernel->dupPrintfSet(); } + static uint8_t kernelGetPrintfBufBTI(void * printf_info) { + if (printf_info == NULL) return 0; + const ir::PrintfSet *ps = (ir::PrintfSet *)printf_info; + return ps->getBufBTI(); + } + + static uint8_t kernelGetPrintfIndexBufBTI(void * printf_info) { + if (printf_info == NULL) return 0; + const ir::PrintfSet *ps = (ir::PrintfSet *)printf_info; + return ps->getIndexBufBTI(); + } + static void kernelReleasePrintfSet(void * printf_info) { if (printf_info == NULL) return; ir::PrintfSet *ps = (ir::PrintfSet *)printf_info; @@ -1213,6 +1225,8 @@ GBE_EXPORT_SYMBOL 
gbe_set_image_base_index_cb *gbe_set_image_base_index = NULL; GBE_EXPORT_SYMBOL gbe_get_image_base_index_cb *gbe_get_image_base_index = NULL; GBE_EXPORT_SYMBOL gbe_get_printf_num_cb *gbe_get_printf_num = NULL; GBE_EXPORT_SYMBOL gbe_dup_printfset_cb *gbe_dup_printfset = NULL; +GBE_EXPORT_SYMBOL gbe_get_printf_buf_bti_cb *gbe_get_printf_buf_bti = NULL; +GBE_EXPORT_SYMBOL gbe_get_printf_indexbuf_bti_cb *gbe_get_printf_indexbuf_bti = NULL; GBE_EXPORT_SYMBOL gbe_release_printf_info_cb *gbe_release_printf_info = NULL; GBE_EXPORT_SYMBOL gbe_get_printf_sizeof_size_cb *gbe_get_printf_sizeof_size = NULL; GBE_EXPORT_SYMBOL gbe_output_printf_cb *gbe_output_printf = NULL; @@ -1259,6 +1273,8 @@ namespace gbe gbe_get_image_base_index = gbe::getImageBaseIndex; gbe_set_image_base_index = gbe::setImageBaseIndex; gbe_get_printf_num = gbe::kernelGetPrintfNum; + gbe_get_printf_buf_bti = gbe::kernelGetPrintfBufBTI; + gbe_get_printf_indexbuf_bti = gbe::kernelGetPrintfIndexBufBTI; gbe_dup_printfset = gbe::kernelDupPrintfSet; gbe_get_printf_sizeof_size = gbe::kernelGetPrintfSizeOfSize; gbe_release_printf_info = gbe::kernelReleasePrintfSet; diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h index 330a3de..39ff402 100644 --- a/backend/src/backend/program.h +++ b/backend/src/backend/program.h @@ -133,6 +133,13 @@ extern gbe_kernel_get_image_data_cb *gbe_kernel_get_image_data; typedef uint32_t (gbe_get_printf_num_cb)(void* printf_info); extern gbe_get_printf_num_cb *gbe_get_printf_num; +/*! Get the printf buffer bti */ +typedef uint8_t (gbe_get_printf_buf_bti_cb)(void* printf_info); +extern gbe_get_printf_buf_bti_cb *gbe_get_printf_buf_bti; + +typedef uint8_t (gbe_get_printf_indexbuf_bti_cb)(void* printf_info); +extern gbe_get_printf_indexbuf_bti_cb *gbe_get_printf_indexbuf_bti; + /*! 
Release the printfset */ typedef void (gbe_release_printf_info_cb)(void* printf_info); extern gbe_release_printf_info_cb *gbe_release_printf_info; diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp index a6303b9..8f5f125 100644 --- a/backend/src/backend/program.hpp +++ b/backend/src/backend/program.hpp @@ -145,6 +145,15 @@ namespace gbe { void* ptr = printfSet ? (void *)(new ir::PrintfSet(*printfSet)) : NULL; return ptr; } + uint8_t getPrintfBufBTI() const { + GBE_ASSERT(printfSet); + return printfSet->getBufBTI(); + } + + uint8_t getPrintfIndexBufBTI() const { + GBE_ASSERT(printfSet); + return printfSet->getIndexBufBTI(); + } void outputPrintf(void* index_addr, void* buf_addr, size_t global_wk_sz0, size_t global_wk_sz1, size_t global_wk_sz2) { diff --git a/backend/src/gbe_bin_interpreter.cpp b/backend/src/gbe_bin_interpreter.cpp index 2f02b34..ffca1f5 100644 --- a/backend/src/gbe_bin_interpreter.cpp +++ b/backend/src/gbe_bin_interpreter.cpp @@ -66,6 +66,8 @@ struct BinInterpCallBackInitializer gbe_get_image_base_index = gbe::getImageBaseIndex; gbe_set_image_base_index = gbe::setImageBaseIndex; gbe_get_printf_num = gbe::kernelGetPrintfNum; + gbe_get_printf_buf_bti = gbe::kernelGetPrintfBufBTI; + gbe_get_printf_indexbuf_bti = gbe::kernelGetPrintfIndexBufBTI; gbe_dup_printfset = gbe::kernelDupPrintfSet; gbe_get_printf_sizeof_size = gbe::kernelGetPrintfSizeOfSize; gbe_release_printf_info = gbe::kernelReleasePrintfSet; diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp index d426aee..cd09413 100644 --- a/backend/src/ir/context.hpp +++ b/backend/src/ir/context.hpp @@ -206,6 +206,7 @@ namespace ir { GBE_ASSERT(valueNum > 0); this->STORE(type, index, offset, space, valueNum, dwAligned, bti); } + void appendSurface(uint8_t bti, Register reg) { fn->appendSurface(bti, reg); } protected: /*! 
A block must be started with a label */ diff --git a/backend/src/ir/function.cpp b/backend/src/ir/function.cpp index 988ea2b..85e7934 100644 --- a/backend/src/ir/function.cpp +++ b/backend/src/ir/function.cpp @@ -222,6 +222,15 @@ namespace ir { const uint32_t specialNum = this->getSpecialRegNum(); return ID >= firstID && ID < firstID + specialNum; } + Register Function::getSurfaceBaseReg(uint8_t bti) const { + map<uint8_t, Register>::const_iterator iter = btiRegMap.find(bti); + GBE_ASSERT(iter != btiRegMap.end()); + return iter->second; + } + + void Function::appendSurface(uint8_t bti, Register reg) { + btiRegMap.insert(std::make_pair(bti, reg)); + } void Function::computeCFG(void) { // Clear possible previously computed CFG and compute the direct diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp index 0f84aa0..2f90c2f 100644 --- a/backend/src/ir/function.hpp +++ b/backend/src/ir/function.hpp @@ -355,6 +355,9 @@ namespace ir { /*! add the loop info for later liveness analysis */ void addLoop(const vector<LabelIndex> &bbs, const vector<std::pair<LabelIndex, LabelIndex>> &exits); INLINE const vector<Loop * > &getLoops() { return loops; } + /*! 
Get surface starting address register from bti */ + Register getSurfaceBaseReg(uint8_t bti) const; + void appendSurface(uint8_t bti, Register reg); private: friend class Context; //!< Can freely modify a function std::string name; //!< Function name @@ -365,6 +368,7 @@ namespace ir { vector<Immediate> immediates; //!< All immediate values in the function vector<BasicBlock*> blocks; //!< All chained basic blocks vector<Loop *> loops; //!< Loops info of the function + map<uint8_t, Register> btiRegMap;//!< map bti to surface base address RegisterFile file; //!< RegisterDatas used by the instructions Profile profile; //!< Current function profile PushMap pushMap; //!< Pushed function arguments (reg->loc) diff --git a/backend/src/ir/printf.hpp b/backend/src/ir/printf.hpp index 1aef767..4db7245 100644 --- a/backend/src/ir/printf.hpp +++ b/backend/src/ir/printf.hpp @@ -155,6 +155,8 @@ namespace gbe } sizeOfSize = other.sizeOfSize; + btiBuf = other.btiBuf; + btiIndexBuf = other.btiIndexBuf; } PrintfSet(void) = default; @@ -180,6 +182,11 @@ namespace gbe return sizeOfSize; } + void setBufBTI(uint8_t b) { btiBuf = b; } + void setIndexBufBTI(uint8_t b) { btiIndexBuf = b; } + uint8_t getBufBTI() const { return btiBuf; } + uint8_t getIndexBufBTI() const { return btiIndexBuf; } + uint32_t getPrintfBufferElementSize(uint32_t i) { PrintfSlot* slot = slots[i]; int vec_num = 1; @@ -226,6 +233,8 @@ namespace gbe vector<PrintfSlot*> slots; uint32_t sizeOfSize; // Total sizeof size. 
friend struct LockOutput; + uint8_t btiBuf; + uint8_t btiIndexBuf; static pthread_mutex_t lock; GBE_CLASS(PrintfSet); }; diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index ed19054..39ae384 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -1287,6 +1287,7 @@ namespace gbe argNameNode = attrNode; } } + ctx.appendSurface(1, ir::ocl::stackbuffer); ctx.getFunction().setCompileWorkGroupSize(reqd_wg_sz[0], reqd_wg_sz[1], reqd_wg_sz[2]); // Loop over the arguments and output registers for them @@ -1361,6 +1362,7 @@ namespace gbe switch (addrSpace) { case ir::MEM_GLOBAL: globalPointer.insert(std::make_pair(I, btiBase)); + ctx.appendSurface(btiBase, reg); ctx.input(argName, ir::FunctionArgument::GLOBAL_POINTER, reg, llvmInfo, ptrSize, align, btiBase); btiBase++; break; @@ -1716,18 +1718,16 @@ namespace gbe GBE_ASSERT(con.getName() == v.getName()); ctx.LOADI(ir::TYPE_S32, reg, ctx.newIntegerImmediate(con.getOffset(), ir::TYPE_S32)); } else { - if(v.getName().str().substr(0, 4) == ".str") { - /* When there are multi printf statements in multi kernel fucntions within the same - translate unit, if they have the same sting parameter, such as - kernel_func1 () { - printf("Line is %d\n", line_num1); - } - kernel_func2 () { - printf("Line is %d\n", line_num2); - } - The Clang will just generate one global string named .strXXX to represent "Line is %d\n" - So when translating the kernel_func1, we can not unref that global var, so we will - get here. Just ignore it to avoid assert. 
*/ + if(v.getName().equals(StringRef("__gen_ocl_printf_buf"))) { + ctx.appendSurface(btiBase, ir::ocl::printfbptr); + ctx.getFunction().getPrintfSet()->setBufBTI(btiBase); + globalPointer.insert(std::make_pair(&v, btiBase++)); + regTranslator.newScalarProxy(ir::ocl::printfbptr, const_cast<GlobalVariable*>(&v)); + } else if(v.getName().equals(StringRef("__gen_ocl_printf_index_buf"))) { + ctx.appendSurface(btiBase, ir::ocl::printfiptr); + ctx.getFunction().getPrintfSet()->setIndexBufBTI(btiBase); + globalPointer.insert(std::make_pair(&v, btiBase++)); + regTranslator.newScalarProxy(ir::ocl::printfiptr, const_cast<GlobalVariable*>(&v)); } else { GBE_ASSERT(0); } @@ -3367,7 +3367,7 @@ handle_write_image: bool isPrivate = false; p = candidates[idx]; - while (dyn_cast<User>(p)) { + while (dyn_cast<User>(p) && !dyn_cast<GlobalVariable>(p)) { if (processed.find(p) == processed.end()) { processed.insert(p); diff --git a/backend/src/llvm/llvm_printf_parser.cpp b/backend/src/llvm/llvm_printf_parser.cpp index 00aa4a3..00e1ef8 100644 --- a/backend/src/llvm/llvm_printf_parser.cpp +++ b/backend/src/llvm/llvm_printf_parser.cpp @@ -467,7 +467,7 @@ error: // (index_offset + offset)* sizeof(int) op0 = builder->CreateMul(op0, ConstantInt::get(intTy, sizeof(int))); // Final index address = index_buf_ptr + (index_offset + offset)* sizeof(int) - op0 = builder->CreateAdd(op0, index_buf_ptr); + op0 = builder->CreateAdd(index_buf_ptr, op0); Value* index_addr = builder->CreateIntToPtr(op0, Type::getInt32PtrTy(module->getContext(), 1)); builder->CreateStore(ConstantInt::get(intTy, 1), index_addr);// The flag @@ -507,7 +507,7 @@ error: //offset * sizeof(specify) val = builder->CreateMul(offset, ConstantInt::get(intTy, sizeof_size)); //data_offset + pbuf_ptr - op0 = builder->CreateAdd(op0, pbuf_ptr); + op0 = builder->CreateAdd(pbuf_ptr, op0); op0 = builder->CreateAdd(op0, val); data_addr = builder->CreateIntToPtr(op0, dst_type); builder->CreateStore(out_arg, data_addr); @@ -575,15 +575,14 @@ 
error: if (!pbuf_ptr) { /* alloc a new buffer ptr to collect the print output. */ - pbuf_ptr = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction( - "__gen_ocl_printf_get_buf_addr", Type::getInt32Ty(module->getContext()), - NULL))); + Type *ptrTy = Type::getInt32PtrTy(module->getContext()); + llvm::Constant * pBuf = module->getOrInsertGlobal(StringRef("__gen_ocl_printf_buf"), ptrTy); + pbuf_ptr = builder->CreatePtrToInt(pBuf, Type::getInt32Ty(module->getContext())); } if (!index_buf_ptr) { - /* alloc a new buffer ptr to collect the print valid index. */ - index_buf_ptr = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction( - "__gen_ocl_printf_get_index_buf_addr", Type::getInt32Ty(module->getContext()), - NULL))); + Type *ptrTy = Type::getInt32PtrTy(module->getContext()); + llvm::Constant * pBuf = module->getOrInsertGlobal(StringRef("__gen_ocl_printf_index_buf"), ptrTy); + index_buf_ptr = builder->CreatePtrToInt(pBuf, Type::getInt32Ty(module->getContext())); } deadprintfs.push_back(PrintfInst(cast<Instruction>(call),parseOnePrintfInstruction(call))); diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c index a4c8af7..330f0f9 100644 --- a/src/cl_command_queue_gen7.c +++ b/src/cl_command_queue_gen7.c @@ -264,7 +264,7 @@ cl_bind_printf(cl_gpgpu gpgpu, cl_kernel ker, void* printf_info, int printf_num, int32_t offset = interp_kernel_get_curbe_offset(ker->opaque, value, 0); size_t buf_size = global_sz * sizeof(int) * printf_num; if (offset > 0) { - if (cl_gpgpu_set_printf_buffer(gpgpu, 0, buf_size, offset) != 0) + if (cl_gpgpu_set_printf_buffer(gpgpu, 0, buf_size, offset, interp_get_printf_indexbuf_bti(printf_info)) != 0) return -1; } @@ -272,7 +272,7 @@ cl_bind_printf(cl_gpgpu gpgpu, cl_kernel ker, void* printf_info, int printf_num, offset = interp_kernel_get_curbe_offset(ker->opaque, value, 0); buf_size = interp_get_printf_sizeof_size(printf_info) * global_sz; if (offset > 0) { - if (cl_gpgpu_set_printf_buffer(gpgpu, 
1, buf_size, offset) != 0) + if (cl_gpgpu_set_printf_buffer(gpgpu, 1, buf_size, offset, interp_get_printf_buf_bti(printf_info)) != 0) return -1; } return 0; diff --git a/src/cl_driver.h b/src/cl_driver.h index 461c11e..9cdba98 100644 --- a/src/cl_driver.h +++ b/src/cl_driver.h @@ -226,7 +226,7 @@ typedef void (cl_gpgpu_unref_batch_buf_cb)(void*); extern cl_gpgpu_unref_batch_buf_cb *cl_gpgpu_unref_batch_buf; /* Set the printf buffer */ -typedef int (cl_gpgpu_set_printf_buffer_cb)(cl_gpgpu, uint32_t, uint32_t, uint32_t); +typedef int (cl_gpgpu_set_printf_buffer_cb)(cl_gpgpu, uint32_t, uint32_t, uint32_t, uint8_t); extern cl_gpgpu_set_printf_buffer_cb *cl_gpgpu_set_printf_buffer; /* get the printf buffer offset in the apeture*/ diff --git a/src/cl_gbe_loader.cpp b/src/cl_gbe_loader.cpp index 5f2f9ce..9d609c7 100644 --- a/src/cl_gbe_loader.cpp +++ b/src/cl_gbe_loader.cpp @@ -66,6 +66,8 @@ gbe_kernel_get_image_data_cb *interp_kernel_get_image_data = NULL; gbe_set_image_base_index_cb *interp_set_image_base_index = NULL; gbe_get_image_base_index_cb *interp_get_image_base_index = NULL; gbe_get_printf_num_cb* interp_get_printf_num = NULL; +gbe_get_printf_buf_bti_cb* interp_get_printf_buf_bti = NULL; +gbe_get_printf_indexbuf_bti_cb* interp_get_printf_indexbuf_bti = NULL; gbe_dup_printfset_cb* interp_dup_printfset = NULL; gbe_get_printf_sizeof_size_cb* interp_get_printf_sizeof_size = NULL; gbe_release_printf_info_cb* interp_release_printf_info = NULL; @@ -220,6 +222,14 @@ struct GbeLoaderInitializer if (interp_get_printf_num == NULL) return false; + interp_get_printf_buf_bti = *(gbe_get_printf_buf_bti_cb**)dlsym(dlhInterp, "gbe_get_printf_buf_bti"); + if (interp_get_printf_buf_bti == NULL) + return false; + + interp_get_printf_indexbuf_bti = *(gbe_get_printf_indexbuf_bti_cb**)dlsym(dlhInterp, "gbe_get_printf_indexbuf_bti"); + if (interp_get_printf_indexbuf_bti == NULL) + return false; + interp_dup_printfset = *(gbe_dup_printfset_cb**)dlsym(dlhInterp, "gbe_dup_printfset"); if 
(interp_dup_printfset == NULL) return false; diff --git a/src/cl_gbe_loader.h b/src/cl_gbe_loader.h index d095240..6cbc99e 100644 --- a/src/cl_gbe_loader.h +++ b/src/cl_gbe_loader.h @@ -66,6 +66,8 @@ extern gbe_kernel_get_image_data_cb *interp_kernel_get_image_data; extern gbe_set_image_base_index_cb *interp_set_image_base_index; extern gbe_get_image_base_index_cb *interp_get_image_base_index; extern gbe_get_printf_num_cb* interp_get_printf_num; +extern gbe_get_printf_buf_bti_cb* interp_get_printf_buf_bti; +extern gbe_get_printf_indexbuf_bti_cb* interp_get_printf_indexbuf_bti; extern gbe_dup_printfset_cb* interp_dup_printfset; extern gbe_get_printf_sizeof_size_cb* interp_get_printf_sizeof_size; extern gbe_release_printf_info_cb* interp_release_printf_info; diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index 1df0255..867ab4c 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -1353,7 +1353,7 @@ intel_gpgpu_event_get_exec_timestamp(intel_gpgpu_t* gpgpu, intel_event_t *event, } static int -intel_gpgpu_set_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i, uint32_t size, uint32_t offset) +intel_gpgpu_set_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i, uint32_t size, uint32_t offset, uint8_t bti) { drm_intel_bo *bo = NULL; if (i == 0) { // the index buffer. @@ -1378,7 +1378,7 @@ intel_gpgpu_set_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i, uint32_t size, uint } memset(bo->virtual, 0, size); drm_intel_bo_unmap(bo); - intel_gpgpu_bind_buf(gpgpu, bo, offset, 0, size, 0); + intel_gpgpu_bind_buf(gpgpu, bo, offset, 0, size, bti); return 0; } |