author | Ruiling Song <ruiling.song@intel.com> | 2014-12-01 16:56:59 +0800
---|---|---
committer | Zhigang Gong <zhigang.gong@intel.com> | 2014-12-03 15:55:51 +0800
commit | da07d7a43d1fecd57a7ed2d35aa4670b7da7c8c8 (patch)
tree | 0a37b6be367a78d2447111cb115c620054b12031
parent | e2b161bc981f614ff230f4cda94c2cd7818c3ebc (diff)
GBE: Re-implement BTI logic in backend
Previously, we searched from the use-point of pointers, like load/store,
and tried to find all the possible pointer sources. But sometimes we meet
a ptrtoint/add/inttoptr pattern, and what's worse, for the operands of the
add instruction it is hard to determine which one comes from the pointer
and which one may be an offset.
So what we do in this patch is: start the search from the def-point
(GlobalVariable, kernel function pointer argument, AllocaInst - the
definitions we care about) and traverse all their uses. During the
traversal, we record the escape points (i.e. load/store/atomic
instructions). Later, when we generate these kinds of instructions, we can
query their possible sources and get the corresponding BTIs.
v2:
refine the error message printed when an illegal pointer is found.
Signed-off-by: Ruiling Song <ruiling.song@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
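To make the traversal concrete before reading the diff, here is a minimal sketch of the def-to-use walk described above. It is written against the current LLVM C++ API (`Value::users()`), whereas the patch itself also supports the pre-3.5 `use_iterator`; the names `recordEscapes` and `EscapeMap` are illustrative placeholders, not identifiers from this patch (the patch's own versions are `findPointerEscape` and `pointerOrigMap`, shown below).

```cpp
// Sketch only: walk from a pointer definition to every transitive use, and
// remember the load/store instructions that finally consume it, keyed by
// that consuming instruction.
#include <llvm/IR/Instructions.h>
#include <llvm/IR/Value.h>
#include <llvm/Support/Casting.h>
#include <map>
#include <set>
#include <vector>

using EscapeMap = std::map<llvm::Value *, std::vector<llvm::Value *>>;

static void recordEscapes(llvm::Value *origin, EscapeMap &escapeMap) {
  std::vector<llvm::Value *> workList(1, origin);
  std::set<llvm::User *> visited;

  // Breadth-first walk over the use chains rooted at 'origin'.
  for (unsigned i = 0; i < workList.size(); ++i) {
    llvm::Value *cur = workList[i];
    for (llvm::User *user : cur->users()) {
      if (!visited.insert(user).second)
        continue;                            // this user was already traversed
      if (auto *st = llvm::dyn_cast<llvm::StoreInst>(user))
        if (st->getValueOperand() == cur)
          continue;                          // pointer stored as data, not used as an address
      if (llvm::isa<llvm::LoadInst>(user) || llvm::isa<llvm::StoreInst>(user))
        escapeMap[user].push_back(origin);   // escape point: remember which definition it came from
      else
        workList.push_back(user);            // ptrtoint/add/inttoptr/GEP/phi/...: keep walking
    }
  }
}
```

In the patch itself, analyzePointerOrigin() seeds this kind of walk once per GlobalVariable, pointer-typed kernel argument, and alloca, and gatherBTI() later looks the load/store instruction up in the recorded map and translates each origin's address space into a BTI.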
-rw-r--r-- | backend/src/backend/gen_insn_selection.cpp | 40
-rw-r--r-- | backend/src/llvm/llvm_gen_backend.cpp | 228
2 files changed, 159 insertions, 109 deletions
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index cd968c09..96970c74 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -3291,6 +3291,18 @@ namespace gbe
         }
       }
 
+    INLINE GenRegister getRelativeAddress(Selection::Opaque &sel, GenRegister address, ir::AddressSpace space, uint8_t bti) const {
+      if(space == ir::MEM_LOCAL || space == ir::MEM_CONSTANT)
+        return address;
+
+      sel.push();
+        sel.curr.noMask = 1;
+        GenRegister temp = sel.selReg(sel.reg(ir::FAMILY_DWORD), ir::TYPE_U32);
+        sel.ADD(temp, address, GenRegister::negate(sel.selReg(sel.ctx.getSurfaceBaseReg(bti), ir::TYPE_U32)));
+      sel.pop();
+      return temp;
+    }
+
     INLINE bool emitOne(Selection::Opaque &sel, const ir::StoreInstruction &insn, bool &markChildren) const
     {
       using namespace ir;
@@ -3303,28 +3315,16 @@ namespace gbe
         sel.ADD(temp, address, sel.selReg(ocl::slmoffset, ir::TYPE_U32));
         address = temp;
       }
-      if(space == MEM_LOCAL) {
+
+      BTI bti = insn.getBTI();
+      for (int x = 0; x < bti.count; x++) {
+        GenRegister temp = getRelativeAddress(sel, address, space, bti.bti[x]);
         if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)
-          this->emitWrite64(sel, insn, address, 0xfe);
+          this->emitWrite64(sel, insn, temp, bti.bti[x]);
         else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
-          this->emitUntypedWrite(sel, insn, address, 0xfe);
-        else
-          this->emitByteScatter(sel, insn, elemSize, address, 0xfe);
-      } else {
-        BTI bti = insn.getBTI();
-        for (int x = 0; x < bti.count; x++) {
-          GenRegister temp = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
-          sel.push();
-            sel.curr.noMask = 1;
-            sel.ADD(temp, address, GenRegister::negate(sel.selReg(sel.ctx.getSurfaceBaseReg(bti.bti[x]), ir::TYPE_U32)));
-          sel.pop();
-          if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)
-            this->emitWrite64(sel, insn, temp, bti.bti[x]);
-          else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
-            this->emitUntypedWrite(sel, insn, temp, bti.bti[x]);
-          else {
-            this->emitByteScatter(sel, insn, elemSize, temp, bti.bti[x]);
-          }
+          this->emitUntypedWrite(sel, insn, temp, bti.bti[x]);
+        else {
+          this->emitByteScatter(sel, insn, elemSize, temp, bti.bti[x]);
         }
       }
       return true;
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index a6f92157..b6cb4c73 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -464,10 +464,16 @@ namespace gbe
      */
     set<const Value*> conditionSet;
     map<const Value*, int> globalPointer;
+    typedef map<const Value*, int>::iterator GlobalPtrIter;
+
     /*!
      * <phi,phiCopy> node information for later optimization
      */
     map<const ir::Register, const ir::Register> phiMap;
+
+    map<Value *, SmallVector<Value *, 4>> pointerOrigMap;
+    typedef map<Value *, SmallVector<Value *, 4>>::iterator PtrOrigMapIter;
+
     /*! We visit each function twice. Once to allocate the registers and once to
      *  emit the Gen IR instructions
      */
@@ -529,14 +535,22 @@ namespace gbe
       bool bKernel = isKernelFunction(F);
       if(!bKernel) return false;
 
+      analyzePointerOrigin(F);
       LI = &getAnalysis<LoopInfo>();
       emitFunction(F);
       phiMap.clear();
       globalPointer.clear();
+      pointerOrigMap.clear();
       // Reset for next function
       btiBase = BTI_RESERVED_NUM;
       return false;
     }
+    /*! Given a possible pointer value, find out the interested escape like
+        load/store or atomic instruction */
+    void findPointerEscape(Value *ptr);
+    /*! For all possible pointers, GlobalVariable, function pointer argument,
+        alloca instruction, find their pointer escape points */
+    void analyzePointerOrigin(Function &F);
 
     virtual bool doFinalization(Module &M) { return false; }
     /*! handle global variable register allocation (local, constant space) */
@@ -647,6 +661,83 @@ namespace gbe
   };
 
   char GenWriter::ID = 0;
+
+  void GenWriter::findPointerEscape(Value *ptr) {
+    std::vector<Value*> workList;
+    std::set<Value *> visited;
+
+    if (ptr->use_empty()) return;
+
+    workList.push_back(ptr);
+
+    for (unsigned i = 0; i < workList.size(); i++) {
+      Value *work = workList[i];
+      if (work->use_empty()) continue;
+
+      for (Value::use_iterator iter = work->use_begin(); iter != work->use_end(); ++iter) {
+        // After LLVM 3.5, use_iterator points to 'Use' instead of 'User',
+        // which is more straightforward.
+#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR < 5)
+        User *theUser = *iter;
+#else
+        User *theUser = iter->getUser();
+#endif
+        if (visited.find(theUser) != visited.end()) continue;
+        // pointer address is used as the ValueOperand in store instruction, should be skipped
+        if (StoreInst *load = dyn_cast<StoreInst>(theUser)) {
+          if (load->getValueOperand() == work) {
+            continue;
+          }
+        }
+
+        visited.insert(theUser);
+
+        if (isa<LoadInst>(theUser) || isa<StoreInst>(theUser) || isa<CallInst>(theUser)) {
+          if (isa<CallInst>(theUser)) {
+            Function *F = dyn_cast<CallInst>(theUser)->getCalledFunction();
+            if (!F || F->getIntrinsicID() != 0) continue;
+          }
+
+          PtrOrigMapIter ptrIter = pointerOrigMap.find(theUser);
+          if (ptrIter == pointerOrigMap.end()) {
+            // create new one
+            SmallVector<Value *, 4> pointers;
+            pointers.push_back(ptr);
+            pointerOrigMap.insert(std::make_pair(theUser, pointers));
+          } else {
+            // append it
+            (*ptrIter).second.push_back(ptr);
+          }
+        } else {
+          workList.push_back(theUser);
+        }
+      }
+    }
+  }
+
+  void GenWriter::analyzePointerOrigin(Function &F) {
+    // GlobalVariable
+    Module::GlobalListType &globalList = const_cast<Module::GlobalListType &> (TheModule->getGlobalList());
+    for(auto i = globalList.begin(); i != globalList.end(); i ++) {
+      GlobalVariable &v = *i;
+      if(!v.isConstantUsed()) continue;
+      findPointerEscape(&v);
+    }
+    // function argument
+    for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
+      if (I->getType()->isPointerTy()) {
+        findPointerEscape(I);
+      }
+    }
+    // alloca
+    BasicBlock &bb = F.getEntryBlock();
+    for (BasicBlock::iterator iter = bb.begin(), iterE = bb.end(); iter != iterE; ++iter) {
+      if (AllocaInst *ai = dyn_cast<AllocaInst>(iter)) {
+        findPointerEscape(ai);
+      }
+    }
+  }
+
   void getSequentialData(const ConstantDataSequential *cda, void *ptr, uint32_t &offset) {
     StringRef data = cda->getRawDataValues();
     memcpy((char*)ptr+offset, data.data(), data.size());
@@ -2897,7 +2988,7 @@ namespace gbe
       const ir::Register dst = this->getRegister(&I);
 
       ir::BTI bti;
-      gatherBTI(*AI, bti);
+      gatherBTI(&I, bti);
       vector<ir::Register> src;
       uint32_t srcNum = 0;
       while(AI != AE) {
@@ -3772,96 +3863,56 @@ handle_write_image:
     }
 
     // The idea behind is to search along the use-def chain, and find out all
-    // possible source of the pointer. Then in later codeGen, we can emit
-    // read/store instructions to these btis gathered.
-    void GenWriter::gatherBTI(Value *pointer, ir::BTI &bti) {
-      typedef map<const Value*, int>::iterator GlobalPtrIter;
-      Value *p;
-      size_t idx = 0;
-      int nBTI = 0;
-      std::vector<Value*> candidates;
-      candidates.push_back(pointer);
-      std::set<Value*> processed;
-
-      while (idx < candidates.size()) {
-        bool isPrivate = false;
-        bool needNewBTI = true;
-        p = candidates[idx];
-
-        while (dyn_cast<User>(p) && !dyn_cast<GlobalVariable>(p)) {
-
-          if (processed.find(p) == processed.end()) {
-            processed.insert(p);
-          } else {
-            // This use-def chain falls into a loop,
-            // it does not introduce a new buffer source.
-            needNewBTI = false;
-            break;
-          }
-
-          if (dyn_cast<SelectInst>(p)) {
-            SelectInst *sel = cast<SelectInst>(p);
-            p = sel->getTrueValue();
-            candidates.push_back(sel->getFalseValue());
-            continue;
-          }
-
-          if (dyn_cast<PHINode>(p)) {
-            PHINode* phi = cast<PHINode>(p);
-            int n = phi->getNumIncomingValues();
-            for (int j = 1; j < n; j++)
-              candidates.push_back(phi->getIncomingValue(j));
-            p = phi->getIncomingValue(0);
-            continue;
-          }
-
-          if (dyn_cast<AllocaInst>(p)) {
-            isPrivate = true;
-            break;
+    // possible sources of the pointer. Then in later codeGen, we can emit
+    // read/store instructions to these BTIs gathered.
+    void GenWriter::gatherBTI(Value *insn, ir::BTI &bti) {
+      PtrOrigMapIter iter = pointerOrigMap.find(insn);
+      if (iter != pointerOrigMap.end()) {
+        SmallVectorImpl<Value *> &origins = iter->second;
+        uint8_t nBTI = 0;
+        for (unsigned i = 0; i < origins.size(); i++) {
+          uint8_t new_bti = 0;
+          Value *origin = origins[i];
+          unsigned space = origin->getType()->getPointerAddressSpace();
+          switch (space) {
+            case 0:
+              new_bti = BTI_PRIVATE;
+              break;
+            case 1:
+            {
+              GlobalPtrIter iter = globalPointer.find(origin);
+              GBE_ASSERT(iter != globalPointer.end());
+              new_bti = iter->second;
+              break;
+            }
+            case 2:
+              new_bti = BTI_CONSTANT;
+              break;
+            case 3:
+              new_bti = 0xfe;
+              break;
+            default:
+              GBE_ASSERT(0 && "address space not unhandled in gatherBTI()\n");
+              break;
           }
-          p = cast<User>(p)->getOperand(0);
-        }
-
-        if (needNewBTI == false) {
-          // go to next possible pointer source
-          idx++; continue;
-        }
-        uint8_t new_bti = 0;
-        if (isPrivate) {
-          new_bti = BTI_PRIVATE;
-        } else {
-          if(isa<Argument>(p) && dyn_cast<Argument>(p)->hasByValAttr()) {
-            // structure value implementation is not complete now,
-            // they are now treated as push constant, so, the load/store
-            // here is not as meaningful.
-            bti.bti[0] = BTI_PRIVATE;
-            bti.count = 1;
-            break;
-          }
-          Type *ty = p->getType();
-          if(ty->getPointerAddressSpace() == 3) {
-            // __local memory
-            new_bti = 0xfe;
-          } else {
-            // __global memory
-            GlobalPtrIter iter = globalPointer.find(p);
-            GBE_ASSERT(iter != globalPointer.end());
-            new_bti = iter->second;
+          // avoid duplicate
+          bool bFound = false;
+          for (int j = 0; j < nBTI; j++) {
+            if (bti.bti[j] == new_bti) {
+              bFound = true; break;
+            }
           }
-        }
-        // avoid duplicate
-        bool bFound = false;
-        for (int j = 0; j < nBTI; j++) {
-          if (bti.bti[j] == new_bti) {
-            bFound = true; break;
+          if (bFound == false) {
+            bti.bti[nBTI++] = new_bti;
+            bti.count = nBTI;
           }
         }
-        if (bFound == false) {
-          bti.bti[nBTI++] = new_bti;
-          bti.count = nBTI;
-        }
-        idx++;
+      } else {
+        insn->dump();
+        std::cerr << "Illegal pointer which is not from a valid memory space." << std::endl;
+        std::cerr << "Aborting..." << std::endl;
+        exit(-1);
       }
       GBE_ASSERT(bti.count <= MAX_MIXED_POINTER);
     }
@@ -3878,9 +3929,8 @@ handle_write_image:
       const ir::AddressSpace addrSpace = addressSpaceLLVMToGen(llvmSpace);
       const ir::Register ptr = this->getRegister(llvmPtr);
       ir::BTI binding;
-      if(addrSpace == ir::MEM_GLOBAL || addrSpace == ir::MEM_PRIVATE) {
-        gatherBTI(llvmPtr, binding);
-      }
+      gatherBTI(&I, binding);
+
       // Scalar is easy. We neednot build register tuples
       if (isScalarType(llvmType) == true) {
         const ir::Type type = getType(ctx, llvmType);