diff options
author | Homer Hsing <homer.xing@intel.com> | 2013-05-02 09:00:31 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@linux.intel.com> | 2013-05-02 10:46:58 +0800 |
commit | d0f10c38843b1f8f1e3a688b86a4139661816583 (patch) | |
tree | 6b8c8d65754630758c67250b57eb7eef6441e835 | |
parent | 29e29dcfe7060be1fbe75c0b5bdb2978e0a59128 (diff) |
Support global constant arrays
Version 3.
Support global constant arrays defined outside any kernel.
Example:
constant int h[] = {71,72,73,74,75,76,77};
kernel void k(global int *dst) {
int i = get_global_id(0);
dst[i] = h[i % 7];
}
Signed-off-by: Homer Hsing <homer.xing@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
-rw-r--r-- | backend/src/backend/context.cpp | 12 | ||||
-rw-r--r-- | backend/src/backend/gen_reg_allocation.cpp | 1 | ||||
-rw-r--r-- | backend/src/llvm/llvm_gen_backend.cpp | 86 | ||||
-rw-r--r-- | src/cl_command_queue_gen7.c | 8 |
4 files changed, 106 insertions, 1 deletions
diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp index 91d8d8c7..c636b488 100644 --- a/backend/src/backend/context.cpp +++ b/backend/src/backend/context.cpp @@ -419,6 +419,15 @@ namespace gbe } }); #undef INSERT_REG + this->newCurbeEntry(GBE_CURBE_GLOBAL_CONSTANT_OFFSET, 0, sizeof(int)); + specialRegs.insert(ir::ocl::constoffst); + + // Insert serialized global constant arrays if used + const ir::ConstantSet& constantSet = unit.getConstantSet(); + if (constantSet.getConstantNum()) { + size_t size = constantSet.getDataSize(); + this->newCurbeEntry(GBE_CURBE_GLOBAL_CONSTANT_DATA, 0, size); + } // Insert the number of threads this->newCurbeEntry(GBE_CURBE_THREAD_NUM, 0, sizeof(uint32_t)); @@ -591,7 +600,8 @@ namespace gbe reg == ir::ocl::gsize2 || reg == ir::ocl::goffset0 || reg == ir::ocl::goffset1 || - reg == ir::ocl::goffset2) + reg == ir::ocl::goffset2 || + reg == ir::ocl::constoffst) return true; return false; } diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp index 10e4ab6f..8c9f3582 100644 --- a/backend/src/backend/gen_reg_allocation.cpp +++ b/backend/src/backend/gen_reg_allocation.cpp @@ -523,6 +523,7 @@ namespace gbe allocatePayloadReg(GBE_CURBE_GROUP_NUM_Z, ocl::numgroup2); allocatePayloadReg(GBE_CURBE_STACK_POINTER, ocl::stackptr); allocatePayloadReg(GBE_CURBE_THREAD_NUM, ocl::threadn); + allocatePayloadReg(GBE_CURBE_GLOBAL_CONSTANT_OFFSET, ocl::constoffst); // Group and barrier IDs are always allocated by the hardware in r0 RA.insert(std::make_pair(ocl::groupid0, 1*sizeof(float))); // r0.1 diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 42265ee6..637e7be8 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -455,6 +455,8 @@ namespace gbe virtual bool doInitialization(Module &M); + void collectGlobalConstant(void) const; + bool runOnFunction(Function &F) { // Do not codegen any 'available_externally' functions at all, they have // definitions outside the translation unit. @@ -550,11 +552,60 @@ namespace gbe char GenWriter::ID = 0; + void GenWriter::collectGlobalConstant(void) const { + const Module::GlobalListType &globalList = TheModule->getGlobalList(); + for(auto i = globalList.begin(); i != globalList.end(); i ++) { + const GlobalVariable &v = *i; + const char *name = v.getName().data(); + unsigned addrSpace = v.getType()->getAddressSpace(); + if(addrSpace == ir::AddressSpace::MEM_CONSTANT) { + GBE_ASSERT(v.hasInitializer()); + const Constant *c = v.getInitializer(); + GBE_ASSERT(c->getType()->getTypeID() == Type::ArrayTyID); + const ConstantDataArray *cda = dyn_cast<ConstantDataArray>(c); + GBE_ASSERT(cda); + unsigned len = cda->getNumElements(); + uint64_t elementSize = cda->getElementByteSize(); + Type::TypeID typeID = cda->getElementType()->getTypeID(); + if(typeID == Type::TypeID::IntegerTyID) + elementSize = sizeof(unsigned); + void *mem = malloc(elementSize * len); + for(unsigned j = 0; j < len; j ++) { + switch(typeID) { + case Type::TypeID::FloatTyID: + { + float f = cda->getElementAsFloat(j); + memcpy((float *)mem + j, &f, elementSize); + } + break; + case Type::TypeID::DoubleTyID: + { + double d = cda->getElementAsDouble(j); + memcpy((double *)mem + j, &d, elementSize); + } + break; + case Type::TypeID::IntegerTyID: + { + unsigned u = (unsigned) cda->getElementAsInteger(j); + memcpy((unsigned *)mem + j, &u, elementSize); + } + break; + default: + NOT_IMPLEMENTED; + } + } + unit.newConstant((char *)mem, name, elementSize * len, sizeof(unsigned)); + free(mem); + } + } + } + bool GenWriter::doInitialization(Module &M) { FunctionPass::doInitialization(M); // Initialize TheModule = &M; + collectGlobalConstant(); return false; } @@ -704,6 +755,17 @@ namespace gbe } ir::Register GenWriter::getRegister(Value *value, uint32_t elemID) { + if (dyn_cast<ConstantExpr>(value)) { + ConstantExpr *ce = dyn_cast<ConstantExpr>(value); + if(ce->isCast()) { + GBE_ASSERT(ce->getOpcode() == Instruction::PtrToInt); + const Value *pointer = ce->getOperand(0); + GBE_ASSERT(pointer->hasName()); + auto name = pointer->getName().str(); + uint16_t reg = unit.getConstantSet().getConstant(name).getReg(); + return ir::Register(reg); + } + } Constant *CPV = dyn_cast<Constant>(value); if (CPV) { GBE_ASSERT(isa<GlobalValue>(CPV) == false); @@ -1076,6 +1138,30 @@ namespace gbe this->labelMap.clear(); this->emitFunctionPrototype(F); + // Allocate a virtual register for each global constant array + const Module::GlobalListType &globalList = TheModule->getGlobalList(); + size_t j = 0; + for(auto i = globalList.begin(); i != globalList.end(); i ++) { + const GlobalVariable &v = *i; + unsigned addrSpace = v.getType()->getAddressSpace(); + if(addrSpace != ir::AddressSpace::MEM_CONSTANT) + continue; + GBE_ASSERT(v.hasInitializer()); + const Constant *c = v.getInitializer(); + GBE_ASSERT(c->getType()->getTypeID() == Type::ArrayTyID); + const ConstantDataArray *cda = dyn_cast<ConstantDataArray>(c); + GBE_ASSERT(cda); + ir::Register reg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD); + ir::Constant &con = unit.getConstantSet().getConstant(j ++); + con.setReg(reg.value()); + if(con.getOffset() != 0) { + ctx.LOADI(ir::TYPE_S32, reg, ctx.newIntegerImmediate(con.getOffset(), ir::TYPE_S32)); + ctx.ADD(ir::TYPE_S32, reg, ir::ocl::constoffst, reg); + } else { + ctx.MOV(ir::TYPE_S32, reg, ir::ocl::constoffst); + } + } + // Visit all the instructions and emit the IR registers or the value to // value mapping when a new register is not needed pass = PASS_EMIT_REGISTERS; diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c index 9402549c..108684f0 100644 --- a/src/cl_command_queue_gen7.c +++ b/src/cl_command_queue_gen7.c @@ -120,6 +120,7 @@ cl_curbe_fill(cl_kernel ker, UPLOAD(GBE_CURBE_GROUP_NUM_Y, global_wk_sz[1]/local_wk_sz[1]); UPLOAD(GBE_CURBE_GROUP_NUM_Z, global_wk_sz[2]/local_wk_sz[2]); UPLOAD(GBE_CURBE_THREAD_NUM, thread_n); + UPLOAD(GBE_CURBE_GLOBAL_CONSTANT_OFFSET, gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_GLOBAL_CONSTANT_DATA, 0) + 32); #undef UPLOAD /* Write identity for the stack pointer. This is required by the stack pointer @@ -132,6 +133,13 @@ cl_curbe_fill(cl_kernel ker, for (i = 0; i < (int32_t) simd_sz; ++i) stackptr[i] = i; } + /* Write global constant arrays */ + if ((offset = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_GLOBAL_CONSTANT_DATA, 0)) >= 0) { + /* Write the global constant arrays */ + gbe_program prog = ker->program->opaque; + gbe_program_get_global_constant_data(prog, ker->curbe + offset); + } + /* Handle the various offsets to SLM */ const int32_t arg_n = gbe_kernel_get_arg_num(ker->opaque); int32_t arg, slm_offset = 0; |