diff options
-rw-r--r-- | backend/src/backend/context.cpp | 2 | ||||
-rw-r--r-- | backend/src/backend/context.hpp | 8 | ||||
-rw-r--r-- | backend/src/backend/gen_context.cpp | 29 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.cpp | 89 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.hpp | 1 | ||||
-rw-r--r-- | backend/src/backend/program.h | 1 | ||||
-rw-r--r-- | backend/src/ir/function.cpp | 4 | ||||
-rw-r--r-- | backend/src/ir/profile.cpp | 2 | ||||
-rw-r--r-- | backend/src/ir/profile.hpp | 5 | ||||
-rw-r--r-- | src/cl_command_queue_gen7.c | 16 |
10 files changed, 128 insertions, 29 deletions
diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp index 5e33ddd6..59ccc79d 100644 --- a/backend/src/backend/context.cpp +++ b/backend/src/backend/context.cpp @@ -354,6 +354,8 @@ namespace gbe this->kernel = this->allocateKernel(); this->kernel->simdWidth = this->simdWidth; this->buildArgList(); + if (fn.labelNum() > 0xffff) + this->useDWLabel = true; if (usedLabels.size() == 0) this->buildUsedLabels(); if (JIPs.size() == 0) diff --git a/backend/src/backend/context.hpp b/backend/src/backend/context.hpp index 1b3744b2..faa7c8aa 100644 --- a/backend/src/backend/context.hpp +++ b/backend/src/backend/context.hpp @@ -100,6 +100,13 @@ namespace gbe /*! Preallocated curbe register set including special registers. */ map<ir::Register, uint32_t> curbeRegs; ir::Register getSurfaceBaseReg(unsigned char bti); + /* Indicate whether we should use DW label or W label in backend.*/ + bool isDWLabel(void) const { + return useDWLabel; + } + uint32_t getMaxLabel(void) const { + return this->isDWLabel() ? 0xffffffff : 0xffff; + } protected: /*! Build the instruction stream. Return false if failed */ virtual bool emitCode(void) = 0; @@ -140,6 +147,7 @@ namespace gbe set<ir::LabelIndex> usedLabels; //!< Set of all used labels JIPMap JIPs; //!< Where to jump all labels/branches uint32_t simdWidth; //!< Number of lanes per HW threads + bool useDWLabel; //!< false means using u16 label, true means using u32 label. map<unsigned char, ir::Register> btiRegMap; GBE_CLASS(Context); //!< Use custom allocators }; diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index cdf581c4..094e6b49 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -134,18 +134,36 @@ namespace gbe return true; } + /* Get proper block ip register according to current label width. 
*/ + static GenRegister getBlockIP(GenContext &ctx) { + GenRegister blockip; + if (!ctx.isDWLabel()) + blockip = ctx.ra->genReg(GenRegister::uw8grf(ir::ocl::blockip)); + else + blockip = ctx.ra->genReg(GenRegister::ud8grf(ir::ocl::dwblockip)); + return blockip; + } + + /* Set current block ip register to a specified constant label value. */ + static void setBlockIP(GenContext &ctx, GenRegister blockip, uint32_t label) { + if (!ctx.isDWLabel()) + ctx.p->MOV(blockip, GenRegister::immuw(label)); + else + ctx.p->MOV(blockip, GenRegister::immud(label)); + } + void GenContext::clearFlagRegister(void) { // when group size not aligned to simdWidth, flag register need clear to // make prediction(any8/16h) work correctly - const GenRegister blockip = ra->genReg(GenRegister::uw8grf(ir::ocl::blockip)); + const GenRegister blockip = getBlockIP(*this); const GenRegister zero = ra->genReg(GenRegister::uw1grf(ir::ocl::zero)); const GenRegister one = ra->genReg(GenRegister::uw1grf(ir::ocl::one)); p->push(); p->curr.noMask = 1; p->curr.predicate = GEN_PREDICATE_NONE; - p->MOV(blockip, GenRegister::immuw(GEN_MAX_LABEL)); + setBlockIP(*this, blockip, getMaxLabel()); p->curr.noMask = 0; - p->MOV(blockip, GenRegister::immuw(0)); + setBlockIP(*this, blockip, 0); p->curr.execWidth = 1; // FIXME, need to get the final use set of zero/one, if there is no user, // no need to generate the following two instructions. 
@@ -1990,7 +2008,10 @@ namespace gbe // We insert the block IP mask first using namespace ir::ocl; - allocCurbeReg(blockip, GBE_CURBE_BLOCK_IP); + if (!isDWLabel()) + allocCurbeReg(blockip, GBE_CURBE_BLOCK_IP); + else + allocCurbeReg(dwblockip, GBE_CURBE_DW_BLOCK_IP); allocCurbeReg(lid0, GBE_CURBE_LOCAL_ID_X); allocCurbeReg(lid1, GBE_CURBE_LOCAL_ID_Y); allocCurbeReg(lid2, GBE_CURBE_LOCAL_ID_Z); diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 5df5673d..becb1c99 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -653,6 +653,64 @@ namespace gbe SelectionDAG *dag0, SelectionDAG *dag1, GenRegister &src0, GenRegister &src1, ir::Type type, bool &inverse); + + /* Get current block IP register according to label width. */ + GenRegister getBlockIP() { + return ctx.isDWLabel() ? selReg(ir::ocl::dwblockip) : selReg(ir::ocl::blockip); + } + + /* Get proper label immediate gen register from label value. */ + GenRegister getLabelImmReg(uint32_t labelValue) { + return ctx.isDWLabel() ? GenRegister::immud(labelValue) : GenRegister::immuw(labelValue); + } + + /* Get proper label immediate gen register from label. */ + GenRegister getLabelImmReg(ir::LabelIndex label) { + return getLabelImmReg(label.value()); + } + + /* Set current label register to a label value. 
*/ + void setBlockIP(GenRegister blockip, uint32_t labelValue) { + if (!ctx.isDWLabel()) + MOV(GenRegister::retype(blockip, GEN_TYPE_UW), GenRegister::immuw(labelValue)); + else + MOV(GenRegister::retype(blockip, GEN_TYPE_UD), GenRegister::immud(labelValue)); + } + + /* Generate comparison instruction to compare block ip address and specified label register.*/ + void cmpBlockIP(uint32_t cond, + GenRegister blockip, + GenRegister labelReg) { + if (!ctx.isDWLabel()) + CMP(cond, + GenRegister::retype(blockip, GEN_TYPE_UW), + labelReg, + GenRegister::retype(GenRegister::null(), + GEN_TYPE_UW)); + else + CMP(cond, + GenRegister::retype(blockip, GEN_TYPE_UD), + labelReg, + GenRegister::retype(GenRegister::null(), + GEN_TYPE_UD)); + } + + void cmpBlockIP(uint32_t cond, + GenRegister blockip, + uint32_t labelValue) { + if (!ctx.isDWLabel()) + CMP(cond, + GenRegister::retype(blockip, GEN_TYPE_UW), + GenRegister::immuw(labelValue), + GenRegister::retype(GenRegister::null(), + GEN_TYPE_UW)); + else + CMP(cond, + GenRegister::retype(blockip, GEN_TYPE_UD), + GenRegister::immud(labelValue), + GenRegister::retype(GenRegister::null(), GEN_TYPE_UD)); + } + /*! 
Use custom allocators */ GBE_CLASS(Opaque); friend class SelectionBlock; @@ -4225,10 +4283,10 @@ namespace gbe { using namespace ir; const LabelIndex label = insn.getLabelIndex(); - const GenRegister src0 = sel.selReg(ocl::blockip); - const GenRegister src1 = GenRegister::immuw(label); + const GenRegister src0 = sel.getBlockIP(); + const GenRegister src1 = sel.getLabelImmReg(label); const uint32_t simdWidth = sel.ctx.getSimdWidth(); - GBE_ASSERTM(label < GEN_MAX_LABEL, "We reached the maximum label number which is reserved for barrier handling"); + GBE_ASSERTM(label < sel.ctx.getMaxLabel(), "We reached the maximum label number which is reserved for barrier handling"); sel.LABEL(label); if(!insn.getParent()->needIf) @@ -4249,8 +4307,7 @@ namespace gbe sel.push(); sel.curr.noMask = 1; sel.curr.predicate = GEN_PREDICATE_NONE; - sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1, - GenRegister::retype(GenRegister::null(), GEN_TYPE_UW)); + sel.cmpBlockIP(GEN_CONDITIONAL_LE, src0, src1); sel.pop(); if (sel.block->hasBarrier) { @@ -4260,11 +4317,10 @@ namespace gbe // this block, as it will always excute with all lanes activated. sel.push(); sel.curr.predicate = GEN_PREDICATE_NORMAL; - sel.MOV(GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL)); + sel.setBlockIP(src0, sel.ctx.getMaxLabel()); sel.curr.predicate = GEN_PREDICATE_NONE; sel.curr.noMask = 1; - sel.CMP(GEN_CONDITIONAL_EQ, GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL), - GenRegister::retype(GenRegister::null(), GEN_TYPE_UW)); + sel.cmpBlockIP(GEN_CONDITIONAL_EQ, src0, sel.ctx.getMaxLabel()); if (simdWidth == 8) sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H; else if (simdWidth == 16) @@ -4279,7 +4335,7 @@ namespace gbe // FIXME, if the last BRA is unconditional jump, we don't need to update the label here. 
sel.push(); sel.curr.predicate = GEN_PREDICATE_NORMAL; - sel.MOV(GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(label.value())); + sel.setBlockIP(src0, label.value()); sel.pop(); } else { @@ -4582,7 +4638,7 @@ namespace gbe ir::LabelIndex src) const { using namespace ir; - const GenRegister ip = sel.selReg(ocl::blockip, TYPE_U16); + const GenRegister ip = sel.getBlockIP(); // We will not emit any jump if we must go the next block anyway const BasicBlock *curr = insn.getParent(); @@ -4597,7 +4653,7 @@ namespace gbe sel.curr.physicalFlag = 0; sel.curr.flagIndex = pred.value(); sel.curr.predicate = GEN_PREDICATE_NORMAL; - sel.MOV(ip, GenRegister::immuw(dst.value())); + sel.setBlockIP(ip, dst.value()); sel.curr.predicate = GEN_PREDICATE_NONE; if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif) sel.ENDIF(GenRegister::immd(0), nextLabel); @@ -4607,7 +4663,7 @@ namespace gbe // Update the PcIPs const LabelIndex jip = sel.ctx.getLabelIndex(&insn); if(insn.getParent()->needEndif) - sel.MOV(ip, GenRegister::immuw(dst.value())); + sel.setBlockIP(ip, dst.value()); if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif) { if(insn.getParent()->needEndif && !insn.getParent()->needIf) @@ -4633,7 +4689,8 @@ namespace gbe ir::LabelIndex src) const { using namespace ir; - const GenRegister ip = sel.selReg(ocl::blockip, TYPE_U16); + //const GenRegister ip = sel.selReg(ocl::blockip, TYPE_U16); + const GenRegister ip = sel.getBlockIP(); const Function &fn = sel.ctx.getFunction(); const BasicBlock &bb = fn.getBlock(src); const LabelIndex jip = sel.ctx.getLabelIndex(&insn); @@ -4648,13 +4705,13 @@ namespace gbe // block. 
Next instruction will properly update the IPs of the lanes // that actually take the branch const LabelIndex next = bb.getNextBlock()->getLabelIndex(); - sel.MOV(ip, GenRegister::immuw(next.value())); + sel.setBlockIP(ip, next.value()); GBE_ASSERT(jip == dst); sel.push(); sel.curr.physicalFlag = 0; sel.curr.flagIndex = pred.value(); sel.curr.predicate = GEN_PREDICATE_NORMAL; - sel.MOV(ip, GenRegister::immuw(dst.value())); + sel.setBlockIP(ip, dst.value()); sel.block->endifOffset = -1; sel.curr.predicate = GEN_PREDICATE_NONE; if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif) @@ -4671,7 +4728,7 @@ namespace gbe const LabelIndex next = bb.getNextBlock()->getLabelIndex(); // Update the PcIPs if(insn.getParent()->needEndif) - sel.MOV(ip, GenRegister::immuw(dst.value())); + sel.setBlockIP(ip, dst.value()); sel.block->endifOffset = -1; if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif) { if(insn.getParent()->needEndif && !insn.getParent()->needIf) diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp index 686f3090..7c9bce5e 100644 --- a/backend/src/backend/gen_insn_selection.hpp +++ b/backend/src/backend/gen_insn_selection.hpp @@ -44,7 +44,6 @@ namespace gbe /*! Translate IR compare to Gen compare */ uint32_t getGenCompare(ir::Opcode opcode); - #define GEN_MAX_LABEL 0xFFFF /*! 
Selection opcodes properly encoded from 0 to n for fast jump tables * generations diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h index dc5662fc..4065a17e 100644 --- a/backend/src/backend/program.h +++ b/backend/src/backend/program.h @@ -96,6 +96,7 @@ enum gbe_curbe_type { GBE_CURBE_KERNEL_ARGUMENT, GBE_CURBE_EXTRA_ARGUMENT, GBE_CURBE_BLOCK_IP, + GBE_CURBE_DW_BLOCK_IP, GBE_CURBE_THREAD_NUM, GBE_CURBE_ZERO, GBE_CURBE_ONE, diff --git a/backend/src/ir/function.cpp b/backend/src/ir/function.cpp index 6dde6e2b..38355e2e 100644 --- a/backend/src/ir/function.cpp +++ b/backend/src/ir/function.cpp @@ -136,8 +136,8 @@ namespace ir { } LabelIndex Function::newLabel(void) { - GBE_ASSERTM(labels.size() < 0xffff, - "Too many labels are defined (65536 only are supported)"); + GBE_ASSERTM(labels.size() < 0xffffffffull, + "Too many labels are defined (4G only are supported)"); const LabelIndex index(labels.size()); labels.push_back(NULL); return index; diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp index 4c272bd4..ec7ab94f 100644 --- a/backend/src/ir/profile.cpp +++ b/backend/src/ir/profile.cpp @@ -43,6 +43,7 @@ namespace ir { "zero", "one", "retVal", "slm_offset", "printf_buffer_pointer", "printf_index_buffer_pointer", + "dwblockip", "invalid" }; @@ -86,6 +87,7 @@ namespace ir { DECL_NEW_REG(FAMILY_DWORD, slmoffset, 1); DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1); DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1); + DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0); DECL_NEW_REG(FAMILY_DWORD, invalid, 1); } #undef DECL_NEW_REG diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp index 7259d9f6..8f69320d 100644 --- a/backend/src/ir/profile.hpp +++ b/backend/src/ir/profile.hpp @@ -71,8 +71,9 @@ namespace ir { static const Register slmoffset = Register(27); // Group's SLM offset in total 64K SLM static const Register printfbptr = Register(28); // printf buffer address . 
static const Register printfiptr = Register(29); // printf index buffer address. - static const Register invalid = Register(30); // used for valid comparation. - static const uint32_t regNum = 31; // number of special registers + static const Register dwblockip = Register(30); // blockip for 32-bit (DW) labels + static const Register invalid = Register(31); // used for valid comparison. + static const uint32_t regNum = 32; // number of special registers extern const char *specialRegMean[]; // special register name. } /* namespace ocl */ diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c index 253c4f2e..4adbd2b2 100644 --- a/src/cl_command_queue_gen7.c +++ b/src/cl_command_queue_gen7.c @@ -49,23 +49,27 @@ cl_set_varying_payload(const cl_kernel ker, size_t i, j, k, curr = 0; int32_t id_offset[3], ip_offset; cl_int err = CL_SUCCESS; + int32_t dw_ip_offset = -1; id_offset[0] = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_X, 0); id_offset[1] = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_Y, 0); id_offset[2] = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_Z, 0); ip_offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_BLOCK_IP, 0); + if (ip_offset < 0) + dw_ip_offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_DW_BLOCK_IP, 0); + assert(ip_offset < 0 || dw_ip_offset < 0); assert(id_offset[0] >= 0 && id_offset[1] >= 0 && id_offset[2] >= 0 && - ip_offset >= 0); + (ip_offset >= 0 || dw_ip_offset >= 0)); TRY_ALLOC(ids[0], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz)); TRY_ALLOC(ids[1], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz)); TRY_ALLOC(ids[2], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz)); TRY_ALLOC(block_ips, (uint16_t*) alloca(sizeof(uint16_t)*thread_n*simd_sz)); - /* 0xffff means that the lane is inactivated */ - memset(block_ips, 0xff, sizeof(uint16_t)*thread_n*simd_sz); + memset(block_ips, 0xff, sizeof(int16_t)*thread_n*simd_sz); + /* Compute the IDs and the block IPs 
*/ for (k = 0; k < local_wk_sz[2]; ++k) @@ -84,11 +88,15 @@ cl_set_varying_payload(const cl_kernel ker, uint32_t *ids1 = (uint32_t *) (data + id_offset[1]); uint32_t *ids2 = (uint32_t *) (data + id_offset[2]); uint16_t *ips = (uint16_t *) (data + ip_offset); + uint32_t *dw_ips = (uint32_t *) (data + dw_ip_offset); for (j = 0; j < simd_sz; ++j, ++curr) { ids0[j] = ids[0][curr]; ids1[j] = ids[1][curr]; ids2[j] = ids[2][curr]; - ips[j] = block_ips[curr]; + if (ip_offset >= 0) + ips[j] = block_ips[curr]; + if (dw_ip_offset >= 0) + dw_ips[j] = block_ips[curr]; } } |