diff options
author | Zhigang Gong <zhigang.gong@intel.com> | 2015-03-31 16:53:11 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2015-04-13 16:11:28 +0800 |
commit | 5f267ed48ed996a18bfd8daff12502158d377d92 (patch) | |
tree | f950deefadc5b8f6f3a5f2f10be4c82aa98a70b5 /backend | |
parent | 321975900126f924df19e85e564d7b203c15c65d (diff) |
GBE: Extend front label ip to 32 bit on demand.
If the number of front end labels exceeds 0xffff, the backend will
use a full DW (32-bit) value to represent each block's IP address.
This is decided dynamically, based on the actual number of labels
in the front end function.
Signed-off-by: Zhigang Gong <zhigang.gong@intel.com>
Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
Diffstat (limited to 'backend')
-rw-r--r-- | backend/src/backend/context.cpp | 2 | ||||
-rw-r--r-- | backend/src/backend/context.hpp | 8 | ||||
-rw-r--r-- | backend/src/backend/gen_context.cpp | 29 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.cpp | 89 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.hpp | 1 | ||||
-rw-r--r-- | backend/src/backend/program.h | 1 | ||||
-rw-r--r-- | backend/src/ir/function.cpp | 4 | ||||
-rw-r--r-- | backend/src/ir/profile.cpp | 2 | ||||
-rw-r--r-- | backend/src/ir/profile.hpp | 5 |
9 files changed, 116 insertions, 25 deletions
diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp index 5e33ddd6..59ccc79d 100644 --- a/backend/src/backend/context.cpp +++ b/backend/src/backend/context.cpp @@ -354,6 +354,8 @@ namespace gbe this->kernel = this->allocateKernel(); this->kernel->simdWidth = this->simdWidth; this->buildArgList(); + if (fn.labelNum() > 0xffff) + this->useDWLabel = true; if (usedLabels.size() == 0) this->buildUsedLabels(); if (JIPs.size() == 0) diff --git a/backend/src/backend/context.hpp b/backend/src/backend/context.hpp index 1b3744b2..faa7c8aa 100644 --- a/backend/src/backend/context.hpp +++ b/backend/src/backend/context.hpp @@ -100,6 +100,13 @@ namespace gbe /*! Preallocated curbe register set including special registers. */ map<ir::Register, uint32_t> curbeRegs; ir::Register getSurfaceBaseReg(unsigned char bti); + /* Indicate whether we should use DW label or W label in backend.*/ + bool isDWLabel(void) const { + return useDWLabel; + } + uint32_t getMaxLabel(void) const { + return this->isDWLabel() ? 0xffffffff : 0xffff; + } protected: /*! Build the instruction stream. Return false if failed */ virtual bool emitCode(void) = 0; @@ -140,6 +147,7 @@ namespace gbe set<ir::LabelIndex> usedLabels; //!< Set of all used labels JIPMap JIPs; //!< Where to jump all labels/branches uint32_t simdWidth; //!< Number of lanes per HW threads + bool useDWLabel; //!< false means using u16 label, true means using u32 label. map<unsigned char, ir::Register> btiRegMap; GBE_CLASS(Context); //!< Use custom allocators }; diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index cdf581c4..094e6b49 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -134,18 +134,36 @@ namespace gbe return true; } + /* Get proper block ip register according to current label width. 
*/ + static GenRegister getBlockIP(GenContext &ctx) { + GenRegister blockip; + if (!ctx.isDWLabel()) + blockip = ctx.ra->genReg(GenRegister::uw8grf(ir::ocl::blockip)); + else + blockip = ctx.ra->genReg(GenRegister::ud8grf(ir::ocl::dwblockip)); + return blockip; + } + + /* Set current block ip register to a specified constant label value. */ + static void setBlockIP(GenContext &ctx, GenRegister blockip, uint32_t label) { + if (!ctx.isDWLabel()) + ctx.p->MOV(blockip, GenRegister::immuw(label)); + else + ctx.p->MOV(blockip, GenRegister::immud(label)); + } + void GenContext::clearFlagRegister(void) { // when group size not aligned to simdWidth, flag register need clear to // make prediction(any8/16h) work correctly - const GenRegister blockip = ra->genReg(GenRegister::uw8grf(ir::ocl::blockip)); + const GenRegister blockip = getBlockIP(*this); const GenRegister zero = ra->genReg(GenRegister::uw1grf(ir::ocl::zero)); const GenRegister one = ra->genReg(GenRegister::uw1grf(ir::ocl::one)); p->push(); p->curr.noMask = 1; p->curr.predicate = GEN_PREDICATE_NONE; - p->MOV(blockip, GenRegister::immuw(GEN_MAX_LABEL)); + setBlockIP(*this, blockip, getMaxLabel()); p->curr.noMask = 0; - p->MOV(blockip, GenRegister::immuw(0)); + setBlockIP(*this, blockip, 0); p->curr.execWidth = 1; // FIXME, need to get the final use set of zero/one, if there is no user, // no need to generate the following two instructions. 
@@ -1990,7 +2008,10 @@ namespace gbe // We insert the block IP mask first using namespace ir::ocl; - allocCurbeReg(blockip, GBE_CURBE_BLOCK_IP); + if (!isDWLabel()) + allocCurbeReg(blockip, GBE_CURBE_BLOCK_IP); + else + allocCurbeReg(dwblockip, GBE_CURBE_DW_BLOCK_IP); allocCurbeReg(lid0, GBE_CURBE_LOCAL_ID_X); allocCurbeReg(lid1, GBE_CURBE_LOCAL_ID_Y); allocCurbeReg(lid2, GBE_CURBE_LOCAL_ID_Z); diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 5df5673d..becb1c99 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -653,6 +653,64 @@ namespace gbe SelectionDAG *dag0, SelectionDAG *dag1, GenRegister &src0, GenRegister &src1, ir::Type type, bool &inverse); + + /* Get current block IP register according to label width. */ + GenRegister getBlockIP() { + return ctx.isDWLabel() ? selReg(ir::ocl::dwblockip) : selReg(ir::ocl::blockip); + } + + /* Get proper label immediate gen register from label value. */ + GenRegister getLabelImmReg(uint32_t labelValue) { + return ctx.isDWLabel() ? GenRegister::immud(labelValue) : GenRegister::immuw(labelValue); + } + + /* Get proper label immediate gen register from label. */ + GenRegister getLabelImmReg(ir::LabelIndex label) { + return getLabelImmReg(label.value()); + } + + /* Set current label register to a label value. 
*/ + void setBlockIP(GenRegister blockip, uint32_t labelValue) { + if (!ctx.isDWLabel()) + MOV(GenRegister::retype(blockip, GEN_TYPE_UW), GenRegister::immuw(labelValue)); + else + MOV(GenRegister::retype(blockip, GEN_TYPE_UD), GenRegister::immud(labelValue)); + } + + /* Generate comparison instruction to compare block ip address and specified label register.*/ + void cmpBlockIP(uint32_t cond, + GenRegister blockip, + GenRegister labelReg) { + if (!ctx.isDWLabel()) + CMP(cond, + GenRegister::retype(blockip, GEN_TYPE_UW), + labelReg, + GenRegister::retype(GenRegister::null(), + GEN_TYPE_UW)); + else + CMP(cond, + GenRegister::retype(blockip, GEN_TYPE_UD), + labelReg, + GenRegister::retype(GenRegister::null(), + GEN_TYPE_UD)); + } + + void cmpBlockIP(uint32_t cond, + GenRegister blockip, + uint32_t labelValue) { + if (!ctx.isDWLabel()) + CMP(cond, + GenRegister::retype(blockip, GEN_TYPE_UW), + GenRegister::immuw(labelValue), + GenRegister::retype(GenRegister::null(), + GEN_TYPE_UW)); + else + CMP(cond, + GenRegister::retype(blockip, GEN_TYPE_UD), + GenRegister::immud(labelValue), + GenRegister::retype(GenRegister::null(), GEN_TYPE_UD)); + } + /*! 
Use custom allocators */ GBE_CLASS(Opaque); friend class SelectionBlock; @@ -4225,10 +4283,10 @@ namespace gbe { using namespace ir; const LabelIndex label = insn.getLabelIndex(); - const GenRegister src0 = sel.selReg(ocl::blockip); - const GenRegister src1 = GenRegister::immuw(label); + const GenRegister src0 = sel.getBlockIP(); + const GenRegister src1 = sel.getLabelImmReg(label); const uint32_t simdWidth = sel.ctx.getSimdWidth(); - GBE_ASSERTM(label < GEN_MAX_LABEL, "We reached the maximum label number which is reserved for barrier handling"); + GBE_ASSERTM(label < sel.ctx.getMaxLabel(), "We reached the maximum label number which is reserved for barrier handling"); sel.LABEL(label); if(!insn.getParent()->needIf) @@ -4249,8 +4307,7 @@ namespace gbe sel.push(); sel.curr.noMask = 1; sel.curr.predicate = GEN_PREDICATE_NONE; - sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1, - GenRegister::retype(GenRegister::null(), GEN_TYPE_UW)); + sel.cmpBlockIP(GEN_CONDITIONAL_LE, src0, src1); sel.pop(); if (sel.block->hasBarrier) { @@ -4260,11 +4317,10 @@ namespace gbe // this block, as it will always excute with all lanes activated. sel.push(); sel.curr.predicate = GEN_PREDICATE_NORMAL; - sel.MOV(GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL)); + sel.setBlockIP(src0, sel.ctx.getMaxLabel()); sel.curr.predicate = GEN_PREDICATE_NONE; sel.curr.noMask = 1; - sel.CMP(GEN_CONDITIONAL_EQ, GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL), - GenRegister::retype(GenRegister::null(), GEN_TYPE_UW)); + sel.cmpBlockIP(GEN_CONDITIONAL_EQ, src0, sel.ctx.getMaxLabel()); if (simdWidth == 8) sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H; else if (simdWidth == 16) @@ -4279,7 +4335,7 @@ namespace gbe // FIXME, if the last BRA is unconditional jump, we don't need to update the label here. 
sel.push(); sel.curr.predicate = GEN_PREDICATE_NORMAL; - sel.MOV(GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(label.value())); + sel.setBlockIP(src0, label.value()); sel.pop(); } else { @@ -4582,7 +4638,7 @@ namespace gbe ir::LabelIndex src) const { using namespace ir; - const GenRegister ip = sel.selReg(ocl::blockip, TYPE_U16); + const GenRegister ip = sel.getBlockIP(); // We will not emit any jump if we must go the next block anyway const BasicBlock *curr = insn.getParent(); @@ -4597,7 +4653,7 @@ namespace gbe sel.curr.physicalFlag = 0; sel.curr.flagIndex = pred.value(); sel.curr.predicate = GEN_PREDICATE_NORMAL; - sel.MOV(ip, GenRegister::immuw(dst.value())); + sel.setBlockIP(ip, dst.value()); sel.curr.predicate = GEN_PREDICATE_NONE; if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif) sel.ENDIF(GenRegister::immd(0), nextLabel); @@ -4607,7 +4663,7 @@ namespace gbe // Update the PcIPs const LabelIndex jip = sel.ctx.getLabelIndex(&insn); if(insn.getParent()->needEndif) - sel.MOV(ip, GenRegister::immuw(dst.value())); + sel.setBlockIP(ip, dst.value()); if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif) { if(insn.getParent()->needEndif && !insn.getParent()->needIf) @@ -4633,7 +4689,8 @@ namespace gbe ir::LabelIndex src) const { using namespace ir; - const GenRegister ip = sel.selReg(ocl::blockip, TYPE_U16); + //const GenRegister ip = sel.selReg(ocl::blockip, TYPE_U16); + const GenRegister ip = sel.getBlockIP(); const Function &fn = sel.ctx.getFunction(); const BasicBlock &bb = fn.getBlock(src); const LabelIndex jip = sel.ctx.getLabelIndex(&insn); @@ -4648,13 +4705,13 @@ namespace gbe // block. 
Next instruction will properly update the IPs of the lanes // that actually take the branch const LabelIndex next = bb.getNextBlock()->getLabelIndex(); - sel.MOV(ip, GenRegister::immuw(next.value())); + sel.setBlockIP(ip, next.value()); GBE_ASSERT(jip == dst); sel.push(); sel.curr.physicalFlag = 0; sel.curr.flagIndex = pred.value(); sel.curr.predicate = GEN_PREDICATE_NORMAL; - sel.MOV(ip, GenRegister::immuw(dst.value())); + sel.setBlockIP(ip, dst.value()); sel.block->endifOffset = -1; sel.curr.predicate = GEN_PREDICATE_NONE; if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif) @@ -4671,7 +4728,7 @@ namespace gbe const LabelIndex next = bb.getNextBlock()->getLabelIndex(); // Update the PcIPs if(insn.getParent()->needEndif) - sel.MOV(ip, GenRegister::immuw(dst.value())); + sel.setBlockIP(ip, dst.value()); sel.block->endifOffset = -1; if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif) { if(insn.getParent()->needEndif && !insn.getParent()->needIf) diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp index 686f3090..7c9bce5e 100644 --- a/backend/src/backend/gen_insn_selection.hpp +++ b/backend/src/backend/gen_insn_selection.hpp @@ -44,7 +44,6 @@ namespace gbe /*! Translate IR compare to Gen compare */ uint32_t getGenCompare(ir::Opcode opcode); - #define GEN_MAX_LABEL 0xFFFF /*! 
Selection opcodes properly encoded from 0 to n for fast jump tables * generations diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h index dc5662fc..4065a17e 100644 --- a/backend/src/backend/program.h +++ b/backend/src/backend/program.h @@ -96,6 +96,7 @@ enum gbe_curbe_type { GBE_CURBE_KERNEL_ARGUMENT, GBE_CURBE_EXTRA_ARGUMENT, GBE_CURBE_BLOCK_IP, + GBE_CURBE_DW_BLOCK_IP, GBE_CURBE_THREAD_NUM, GBE_CURBE_ZERO, GBE_CURBE_ONE, diff --git a/backend/src/ir/function.cpp b/backend/src/ir/function.cpp index 6dde6e2b..38355e2e 100644 --- a/backend/src/ir/function.cpp +++ b/backend/src/ir/function.cpp @@ -136,8 +136,8 @@ namespace ir { } LabelIndex Function::newLabel(void) { - GBE_ASSERTM(labels.size() < 0xffff, - "Too many labels are defined (65536 only are supported)"); + GBE_ASSERTM(labels.size() < 0xffffffffull, + "Too many labels are defined (4G only are supported)"); const LabelIndex index(labels.size()); labels.push_back(NULL); return index; diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp index 4c272bd4..ec7ab94f 100644 --- a/backend/src/ir/profile.cpp +++ b/backend/src/ir/profile.cpp @@ -43,6 +43,7 @@ namespace ir { "zero", "one", "retVal", "slm_offset", "printf_buffer_pointer", "printf_index_buffer_pointer", + "dwblockip", "invalid" }; @@ -86,6 +87,7 @@ namespace ir { DECL_NEW_REG(FAMILY_DWORD, slmoffset, 1); DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1); DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1); + DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0); DECL_NEW_REG(FAMILY_DWORD, invalid, 1); } #undef DECL_NEW_REG diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp index 7259d9f6..8f69320d 100644 --- a/backend/src/ir/profile.hpp +++ b/backend/src/ir/profile.hpp @@ -71,8 +71,9 @@ namespace ir { static const Register slmoffset = Register(27); // Group's SLM offset in total 64K SLM static const Register printfbptr = Register(28); // printf buffer address . 
static const Register printfiptr = Register(29); // printf index buffer address. - static const Register invalid = Register(30); // used for valid comparation. - static const uint32_t regNum = 31; // number of special registers + static const Register dwblockip = Register(30); // blockip + static const Register invalid = Register(31); // used for valid comparation. + static const uint32_t regNum = 32; // number of special registers extern const char *specialRegMean[]; // special register name. } /* namespace ocl */ |