summary refs log tree commit diff
diff options
context:
space:
mode:
author Zhigang Gong <zhigang.gong@intel.com> 2015-03-31 16:53:11 +0800
committer Zhigang Gong <zhigang.gong@intel.com> 2015-04-13 16:11:28 +0800
commit 5f267ed48ed996a18bfd8daff12502158d377d92 (patch)
tree f950deefadc5b8f6f3a5f2f10be4c82aa98a70b5
parent 321975900126f924df19e85e564d7b203c15c65d (diff)
GBE: Extend front label ip to 32 bit on demand.
If the number of front end labels exceeds 0xffff, the backend uses a full DW (32 bit) to represent each block's IP address instead of a 16 bit word. The label width is chosen dynamically, according to the actual number of labels in the front end function. Signed-off-by: Zhigang Gong <zhigang.gong@intel.com> Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
-rw-r--r-- backend/src/backend/context.cpp 2
-rw-r--r-- backend/src/backend/context.hpp 8
-rw-r--r-- backend/src/backend/gen_context.cpp 29
-rw-r--r-- backend/src/backend/gen_insn_selection.cpp 89
-rw-r--r-- backend/src/backend/gen_insn_selection.hpp 1
-rw-r--r-- backend/src/backend/program.h 1
-rw-r--r-- backend/src/ir/function.cpp 4
-rw-r--r-- backend/src/ir/profile.cpp 2
-rw-r--r-- backend/src/ir/profile.hpp 5
-rw-r--r-- src/cl_command_queue_gen7.c 16
10 files changed, 128 insertions, 29 deletions
diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index 5e33ddd6..59ccc79d 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -354,6 +354,8 @@ namespace gbe
this->kernel = this->allocateKernel();
this->kernel->simdWidth = this->simdWidth;
this->buildArgList();
+ if (fn.labelNum() > 0xffff)
+ this->useDWLabel = true;
if (usedLabels.size() == 0)
this->buildUsedLabels();
if (JIPs.size() == 0)
diff --git a/backend/src/backend/context.hpp b/backend/src/backend/context.hpp
index 1b3744b2..faa7c8aa 100644
--- a/backend/src/backend/context.hpp
+++ b/backend/src/backend/context.hpp
@@ -100,6 +100,13 @@ namespace gbe
/*! Preallocated curbe register set including special registers. */
map<ir::Register, uint32_t> curbeRegs;
ir::Register getSurfaceBaseReg(unsigned char bti);
+ /* Indicate whether we should use DW label or W label in backend.*/
+ bool isDWLabel(void) const {
+ return useDWLabel;
+ }
+ uint32_t getMaxLabel(void) const {
+ return this->isDWLabel() ? 0xffffffff : 0xffff;
+ }
protected:
/*! Build the instruction stream. Return false if failed */
virtual bool emitCode(void) = 0;
@@ -140,6 +147,7 @@ namespace gbe
set<ir::LabelIndex> usedLabels; //!< Set of all used labels
JIPMap JIPs; //!< Where to jump all labels/branches
uint32_t simdWidth; //!< Number of lanes per HW threads
+ bool useDWLabel; //!< false means using u16 label, true means using u32 label.
map<unsigned char, ir::Register> btiRegMap;
GBE_CLASS(Context); //!< Use custom allocators
};
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index cdf581c4..094e6b49 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -134,18 +134,36 @@ namespace gbe
return true;
}
+ /* Get proper block ip register according to current label width. */
+ static GenRegister getBlockIP(GenContext &ctx) {
+ GenRegister blockip;
+ if (!ctx.isDWLabel())
+ blockip = ctx.ra->genReg(GenRegister::uw8grf(ir::ocl::blockip));
+ else
+ blockip = ctx.ra->genReg(GenRegister::ud8grf(ir::ocl::dwblockip));
+ return blockip;
+ }
+
+ /* Set current block ip register to a specified constant label value. */
+ static void setBlockIP(GenContext &ctx, GenRegister blockip, uint32_t label) {
+ if (!ctx.isDWLabel())
+ ctx.p->MOV(blockip, GenRegister::immuw(label));
+ else
+ ctx.p->MOV(blockip, GenRegister::immud(label));
+ }
+
void GenContext::clearFlagRegister(void) {
// when group size not aligned to simdWidth, flag register need clear to
// make prediction(any8/16h) work correctly
- const GenRegister blockip = ra->genReg(GenRegister::uw8grf(ir::ocl::blockip));
+ const GenRegister blockip = getBlockIP(*this);
const GenRegister zero = ra->genReg(GenRegister::uw1grf(ir::ocl::zero));
const GenRegister one = ra->genReg(GenRegister::uw1grf(ir::ocl::one));
p->push();
p->curr.noMask = 1;
p->curr.predicate = GEN_PREDICATE_NONE;
- p->MOV(blockip, GenRegister::immuw(GEN_MAX_LABEL));
+ setBlockIP(*this, blockip, getMaxLabel());
p->curr.noMask = 0;
- p->MOV(blockip, GenRegister::immuw(0));
+ setBlockIP(*this, blockip, 0);
p->curr.execWidth = 1;
// FIXME, need to get the final use set of zero/one, if there is no user,
// no need to generate the following two instructions.
@@ -1990,7 +2008,10 @@ namespace gbe
// We insert the block IP mask first
using namespace ir::ocl;
- allocCurbeReg(blockip, GBE_CURBE_BLOCK_IP);
+ if (!isDWLabel())
+ allocCurbeReg(blockip, GBE_CURBE_BLOCK_IP);
+ else
+ allocCurbeReg(dwblockip, GBE_CURBE_DW_BLOCK_IP);
allocCurbeReg(lid0, GBE_CURBE_LOCAL_ID_X);
allocCurbeReg(lid1, GBE_CURBE_LOCAL_ID_Y);
allocCurbeReg(lid2, GBE_CURBE_LOCAL_ID_Z);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 5df5673d..becb1c99 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -653,6 +653,64 @@ namespace gbe
SelectionDAG *dag0, SelectionDAG *dag1,
GenRegister &src0, GenRegister &src1,
ir::Type type, bool &inverse);
+
+ /* Get current block IP register according to label width. */
+ GenRegister getBlockIP() {
+ return ctx.isDWLabel() ? selReg(ir::ocl::dwblockip) : selReg(ir::ocl::blockip);
+ }
+
+ /* Get proper label immediate gen register from label value. */
+ GenRegister getLabelImmReg(uint32_t labelValue) {
+ return ctx.isDWLabel() ? GenRegister::immud(labelValue) : GenRegister::immuw(labelValue);
+ }
+
+ /* Get proper label immediate gen register from label. */
+ GenRegister getLabelImmReg(ir::LabelIndex label) {
+ return getLabelImmReg(label.value());
+ }
+
+ /* Set current label register to a label value. */
+ void setBlockIP(GenRegister blockip, uint32_t labelValue) {
+ if (!ctx.isDWLabel())
+ MOV(GenRegister::retype(blockip, GEN_TYPE_UW), GenRegister::immuw(labelValue));
+ else
+ MOV(GenRegister::retype(blockip, GEN_TYPE_UD), GenRegister::immud(labelValue));
+ }
+
+ /* Generate comparison instruction to compare block ip address and specified label register.*/
+ void cmpBlockIP(uint32_t cond,
+ GenRegister blockip,
+ GenRegister labelReg) {
+ if (!ctx.isDWLabel())
+ CMP(cond,
+ GenRegister::retype(blockip, GEN_TYPE_UW),
+ labelReg,
+ GenRegister::retype(GenRegister::null(),
+ GEN_TYPE_UW));
+ else
+ CMP(cond,
+ GenRegister::retype(blockip, GEN_TYPE_UD),
+ labelReg,
+ GenRegister::retype(GenRegister::null(),
+ GEN_TYPE_UD));
+ }
+
+ void cmpBlockIP(uint32_t cond,
+ GenRegister blockip,
+ uint32_t labelValue) {
+ if (!ctx.isDWLabel())
+ CMP(cond,
+ GenRegister::retype(blockip, GEN_TYPE_UW),
+ GenRegister::immuw(labelValue),
+ GenRegister::retype(GenRegister::null(),
+ GEN_TYPE_UW));
+ else
+ CMP(cond,
+ GenRegister::retype(blockip, GEN_TYPE_UD),
+ GenRegister::immud(labelValue),
+ GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
+ }
+
/*! Use custom allocators */
GBE_CLASS(Opaque);
friend class SelectionBlock;
@@ -4225,10 +4283,10 @@ namespace gbe
{
using namespace ir;
const LabelIndex label = insn.getLabelIndex();
- const GenRegister src0 = sel.selReg(ocl::blockip);
- const GenRegister src1 = GenRegister::immuw(label);
+ const GenRegister src0 = sel.getBlockIP();
+ const GenRegister src1 = sel.getLabelImmReg(label);
const uint32_t simdWidth = sel.ctx.getSimdWidth();
- GBE_ASSERTM(label < GEN_MAX_LABEL, "We reached the maximum label number which is reserved for barrier handling");
+ GBE_ASSERTM(label < sel.ctx.getMaxLabel(), "We reached the maximum label number which is reserved for barrier handling");
sel.LABEL(label);
if(!insn.getParent()->needIf)
@@ -4249,8 +4307,7 @@ namespace gbe
sel.push();
sel.curr.noMask = 1;
sel.curr.predicate = GEN_PREDICATE_NONE;
- sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1,
- GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
+ sel.cmpBlockIP(GEN_CONDITIONAL_LE, src0, src1);
sel.pop();
if (sel.block->hasBarrier) {
@@ -4260,11 +4317,10 @@ namespace gbe
// this block, as it will always excute with all lanes activated.
sel.push();
sel.curr.predicate = GEN_PREDICATE_NORMAL;
- sel.MOV(GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL));
+ sel.setBlockIP(src0, sel.ctx.getMaxLabel());
sel.curr.predicate = GEN_PREDICATE_NONE;
sel.curr.noMask = 1;
- sel.CMP(GEN_CONDITIONAL_EQ, GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL),
- GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
+ sel.cmpBlockIP(GEN_CONDITIONAL_EQ, src0, sel.ctx.getMaxLabel());
if (simdWidth == 8)
sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H;
else if (simdWidth == 16)
@@ -4279,7 +4335,7 @@ namespace gbe
// FIXME, if the last BRA is unconditional jump, we don't need to update the label here.
sel.push();
sel.curr.predicate = GEN_PREDICATE_NORMAL;
- sel.MOV(GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(label.value()));
+ sel.setBlockIP(src0, label.value());
sel.pop();
}
else {
@@ -4582,7 +4638,7 @@ namespace gbe
ir::LabelIndex src) const
{
using namespace ir;
- const GenRegister ip = sel.selReg(ocl::blockip, TYPE_U16);
+ const GenRegister ip = sel.getBlockIP();
// We will not emit any jump if we must go the next block anyway
const BasicBlock *curr = insn.getParent();
@@ -4597,7 +4653,7 @@ namespace gbe
sel.curr.physicalFlag = 0;
sel.curr.flagIndex = pred.value();
sel.curr.predicate = GEN_PREDICATE_NORMAL;
- sel.MOV(ip, GenRegister::immuw(dst.value()));
+ sel.setBlockIP(ip, dst.value());
sel.curr.predicate = GEN_PREDICATE_NONE;
if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif)
sel.ENDIF(GenRegister::immd(0), nextLabel);
@@ -4607,7 +4663,7 @@ namespace gbe
// Update the PcIPs
const LabelIndex jip = sel.ctx.getLabelIndex(&insn);
if(insn.getParent()->needEndif)
- sel.MOV(ip, GenRegister::immuw(dst.value()));
+ sel.setBlockIP(ip, dst.value());
if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif) {
if(insn.getParent()->needEndif && !insn.getParent()->needIf)
@@ -4633,7 +4689,8 @@ namespace gbe
ir::LabelIndex src) const
{
using namespace ir;
- const GenRegister ip = sel.selReg(ocl::blockip, TYPE_U16);
+ //const GenRegister ip = sel.selReg(ocl::blockip, TYPE_U16);
+ const GenRegister ip = sel.getBlockIP();
const Function &fn = sel.ctx.getFunction();
const BasicBlock &bb = fn.getBlock(src);
const LabelIndex jip = sel.ctx.getLabelIndex(&insn);
@@ -4648,13 +4705,13 @@ namespace gbe
// block. Next instruction will properly update the IPs of the lanes
// that actually take the branch
const LabelIndex next = bb.getNextBlock()->getLabelIndex();
- sel.MOV(ip, GenRegister::immuw(next.value()));
+ sel.setBlockIP(ip, next.value());
GBE_ASSERT(jip == dst);
sel.push();
sel.curr.physicalFlag = 0;
sel.curr.flagIndex = pred.value();
sel.curr.predicate = GEN_PREDICATE_NORMAL;
- sel.MOV(ip, GenRegister::immuw(dst.value()));
+ sel.setBlockIP(ip, dst.value());
sel.block->endifOffset = -1;
sel.curr.predicate = GEN_PREDICATE_NONE;
if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif)
@@ -4671,7 +4728,7 @@ namespace gbe
const LabelIndex next = bb.getNextBlock()->getLabelIndex();
// Update the PcIPs
if(insn.getParent()->needEndif)
- sel.MOV(ip, GenRegister::immuw(dst.value()));
+ sel.setBlockIP(ip, dst.value());
sel.block->endifOffset = -1;
if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif) {
if(insn.getParent()->needEndif && !insn.getParent()->needIf)
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 686f3090..7c9bce5e 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -44,7 +44,6 @@ namespace gbe
/*! Translate IR compare to Gen compare */
uint32_t getGenCompare(ir::Opcode opcode);
- #define GEN_MAX_LABEL 0xFFFF
/*! Selection opcodes properly encoded from 0 to n for fast jump tables
* generations
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
index dc5662fc..4065a17e 100644
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -96,6 +96,7 @@ enum gbe_curbe_type {
GBE_CURBE_KERNEL_ARGUMENT,
GBE_CURBE_EXTRA_ARGUMENT,
GBE_CURBE_BLOCK_IP,
+ GBE_CURBE_DW_BLOCK_IP,
GBE_CURBE_THREAD_NUM,
GBE_CURBE_ZERO,
GBE_CURBE_ONE,
diff --git a/backend/src/ir/function.cpp b/backend/src/ir/function.cpp
index 6dde6e2b..38355e2e 100644
--- a/backend/src/ir/function.cpp
+++ b/backend/src/ir/function.cpp
@@ -136,8 +136,8 @@ namespace ir {
}
LabelIndex Function::newLabel(void) {
- GBE_ASSERTM(labels.size() < 0xffff,
- "Too many labels are defined (65536 only are supported)");
+ GBE_ASSERTM(labels.size() < 0xffffffffull,
+ "Too many labels are defined (4G only are supported)");
const LabelIndex index(labels.size());
labels.push_back(NULL);
return index;
diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
index 4c272bd4..ec7ab94f 100644
--- a/backend/src/ir/profile.cpp
+++ b/backend/src/ir/profile.cpp
@@ -43,6 +43,7 @@ namespace ir {
"zero", "one",
"retVal", "slm_offset",
"printf_buffer_pointer", "printf_index_buffer_pointer",
+ "dwblockip",
"invalid"
};
@@ -86,6 +87,7 @@ namespace ir {
DECL_NEW_REG(FAMILY_DWORD, slmoffset, 1);
DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1);
DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1);
+ DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0);
DECL_NEW_REG(FAMILY_DWORD, invalid, 1);
}
#undef DECL_NEW_REG
diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp
index 7259d9f6..8f69320d 100644
--- a/backend/src/ir/profile.hpp
+++ b/backend/src/ir/profile.hpp
@@ -71,8 +71,9 @@ namespace ir {
static const Register slmoffset = Register(27); // Group's SLM offset in total 64K SLM
static const Register printfbptr = Register(28); // printf buffer address .
static const Register printfiptr = Register(29); // printf index buffer address.
- static const Register invalid = Register(30); // used for valid comparation.
- static const uint32_t regNum = 31; // number of special registers
+ static const Register dwblockip = Register(30); // blockip
+ static const Register invalid = Register(31); // used for valid comparation.
+ static const uint32_t regNum = 32; // number of special registers
extern const char *specialRegMean[]; // special register name.
} /* namespace ocl */
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 253c4f2e..4adbd2b2 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -49,23 +49,27 @@ cl_set_varying_payload(const cl_kernel ker,
size_t i, j, k, curr = 0;
int32_t id_offset[3], ip_offset;
cl_int err = CL_SUCCESS;
+ int32_t dw_ip_offset = -1;
id_offset[0] = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_X, 0);
id_offset[1] = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_Y, 0);
id_offset[2] = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_Z, 0);
ip_offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_BLOCK_IP, 0);
+ if (ip_offset < 0)
+ dw_ip_offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_DW_BLOCK_IP, 0);
+ assert(ip_offset < 0 || dw_ip_offset < 0);
assert(id_offset[0] >= 0 &&
id_offset[1] >= 0 &&
id_offset[2] >= 0 &&
- ip_offset >= 0);
+ (ip_offset >= 0 || dw_ip_offset >= 0));
TRY_ALLOC(ids[0], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz));
TRY_ALLOC(ids[1], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz));
TRY_ALLOC(ids[2], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz));
TRY_ALLOC(block_ips, (uint16_t*) alloca(sizeof(uint16_t)*thread_n*simd_sz));
-
/* 0xffff means that the lane is inactivated */
- memset(block_ips, 0xff, sizeof(uint16_t)*thread_n*simd_sz);
+ memset(block_ips, 0xff, sizeof(int16_t)*thread_n*simd_sz);
+
/* Compute the IDs and the block IPs */
for (k = 0; k < local_wk_sz[2]; ++k)
@@ -84,11 +88,15 @@ cl_set_varying_payload(const cl_kernel ker,
uint32_t *ids1 = (uint32_t *) (data + id_offset[1]);
uint32_t *ids2 = (uint32_t *) (data + id_offset[2]);
uint16_t *ips = (uint16_t *) (data + ip_offset);
+ uint32_t *dw_ips = (uint32_t *) (data + dw_ip_offset);
for (j = 0; j < simd_sz; ++j, ++curr) {
ids0[j] = ids[0][curr];
ids1[j] = ids[1][curr];
ids2[j] = ids[2][curr];
- ips[j] = block_ips[curr];
+ if (ip_offset >= 0)
+ ips[j] = block_ips[curr];
+ if (dw_ip_offset >= 0)
+ dw_ips[j] = block_ips[curr];
}
}