diff options
-rw-r--r-- | backend/src/backend/gen_insn_scheduling.cpp | 156 | ||||
-rw-r--r-- | backend/src/backend/gen_register.hpp | 2 |
2 files changed, 104 insertions, 54 deletions
diff --git a/backend/src/backend/gen_insn_scheduling.cpp b/backend/src/backend/gen_insn_scheduling.cpp index 405e401f..478545b9 100644 --- a/backend/src/backend/gen_insn_scheduling.cpp +++ b/backend/src/backend/gen_insn_scheduling.cpp @@ -151,23 +151,49 @@ namespace gbe /*! Helper structure to handle dependencies while scheduling. Takes into * account virtual and physical registers and memory sub-systems */ + + struct DepIndex { + DepIndex(uint32_t addr, uint32_t size) : addr(addr), size(size) {} + uint32_t addr; + uint32_t size; + }; + struct DependencyTracker : public NonCopyable { DependencyTracker(const Selection &selection, SelectionScheduler &scheduler); /*! Reset it before scheduling a new block */ void clear(bool fullClear = false); /*! Get an index in the node array for the given register */ - uint32_t getIndex(GenRegister reg) const; + DepIndex getIndex(GenRegister reg) const; /*! Get an index in the node array for the given memory system */ - uint32_t getIndex(uint32_t bti) const; + DepIndex getIndex(uint32_t bti) const; /*! Add a new dependency "node0 depends on node1" */ void addDependency(ScheduleDAGNode *node0, ScheduleDAGNode *node1, DepMode m); /*! Add a new dependency "node0 depends on node located at index" */ - void addDependency(ScheduleDAGNode *node0, uint32_t index, DepMode m); + void addDependency(ScheduleDAGNode *node0, DepIndex index, DepMode m); /*! Add a new dependency "node located at index depends on node0" */ - void addDependency(uint32_t index, ScheduleDAGNode *node0, DepMode m); + void addDependency(DepIndex index, ScheduleDAGNode *node0, DepMode m); /*! Check whether an instruction is redundant. */ bool isRedundant(ScheduleDAGNode *node0); + /*! Check whether a dependent index contains a single node.*/ + bool isSingleNodeIndex(const DepIndex index) const { + bool ret = true; + ScheduleDAGNode *first = nodes[index.addr]; + for(uint32_t offset = 1; offset < index.size; offset++) { + if (nodes[offset + index.addr] != first) { + ret = false; + break; + } + } + return ret; + } + + /*! Set node to all the slots included in the index. */ + void setDepIndexNode(DepIndex index, ScheduleDAGNode *node) { + for(uint32_t offset = 0; offset < index.size; offset++) + nodes[index.addr + offset] = node; + } + /*! No dependency for null registers and immediate */ INLINE bool ignoreDependency(GenRegister reg) const { if (reg.file == GEN_IMMEDIATE_VALUE) @@ -247,7 +273,7 @@ namespace gbe } else { const uint32_t simdWidth = scheduler.ctx.getSimdWidth(); GBE_ASSERT(simdWidth == 8 || simdWidth == 16); - this->grfNum = simdWidth == 8 ? 128 : 64; + this->grfNum = 4096/2 + 32; //simdWidth == 8 ? 128 : 64; nodes.resize(grfNum + MAX_ARF_REGISTER + MAX_MEM_SYSTEM); } insnNodes.resize(selection.getLargestBlockSize()); @@ -256,19 +282,21 @@ namespace gbe void DependencyTracker::clear(bool fullClear) { for (auto &x : nodes) x = NULL; if (fullClear) deps.clear(); } void DependencyTracker::addDependency(ScheduleDAGNode *node0, GenRegister reg, DepMode m) { if (this->ignoreDependency(reg) == false) { - const uint32_t index = this->getIndex(reg); + const DepIndex index = this->getIndex(reg); + //printf("reg %d addr %d size %d \n", reg.reg(), index.addr, index.size); + //printf("nodes.size %d \n", nodes.size()); this->addDependency(node0, index, m); - if (scheduler.policy == POST_ALLOC && (reg.isdf() || reg.isint64())) - this->addDependency(node0, index + 1, m); + //if (scheduler.policy == POST_ALLOC && (reg.isdf() || reg.isint64())) + // this->addDependency(node0, index + 1, m); } } void DependencyTracker::addDependency(GenRegister reg, ScheduleDAGNode *node0, DepMode m) { if (this->ignoreDependency(reg) == false) { - const uint32_t index = this->getIndex(reg); + const DepIndex index = this->getIndex(reg); this->addDependency(index, node0, m); - if (scheduler.policy == POST_ALLOC && (reg.isdf() || reg.isint64())) - this->addDependency(index + 1, node0, m); + //if (scheduler.policy == POST_ALLOC && (reg.isdf() || reg.isint64())) + // this->addDependency(index + 1, node0, m); } } @@ -290,12 +318,16 @@ namespace gbe } } - void DependencyTracker::addDependency(ScheduleDAGNode *node, uint32_t index, DepMode m) { - this->addDependency(node, this->nodes[index], m); + void DependencyTracker::addDependency(ScheduleDAGNode *node, DepIndex index, DepMode m) { + for(uint32_t offset = 0; offset < index.size; offset++) { + this->addDependency(node, this->nodes[index.addr + offset], m); + } } - void DependencyTracker::addDependency(uint32_t index, ScheduleDAGNode *node, DepMode m) { - this->addDependency(this->nodes[index], node, m); + void DependencyTracker::addDependency(DepIndex index, ScheduleDAGNode *node, DepMode m) { + for(uint32_t offset = 0; offset < index.size; offset++) { + this->addDependency(this->nodes[index.addr + offset], node, m); + } } void DependencyTracker::makeBarrier(int32_t barrierID, int32_t insnNum) { @@ -319,7 +351,7 @@ namespace gbe return GenRegister::uw1grf(ir::Register(insn.state.flagIndex)); } - uint32_t DependencyTracker::getIndex(GenRegister reg) const { + DepIndex DependencyTracker::getIndex(GenRegister reg) const { // Non GRF physical register if (reg.physical) { //GBE_ASSERT (reg.file == GEN_ARCHITECTURE_REGISTER_FILE); @@ -329,35 +361,41 @@ namespace gbe if (file == GEN_ARF_FLAG) { const uint32_t subnr = reg.subnr / sizeof(uint16_t); GBE_ASSERT(nr < MAX_FLAG_REGISTER && (subnr == 0 || subnr == 1)); - return grfNum + 2*nr + subnr; + return DepIndex(grfNum + 2*nr + subnr, 1); } else if (file == GEN_ARF_ACCUMULATOR) { GBE_ASSERT(nr < MAX_ACC_REGISTER); - return grfNum + MAX_FLAG_REGISTER + nr; + return DepIndex(grfNum + MAX_FLAG_REGISTER + nr, 1); } else if (file == GEN_ARF_TM) { - return grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER; + return DepIndex(grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER, 1); } else { NOT_SUPPORTED; - return 0; + return DepIndex(0, 0); } } else { - const uint32_t simdWidth = scheduler.ctx.getSimdWidth(); - return simdWidth == 8 ? reg.nr : reg.nr / 2; + uint32_t addr = (reg.nr * 32 + reg.subnr) / 2; + uint32_t size = scheduler.ctx.ra->getRegSize(reg.reg()) / 2; + return DepIndex(addr, size); } } // We directly manipulate physical GRFs here else if (scheduler.policy == POST_ALLOC) { const GenRegister physical = scheduler.ctx.ra->genReg(reg); - const uint32_t simdWidth = scheduler.ctx.getSimdWidth(); - return simdWidth == 8 ? physical.nr : physical.nr / 2; + uint32_t addr = (physical.nr * 32 + physical.subnr) / 2; + // invalid virtual reg means a generic register, and should be the reserved register for spill. + if (reg.reg() == ir::ocl::invalid) + return DepIndex(addr, scheduler.ctx.getSimdWidth() * 2); + uint32_t size = scheduler.ctx.ra->getRegSize(reg.reg()) / 2; + return DepIndex(addr, size); } // We use virtual registers since allocation is not done yet else - return reg.value.reg; + return DepIndex(reg.value.reg, 1); } - uint32_t DependencyTracker::getIndex(uint32_t bti) const { + DepIndex DependencyTracker::getIndex(uint32_t bti) const { const uint32_t memDelta = grfNum + MAX_ARF_REGISTER; - return bti == 0xfe ? memDelta + LOCAL_MEMORY : (bti == 0xff ? memDelta + SCRATCH_MEMORY : memDelta + GLOBAL_MEMORY); + uint32_t addr = bti == 0xfe ? memDelta + LOCAL_MEMORY : (bti == 0xff ? memDelta + SCRATCH_MEMORY : memDelta + GLOBAL_MEMORY); + return DepIndex(addr, 1); } void DependencyTracker::updateWrites(ScheduleDAGNode *node) { @@ -367,43 +405,51 @@ namespace gbe for (uint32_t dstID = 0; dstID < insn.dstNum; ++dstID) { const GenRegister dst = insn.dst(dstID); if (this->ignoreDependency(dst) == false) { - const uint32_t index = this->getIndex(dst); - this->nodes[index] = node; - if (scheduler.policy == POST_ALLOC && (dst.isdf() || dst.isint64())) - this->nodes[index + 1] = node; + const DepIndex index = this->getIndex(dst); + setDepIndexNode(index, node); + //for(uint32_t offset = 0; offset < index.size; offset++) + // this->nodes[index.addr + offset] = node; + //if (scheduler.policy == POST_ALLOC && (dst.isdf() || dst.isint64())) + // this->nodes[index + 1] = node; } } // Track writes in predicates if (insn.opcode == SEL_OP_CMP || insn.opcode == SEL_OP_I64CMP || insn.state.modFlag) { - const uint32_t index = this->getIndex(getFlag(insn)); - this->nodes[index] = node; + const DepIndex index = this->getIndex(getFlag(insn)); + //this->nodes[index] = node; + setDepIndexNode(index, node); } // Track writes in accumulators if (insn.state.accWrEnable) { - const uint32_t index = this->getIndex(GenRegister::acc()); - this->nodes[index] = node; + const DepIndex index = this->getIndex(GenRegister::acc()); + setDepIndexNode(index, node); + //this->nodes[index] = node; } // Track writes in memory if (insn.isWrite()) { - const uint32_t index = this->getIndex(insn.getbti()); - this->nodes[index] = node; + const DepIndex index = this->getIndex(insn.getbti()); + setDepIndexNode(index, node); + //this->nodes[index] = node; } // Track writes in scratch memory if(insn.opcode == SEL_OP_SPILL_REG) { - const uint32_t index = this->getIndex(0xff); - this->nodes[index] = node; + const DepIndex index = this->getIndex(0xff); + setDepIndexNode(index, node); + //this->nodes[index] = node; } // Consider barriers and wait write to memory if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_FENCE || insn.opcode == SEL_OP_WAIT) { - const uint32_t local = this->getIndex(0xfe); - const uint32_t global = this->getIndex(0x00); - this->nodes[local] = this->nodes[global] = node; + const DepIndex local = this->getIndex(0xfe); + const DepIndex global = this->getIndex(0x00); + setDepIndexNode(local, node); + setDepIndexNode(global, node); + //this->nodes[local] = this->nodes[global] = node; } } @@ -477,8 +523,10 @@ namespace gbe if (node0->insn.state.predicate != GEN_PREDICATE_NONE) return false; - const uint32_t index = this->getIndex(node0->insn.dst(0)); - ScheduleDAGNode *node1 = nodes[index]; + const DepIndex index = this->getIndex(node0->insn.dst(0)); + ScheduleDAGNode *node1 = NULL; + if (isSingleNodeIndex(index)) + node1 = nodes[index.addr]; if (node1 == NULL) return false; @@ -535,12 +583,12 @@ namespace gbe // read-after-write in memory if (insn.isRead()) { - const uint32_t index = tracker.getIndex(insn.getbti()); + const DepIndex index = tracker.getIndex(insn.getbti()); tracker.addDependency(node, index, READ_AFTER_WRITE); } //read-after-write of scratch memory if (insn.opcode == SEL_OP_UNSPILL_REG) { - const uint32_t index = tracker.getIndex(0xff); + const DepIndex index = tracker.getIndex(0xff); tracker.addDependency(node, index, READ_AFTER_WRITE); } @@ -548,8 +596,8 @@ namespace gbe if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_FENCE || insn.opcode == SEL_OP_WAIT) { - const uint32_t local = tracker.getIndex(0xfe); - const uint32_t global = tracker.getIndex(0x00); + const DepIndex local = tracker.getIndex(0xfe); + const DepIndex global = tracker.getIndex(0x00); tracker.addDependency(node, local, READ_AFTER_WRITE); tracker.addDependency(node, global, READ_AFTER_WRITE); } @@ -568,13 +616,13 @@ namespace gbe // write-after-write in memory if (insn.isWrite()) { - const uint32_t index = tracker.getIndex(insn.getbti()); + const DepIndex index = tracker.getIndex(insn.getbti()); tracker.addDependency(node, index, WRITE_AFTER_WRITE); } // write-after-write in scratch memory if (insn.opcode == SEL_OP_SPILL_REG) { - const uint32_t index = tracker.getIndex(0xff); + const DepIndex index = tracker.getIndex(0xff); tracker.addDependency(node, index, WRITE_AFTER_WRITE); } @@ -598,13 +646,13 @@ namespace gbe // write-after-read in memory if (insn.isRead()) { - const uint32_t index = tracker.getIndex(insn.getbti()); + const DepIndex index = tracker.getIndex(insn.getbti()); tracker.addDependency(index, node, WRITE_AFTER_READ); } // write-after-read in scratch memory if (insn.opcode == SEL_OP_UNSPILL_REG) { - const uint32_t index = tracker.getIndex(0xff); + const DepIndex index = tracker.getIndex(0xff); tracker.addDependency(index, node, WRITE_AFTER_READ); } @@ -612,8 +660,8 @@ namespace gbe if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_FENCE || insn.opcode == SEL_OP_WAIT) { - const uint32_t local = tracker.getIndex(0xfe); - const uint32_t global = tracker.getIndex(0x00); + const DepIndex local = tracker.getIndex(0xfe); + const DepIndex global = tracker.getIndex(0x00); tracker.addDependency(local, node, WRITE_AFTER_READ); tracker.addDependency(global, node, WRITE_AFTER_READ); } diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp index da58c06e..877f5d4a 100644 --- a/backend/src/backend/gen_register.hpp +++ b/backend/src/backend/gen_register.hpp @@ -61,6 +61,7 @@ #include "backend/gen_defs.hpp" #include "ir/register.hpp" +#include "ir/profile.hpp" #include "sys/platform.hpp" namespace gbe @@ -228,6 +229,7 @@ namespace gbe this->hstride = hstride; this->quarter = 0; this->address_mode = GEN_ADDRESS_DIRECT; + this->value.reg = ir::ocl::invalid; } /*! Return the IR virtual register */ |