diff options
author | Yang Rong <rong.r.yang@intel.com> | 2015-05-14 19:25:36 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2015-05-15 10:41:20 +0800 |
commit | f6bbef6404a6026fe774a07989f30a5c3578793e (patch) | |
tree | 3878e7ab416307128343e7326640849b363e3cc4 | |
parent | 1257a871669ea3e5ab9f3bdc885d69d41c6a522c (diff) |
Add Indirect struct argument read support.
The steps to handle Indirect argument read:
1. Find all indirect loads and their address calculations.
2. Add an INDIRECT_MOV IR instruction and replace each indirect load with an INDIRECT_MOV.
3. Replace the ADD instruction that sums the base address and the offset with a MOV of the offset. This could be optimized further.
V2: use a tmp uw register to calc offset for indirect move.
V3: tmp can't be uniform, because exec width is not 1 when uniform.
Signed-off-by: Yang Rong <rong.r.yang@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@intel.com>
-rw-r--r-- | backend/src/backend/gen_context.cpp | 32 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.cpp | 47 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.hpp | 1 | ||||
-rw-r--r-- | backend/src/backend/gen_register.hpp | 11 | ||||
-rw-r--r-- | backend/src/ir/instruction.cpp | 50 | ||||
-rw-r--r-- | backend/src/ir/instruction.hpp | 10 | ||||
-rw-r--r-- | backend/src/ir/instruction.hxx | 1 | ||||
-rw-r--r-- | backend/src/ir/lowering.cpp | 141 |
8 files changed, 257 insertions, 36 deletions
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 08a67fc3..94094fcb 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -1727,21 +1727,33 @@ namespace gbe } void GenContext::emitIndirectMoveInstruction(const SelectionInstruction &insn) { - GenRegister src = ra->genReg(insn.src(0)); - if(sel->isScalarReg(src.reg())) - src = GenRegister::retype(src, GEN_TYPE_UW); - else - src = GenRegister::unpacked_uw(src.nr, src.subnr / typeSize(GEN_TYPE_UW)); + GenRegister baseReg = ra->genReg(insn.src(0)); + GenRegister offset = ra->genReg(insn.src(1)); + uint32_t immoffset = insn.extra.indirect_offset; const GenRegister dst = ra->genReg(insn.dst(0)); + GenRegister tmp = ra->genReg(insn.dst(1)); const GenRegister a0 = GenRegister::addr8(0); uint32_t simdWidth = p->curr.execWidth; + GenRegister indirect_src; + + if(sel->isScalarReg(offset.reg())) + offset = GenRegister::retype(offset, GEN_TYPE_UW); + else + offset = GenRegister::unpacked_uw(offset.nr, offset.subnr / typeSize(GEN_TYPE_UW)); + uint32_t baseRegOffset = GenRegister::grfOffset(baseReg); + //There is a restrict that: lower 5 bits indirect reg SubRegNum and + //the lower 5 bits of indirect imm SubRegNum cannot exceed 5 bits. + //So can't use AddrImm field, need a add. 
+ p->ADD(tmp, offset, GenRegister::immuw(baseRegOffset + immoffset)); + indirect_src = GenRegister::indirect(dst.type, 0, GEN_WIDTH_1, + GEN_VERTICAL_STRIDE_ONE_DIMENSIONAL, GEN_HORIZONTAL_STRIDE_0); p->push(); p->curr.execWidth = 8; p->curr.quarterControl = GEN_COMPRESSION_Q1; - p->MOV(a0, src); - p->MOV(dst, GenRegister::indirect(dst.type, 0, GEN_WIDTH_8)); + p->MOV(a0, tmp); + p->MOV(dst, indirect_src); p->pop(); if (simdWidth == 16) { @@ -1750,9 +1762,9 @@ namespace gbe p->curr.quarterControl = GEN_COMPRESSION_Q2; const GenRegister nextDst = GenRegister::Qn(dst, 1); - const GenRegister nextSrc = GenRegister::Qn(src, 1); - p->MOV(a0, nextSrc); - p->MOV(nextDst, GenRegister::indirect(dst.type, 0, GEN_WIDTH_8)); + const GenRegister nextOffset = GenRegister::Qn(tmp, 1); + p->MOV(a0, nextOffset); + p->MOV(nextDst, indirect_src); p->pop(); } } diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 67769ab4..f340b367 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -590,7 +590,7 @@ namespace gbe /*! Select instruction with embedded comparison */ void SEL_CMP(uint32_t conditional, Reg dst, Reg src0, Reg src1); /* Constant buffer move instruction */ - void INDIRECT_MOVE(Reg dst, Reg src); + void INDIRECT_MOVE(Reg dst, Reg tmp, Reg base, Reg regOffset, uint32_t immOffset); /*! EOT is used to finish GPGPU threads */ void EOT(void); /*! 
No-op */ @@ -1193,10 +1193,13 @@ namespace gbe insn->src(1) = src1; insn->extra.function = conditional; } - void Selection::Opaque::INDIRECT_MOVE(Reg dst, Reg src) { - SelectionInstruction *insn = this->appendInsn(SEL_OP_INDIRECT_MOVE, 1, 1); + void Selection::Opaque::INDIRECT_MOVE(Reg dst, Reg tmp, Reg base, Reg regOffset, uint32_t immOffset) { + SelectionInstruction *insn = this->appendInsn(SEL_OP_INDIRECT_MOVE, 2, 2); insn->dst(0) = dst; - insn->src(0) = src; + insn->dst(1) = tmp; + insn->src(0) = base; + insn->src(1) = regOffset; + insn->extra.indirect_offset = immOffset; } void Selection::Opaque::ATOMIC(Reg dst, uint32_t function, @@ -3433,18 +3436,6 @@ namespace gbe } } - void emitIndirectMove(Selection::Opaque &sel, - const ir::LoadInstruction &insn, - GenRegister address) const - { - using namespace ir; - GBE_ASSERT(insn.getValueNum() == 1); //todo: handle vec later - - const GenRegister dst = sel.selReg(insn.getValue(0), insn.getValueType()); - const GenRegister src = address; - sel.INDIRECT_MOVE(dst, src); - } - INLINE GenRegister getRelativeAddress(Selection::Opaque &sel, GenRegister address, uint8_t bti) const { if (bti == 0xfe || bti == BTI_CONSTANT) return address; @@ -4724,6 +4715,29 @@ namespace gbe } }; + /*! 
Get a region of a register */ + class IndirectMovInstructionPattern : public SelectionPattern + { + public: + IndirectMovInstructionPattern(void) : SelectionPattern(1,1) { + this->opcodes.push_back(ir::OP_INDIRECT_MOV); + } + INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const { + using namespace ir; + const ir::IndirectMovInstruction &insn = cast<ir::IndirectMovInstruction>(dag.insn); + GenRegister dst, src0, src1; + uint32_t offset = insn.getOffset(); + dst = sel.selReg(insn.getDst(0), insn.getType()); + src0 = sel.selReg(insn.getSrc(0), TYPE_U32); + src1 = sel.selReg(insn.getSrc(1), TYPE_U32); + GenRegister tmp = sel.selReg(sel.reg(FAMILY_WORD), TYPE_U16); + + sel.INDIRECT_MOVE(dst, tmp, src0, src1, offset); + markAllChildren(dag); + return true; + } + }; + /*! Branch instruction pattern */ class BranchInstructionPattern : public SelectionPattern { @@ -4950,6 +4964,7 @@ namespace gbe this->insert<GetImageInfoInstructionPattern>(); this->insert<ReadARFInstructionPattern>(); this->insert<RegionInstructionPattern>(); + this->insert<IndirectMovInstructionPattern>(); this->insert<NullaryInstructionPattern>(); // Sort all the patterns with the number of instructions they output diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp index dee35bbe..2262ef9a 100644 --- a/backend/src/backend/gen_insn_selection.hpp +++ b/backend/src/backend/gen_insn_selection.hpp @@ -131,6 +131,7 @@ namespace gbe }; uint32_t barrierType; bool longjmp; + uint32_t indirect_offset; } extra; /*! 
Gen opcode */ uint8_t opcode; diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp index 581f823c..80e143e9 100644 --- a/backend/src/backend/gen_register.hpp +++ b/backend/src/backend/gen_register.hpp @@ -272,6 +272,10 @@ namespace gbe return r; } + static INLINE uint32_t grfOffset(GenRegister reg) { + return reg.nr * GEN_REG_SIZE + reg.subnr; + } + // split a DWORD register into unpacked Byte or Short register static INLINE GenRegister splitReg(GenRegister reg, uint32_t count, uint32_t sub_part) { GenRegister r = reg; @@ -826,7 +830,8 @@ namespace gbe } /*! Build an indirectly addressed source */ - static INLINE GenRegister indirect(uint32_t type, uint32_t subnr, uint32_t width) { + static INLINE GenRegister indirect(uint32_t type, uint32_t subnr, uint32_t width, + uint32_t vstride, uint32_t hstride) { GenRegister reg; reg.type = type; reg.file = GEN_GENERAL_REGISTER_FILE; @@ -836,8 +841,8 @@ namespace gbe reg.nr = 0; reg.negation = 0; reg.absolute = 0; - reg.vstride = 0; - reg.hstride = 0; + reg.vstride = vstride; + reg.hstride = hstride; return reg; } diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index c38c4275..784ae9c7 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -729,6 +729,30 @@ namespace ir { Register src[1]; }; + class ALIGNED_INSTRUCTION IndirectMovInstruction : + public BasePolicy, + public NSrcPolicy<IndirectMovInstruction, 2>, + public NDstPolicy<IndirectMovInstruction, 1> + { + public: + INLINE IndirectMovInstruction(Type type, Register dst, Register src0, Register src1, uint32_t offset) { + this->type = type; + this->offset = offset; + this->dst[0] = dst; + this->src[0] = src0; + this->src[1] = src1; + this->opcode = OP_INDIRECT_MOV; + } + INLINE Type getType(void) const { return this->type; } + INLINE uint32_t getOffset(void) const { return this->offset; } + INLINE bool wellFormed(const Function &fn, std::string &why) const; + INLINE void 
out(std::ostream &out, const Function &fn) const; + Type type; + uint32_t offset; + Register dst[1]; + Register src[2]; + }; + class ALIGNED_INSTRUCTION LabelInstruction : public BasePolicy, public NSrcPolicy<LabelInstruction, 0>, @@ -1106,6 +1130,16 @@ namespace ir { return true; } + INLINE bool IndirectMovInstruction::wellFormed(const Function &fn, std::string &whyNot) const + { + const RegisterFamily family = getFamily(this->type); + if (UNLIKELY(checkSpecialRegForWrite(dst[0], fn, whyNot) == false)) + return false; + if (UNLIKELY(checkRegisterData(family, dst[0], fn, whyNot) == false)) + return false; + return true; + } + // Only a label index is required INLINE bool LabelInstruction::wellFormed(const Function &fn, std::string &whyNot) const { @@ -1232,6 +1266,12 @@ namespace ir { out << " %" << this->getDst(fn, 0) << " %" << this->getSrc(fn, 0) << " offset: " << this->offset; } + INLINE void IndirectMovInstruction::out(std::ostream &out, const Function &fn) const { + this->outOpcode(out); + out << "." 
<< type << " %" << this->getDst(fn, 0) << " %" << this->getSrc(fn, 0); + out << " %" << this->getSrc(fn, 1) << " offset: " << this->offset; + } + INLINE void LabelInstruction::out(std::ostream &out, const Function &fn) const { this->outOpcode(out); out << " $" << labelIndex; @@ -1393,6 +1433,10 @@ START_INTROSPECTION(RegionInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(RegionInstruction) +START_INTROSPECTION(IndirectMovInstruction) +#include "ir/instruction.hxx" +END_INTROSPECTION(IndirectMovInstruction) + START_INTROSPECTION(LabelInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(LabelInstruction) @@ -1581,6 +1625,8 @@ DECL_MEM_FN(SyncInstruction, uint32_t, getParameters(void), getParameters()) DECL_MEM_FN(ReadARFInstruction, Type, getType(void), getType()) DECL_MEM_FN(ReadARFInstruction, ARFRegister, getARFRegister(void), getARFRegister()) DECL_MEM_FN(RegionInstruction, uint32_t, getOffset(void), getOffset()) +DECL_MEM_FN(IndirectMovInstruction, uint32_t, getOffset(void), getOffset()) +DECL_MEM_FN(IndirectMovInstruction, Type, getType(void), getType()) DECL_MEM_FN(SampleInstruction, Type, getSrcType(void), getSrcType()) DECL_MEM_FN(SampleInstruction, Type, getDstType(void), getDstType()) DECL_MEM_FN(SampleInstruction, uint8_t, getSamplerIndex(void), getSamplerIndex()) @@ -1807,6 +1853,10 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex return internal::RegionInstruction(dst, src, offset).convert(); } + Instruction INDIRECT_MOV(Type type, Register dst, Register src0, Register src1, uint32_t offset) { + return internal::IndirectMovInstruction(type, dst, src0, src1, offset).convert(); + } + // LABEL Instruction LABEL(LabelIndex labelIndex) { return internal::LabelInstruction(labelIndex).convert(); diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index e1bd05be..343d12ad 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -522,6 +522,15 @@ namespace 
ir { static bool isClassOf(const Instruction &insn); }; + /*! Indirect Move instruction */ + class IndirectMovInstruction : public Instruction { + public: + Type getType(void) const; + uint32_t getOffset(void) const; + /*! Return true if the given instruction is an instance of this class */ + static bool isClassOf(const Instruction &insn); + }; + /*! Specialize the instruction. Also performs typechecking first based on the * opcode. Crashes if it fails */ @@ -725,6 +734,7 @@ namespace ir { Instruction READ_ARF(Type type, Register dst, ARFRegister arf); Instruction REGION(Register dst, Register src, uint32_t offset); + Instruction INDIRECT_MOV(Type type, Register dst, Register src0, Register src1, uint32_t offset); /*! typed write */ Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, uint8_t srcNum, Type srcType, Type coordType); /*! sample textures */ diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index 76269bdd..10018370 100644 --- a/backend/src/ir/instruction.hxx +++ b/backend/src/ir/instruction.hxx @@ -85,6 +85,7 @@ DECL_INSN(SYNC, SyncInstruction) DECL_INSN(LABEL, LabelInstruction) DECL_INSN(READ_ARF, ReadARFInstruction) DECL_INSN(REGION, RegionInstruction) +DECL_INSN(INDIRECT_MOV, IndirectMovInstruction) DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction) DECL_INSN(MUL_HI, BinaryInstruction) DECL_INSN(I64_MUL_HI, BinaryInstruction) diff --git a/backend/src/ir/lowering.cpp b/backend/src/ir/lowering.cpp index 0e36907b..e17248ae 100644 --- a/backend/src/ir/lowering.cpp +++ b/backend/src/ir/lowering.cpp @@ -87,9 +87,15 @@ namespace ir { uint64_t offset; //!< Offset where to load in the structure uint32_t argID; //!< Associated function argument }; + struct IndirectLoad { + Instruction *load; //!< Load from the argument + vector<Instruction *> adds; //!< Can be NULL if we only have load(arg) + uint32_t argID; //!< Associated function argument + }; /*! 
List of direct loads */ typedef vector<LoadAddImm> LoadAddImmSeq; + typedef vector<IndirectLoad> IndirectLoadSeq; /*! Helper class to lower function arguments if required */ class FunctionArgumentLowerer : public Context @@ -102,9 +108,13 @@ namespace ir { /*! Perform all function arguments substitution if needed */ void lower(const std::string &name); /*! Lower the given function argument accesses */ - void lower(uint32_t argID); + ArgUse lower(uint32_t argID); /*! Build the constant push for the function */ void buildConstantPush(void); + /* Lower indirect Read to indirct Mov */ + void lowerIndirectRead(uint32_t argID); + /* Convert indirectLoad to indirect Mov */ + void ReplaceIndirectLoad(void); /*! Inspect the given function argument to see how it is used. If this is * direct loads only, we also output the list of instructions used for each * load @@ -117,6 +127,7 @@ namespace ir { Liveness *liveness; //!< To compute the function graph FunctionDAG *dag; //!< Contains complete dependency information LoadAddImmSeq seq; //!< All the direct loads + IndirectLoadSeq indirectSeq; //!< All the indirect loads }; INLINE uint64_t getOffsetFromImm(const Immediate &imm) { @@ -183,15 +194,21 @@ namespace ir { // Process all structure arguments and find all the direct loads we can // replace const uint32_t argNum = fn->argNum(); + vector<uint32_t> indirctReadArgs; for (uint32_t argID = 0; argID < argNum; ++argID) { FunctionArgument &arg = fn->getArg(argID); if (arg.type != FunctionArgument::STRUCTURE) continue; - this->lower(argID); + if(this->lower(argID) == ARG_INDIRECT_READ) + indirctReadArgs.push_back(argID); } // Build the constant push description and remove the instruction that // therefore become useless this->buildConstantPush(); + for (uint32_t i = 0; i < indirctReadArgs.size(); ++i){ + lowerIndirectRead(indirctReadArgs[i]); + } + ReplaceIndirectLoad(); } // Remove all the given instructions from the stream (if dead) @@ -271,6 +288,115 @@ namespace ir { #undef 
REMOVE_INSN + void FunctionArgumentLowerer::lowerIndirectRead(uint32_t argID) + { + FunctionArgument &arg = fn->getArg(argID); + + vector<Register> derivedRegs; + map<Register, vector<Instruction *>> addPtrInsns; + derivedRegs.push_back(arg.reg); + + //Collect all load from this argument. + for(uint32_t i=0; i<derivedRegs.size(); i++) { + const UseSet *useSet = dag->getRegUse(derivedRegs[i]); + for (const auto &use : *useSet) { + Instruction *insn = const_cast<Instruction*>(use->getInstruction()); + const Opcode opcode = insn->getOpcode(); + const uint32_t dstNum = insn->getDstNum(); + GBE_ASSERT(dstNum == 1 || opcode == OP_LOAD); + const Register dst = insn->getDst(); + auto it = addPtrInsns.find(derivedRegs[i]); + + if((opcode == OP_ADD) && (derivedRegs[i] == arg.reg)) { + GBE_ASSERT(it == addPtrInsns.end()); + + vector<Instruction *> addInsns; + addInsns.push_back(insn); + addPtrInsns.insert(std::make_pair(dst, addInsns)); + derivedRegs.push_back(dst); + } else if(opcode == OP_LOAD) { + LoadInstruction *load = cast<LoadInstruction>(insn); + if (load->getAddressSpace() != MEM_PRIVATE) + continue; + + IndirectLoad indirectLoad; + Register addr = load->getAddress(); + indirectLoad.argID = argID; + indirectLoad.load = insn; + + auto addrIt = addPtrInsns.find(addr); + GBE_ASSERT(addrIt != addPtrInsns.end()); + indirectLoad.adds = addrIt->second; + + indirectSeq.push_back(indirectLoad); + } else { + auto dstIt = addPtrInsns.find(dst); + if(dstIt == addPtrInsns.end()) + addPtrInsns.insert(std::make_pair(dst, it->second)); + else { + //Muilt src from both argument, such as select, or phi, merge the vector + dstIt->second.insert(dstIt->second.end(), it->second.begin(), it->second.end()); + } + derivedRegs.push_back(dst); + } + } + } + } + + void FunctionArgumentLowerer::ReplaceIndirectLoad(void) + { + if (indirectSeq.size() == 0) + return; + + // Track instructions we remove to recursively kill them properly + set<const Instruction*> dead; + + set<PushLocation> inserted; 
+ for (const auto &indirectLoad : indirectSeq) { + const Register arg = fn->getArg(indirectLoad.argID).reg; + if(dead.contains(indirectLoad.load)) continue; //repetitive load in the indirectSeq, skip. + LoadInstruction *load = cast<LoadInstruction>(indirectLoad.load); + const uint32_t valueNum = load->getValueNum(); + bool replaced = false; + Instruction *ins_after = load; // the instruction to insert after. + for (uint32_t valueID = 0; valueID < valueNum; ++valueID) { + const Type type = load->getValueType(); + const RegisterFamily family = getFamily(type); + const uint32_t size = getFamilySize(family); + const uint32_t offset = valueID * size; + + const Register reg = load->getValue(valueID); + + Instruction mov = ir::INDIRECT_MOV(type, reg, arg, load->getAddress(), offset); + mov.insert(ins_after, &ins_after); + replaced = true; + } + + if (replaced && !dead.contains(load)) { + dead.insert(load); + load->remove(); + } + + vector<Instruction *> adds = indirectLoad.adds; + for (uint32_t i=0; i<adds.size(); i++) { + BinaryInstruction *add = cast<BinaryInstruction>(adds[i]); + if (!dead.contains(add)) { + Register dst = add->getDst(); + const Register src0 = add->getSrc(0); + const Register src1 = add->getSrc(1); + + GBE_ASSERT(src0 == arg || src1 == arg); + Register src = (src0 == arg) ? 
src1 : src0; + Instruction mov = ir::MOV(add->getType(), dst, src); + + //MOV instruction could optimize if the dst don't write later + mov.replace(add); + dead.insert(add); + } + } + } + } + bool FunctionArgumentLowerer::useStore(const ValueDef &def, set<const Instruction*> &visited) { const UseSet &useSet = dag->getUse(def); @@ -376,17 +502,18 @@ namespace ir { return ARG_INDIRECT_READ; } - void FunctionArgumentLowerer::lower(uint32_t argID) { - IF_DEBUG(const ArgUse argUse = )this->getArgUse(argID); + ArgUse FunctionArgumentLowerer::lower(uint32_t argID) { + const ArgUse argUse = this->getArgUse(argID); #if GBE_DEBUG GBE_ASSERTM(argUse != ARG_WRITTEN, "TODO A store to a structure argument " "(i.e. not a char/short/int/float argument) has been found. " "This is not supported yet"); - GBE_ASSERTM(argUse != ARG_INDIRECT_READ, - "TODO Only direct loads of structure arguments are " - "supported now"); + //GBE_ASSERTM(argUse != ARG_INDIRECT_READ, + // "TODO Only direct loads of structure arguments are " + // "supported now"); #endif /* GBE_DEBUG */ + return argUse; } void lowerFunctionArguments(Unit &unit, const std::string &functionName) { |