diff options
author | Junyan He <junyan.he@linux.intel.com> | 2015-01-06 18:01:27 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2015-01-20 16:30:38 +0800 |
commit | 265b18ab819b5c89fb68d41dab0b755f87d2a130 (patch) | |
tree | de3766210346d90e542b37a4a21173e5d7f02e4c | |
parent | 97def1e1ac6065f559c8c6899df3dfd8401552c9 (diff) |
Overload the READ64 and WRITE64 function for Gen8
We still read/write long-type data the old way, as 2 DW
(32-bit) elements. After a read, or before a write, we use
the pack/unpack functions to convert between the 2 DW
elements and the native 64-bit value.
v2:
The appendVector calls should be placed after the appendInsn call.
Signed-off-by: Junyan He <junyan.he@linux.intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
-rw-r--r-- | backend/src/backend/gen8_context.cpp | 38 | ||||
-rw-r--r-- | backend/src/backend/gen8_context.hpp | 2 | ||||
-rw-r--r-- | backend/src/backend/gen_context.hpp | 4 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.cpp | 118 |
4 files changed, 134 insertions, 28 deletions
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp index 85896df6..276b8c51 100644 --- a/backend/src/backend/gen8_context.cpp +++ b/backend/src/backend/gen8_context.cpp @@ -109,4 +109,42 @@ namespace gbe p->pop(); } + void Gen8Context::emitRead64Instruction(const SelectionInstruction &insn) + { + const uint32_t bti = insn.getbti(); + const uint32_t elemNum = insn.extra.elem; + GBE_ASSERT(elemNum == 1); + + const GenRegister addr = ra->genReg(insn.src(0)); + const GenRegister tmp_dst = ra->genReg(insn.dst(0)); + + /* Because BDW's store and load send instructions for 64 bits require the bti to be surfaceless, + which we can not accept. We just fallback to 2 DW untyperead here. */ + p->UNTYPED_READ(tmp_dst, addr, bti, elemNum*2); + + for (uint32_t elemID = 0; elemID < elemNum; elemID++) { + GenRegister long_tmp = ra->genReg(insn.dst(elemID)); + GenRegister the_long = ra->genReg(insn.dst(elemID + elemNum)); + this->packLongVec(long_tmp, the_long, p->curr.execWidth); + } + } + + void Gen8Context::emitWrite64Instruction(const SelectionInstruction &insn) + { + const uint32_t bti = insn.getbti(); + const uint32_t elemNum = insn.extra.elem; + GBE_ASSERT(elemNum == 1); + + const GenRegister addr = ra->genReg(insn.src(elemNum)); + + /* Because BDW's store and load send instructions for 64 bits require the bti to be surfaceless, + which we can not accept. We just fallback to 2 DW untypewrite here. */ + for (uint32_t elemID = 0; elemID < elemNum; elemID++) { + GenRegister the_long = ra->genReg(insn.src(elemID)); + GenRegister long_tmp = ra->genReg(insn.src(elemNum + 1 + elemID)); + this->unpackLongVec(the_long, long_tmp, p->curr.execWidth); + } + + p->UNTYPED_WRITE(addr, bti, elemNum*2); + } } diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp index 1b9125ba..d9e547b9 100644 --- a/backend/src/backend/gen8_context.hpp +++ b/backend/src/backend/gen8_context.hpp @@ -48,6 +48,8 @@ namespace gbe /*! 
Get the pointer argument size for curbe alloc */ virtual uint32_t getPointerSize(void) { return 8; } + virtual void emitWrite64Instruction(const SelectionInstruction &insn); + virtual void emitRead64Instruction(const SelectionInstruction &insn); protected: virtual GenEncoder* generateEncoder(void) { return GBE_NEW(Gen8Encoder, this->simdWidth, 8, deviceID); diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp index 45347b9c..317257bd 100644 --- a/backend/src/backend/gen_context.hpp +++ b/backend/src/backend/gen_context.hpp @@ -148,8 +148,8 @@ namespace gbe void emitBarrierInstruction(const SelectionInstruction &insn); void emitFenceInstruction(const SelectionInstruction &insn); void emitMathInstruction(const SelectionInstruction &insn); - void emitRead64Instruction(const SelectionInstruction &insn); - void emitWrite64Instruction(const SelectionInstruction &insn); + virtual void emitRead64Instruction(const SelectionInstruction &insn); + virtual void emitWrite64Instruction(const SelectionInstruction &insn); void emitUntypedReadInstruction(const SelectionInstruction &insn); void emitUntypedWriteInstruction(const SelectionInstruction &insn); void emitAtomicInstruction(const SelectionInstruction &insn); diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 11fd5fe6..1dcdc41e 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -569,9 +569,9 @@ namespace gbe /*! Atomic instruction */ void ATOMIC(Reg dst, uint32_t function, uint32_t srcNum, Reg src0, Reg src1, Reg src2, uint32_t bti); /*! Read 64 bits float/int array */ - void READ64(Reg addr, const GenRegister *dst, uint32_t elemNum, uint32_t bti); + void READ64(Reg addr, const GenRegister *dst, const GenRegister *tmp, uint32_t elemNum, uint32_t bti, bool native_long); /*! 
Write 64 bits float/int array */ - void WRITE64(Reg addr, const GenRegister *src, uint32_t srcNum, uint32_t bti); + void WRITE64(Reg addr, const GenRegister *src, const GenRegister *tmp, uint32_t srcNum, uint32_t bti, bool native_long); /*! Untyped read (up to 4 elements) */ void UNTYPED_READ(Reg addr, const GenRegister *dst, uint32_t elemNum, uint32_t bti); /*! Untyped write (up to 4 elements) */ @@ -1127,16 +1127,34 @@ namespace gbe void Selection::Opaque::READ64(Reg addr, const GenRegister *dst, + const GenRegister *tmp, uint32_t elemNum, - uint32_t bti) + uint32_t bti, + bool native_long) { - SelectionInstruction *insn = this->appendInsn(SEL_OP_READ64, elemNum, 1); - SelectionVector *srcVector = this->appendVector(); - SelectionVector *dstVector = this->appendVector(); + SelectionInstruction *insn = NULL; + SelectionVector *srcVector = NULL; + SelectionVector *dstVector = NULL; + + if (!native_long) { + insn = this->appendInsn(SEL_OP_READ64, elemNum, 1); + srcVector = this->appendVector(); + dstVector = this->appendVector(); + // Regular instruction to encode + for (uint32_t elemID = 0; elemID < elemNum; ++elemID) + insn->dst(elemID) = dst[elemID]; + } else { + insn = this->appendInsn(SEL_OP_READ64, elemNum*2, 1); + srcVector = this->appendVector(); + dstVector = this->appendVector(); + + for (uint32_t elemID = 0; elemID < elemNum; ++elemID) + insn->dst(elemID) = tmp[elemID]; + + for (uint32_t elemID = 0; elemID < elemNum; ++elemID) + insn->dst(elemID + elemNum) = dst[elemID]; + } - // Regular instruction to encode - for (uint32_t elemID = 0; elemID < elemNum; ++elemID) - insn->dst(elemID) = dst[elemID]; insn->src(0) = addr; insn->setbti(bti); insn->extra.elem = elemNum; @@ -1179,23 +1197,52 @@ namespace gbe void Selection::Opaque::WRITE64(Reg addr, const GenRegister *src, + const GenRegister *tmp, uint32_t srcNum, - uint32_t bti) + uint32_t bti, + bool native_long) { - SelectionInstruction *insn = this->appendInsn(SEL_OP_WRITE64, 0, srcNum + 1); - 
SelectionVector *vector = this->appendVector(); - - // Regular instruction to encode - insn->src(0) = addr; - for (uint32_t elemID = 0; elemID < srcNum; ++elemID) - insn->src(elemID + 1) = src[elemID]; - - insn->setbti(bti); - insn->extra.elem = srcNum; - - vector->regNum = srcNum + 1; - vector->reg = &insn->src(0); - vector->isSrc = 1; + SelectionVector *vector = NULL; + SelectionInstruction *insn = NULL; + + if (!native_long) { + insn = this->appendInsn(SEL_OP_WRITE64, 0, srcNum + 1); + vector = this->appendVector(); + // Regular instruction to encode + insn->src(0) = addr; + for (uint32_t elemID = 0; elemID < srcNum; ++elemID) + insn->src(elemID + 1) = src[elemID]; + + insn->setbti(bti); + insn->extra.elem = srcNum; + + vector->regNum = srcNum + 1; + vector->reg = &insn->src(0); + vector->isSrc = 1; + } else { // handle the native long case + insn = this->appendInsn(SEL_OP_WRITE64, srcNum, srcNum*2 + 1); + vector = this->appendVector(); + + insn->src(0) = addr; + for (uint32_t elemID = 0; elemID < srcNum; ++elemID) + insn->src(elemID) = src[elemID]; + + insn->src(srcNum) = addr; + for (uint32_t elemID = 0; elemID < srcNum; ++elemID) + insn->src(srcNum + 1 + elemID) = tmp[0]; + + /* We also need to add the tmp reigster to dst, in order + to avoid the post schedule error . 
*/ + for (uint32_t elemID = 0; elemID < srcNum; ++elemID) + insn->dst(elemID) = tmp[0]; + + insn->setbti(bti); + insn->extra.elem = srcNum; + + vector->regNum = srcNum + 1; + vector->reg = &insn->src(srcNum); + vector->isSrc = 1; + } } void Selection::Opaque::UNTYPED_WRITE(Reg addr, @@ -2932,7 +2979,17 @@ namespace gbe GenRegister tmpAddr = getRelativeAddress(sel, addr, bti.bti[0]); for ( uint32_t dstID = 0; dstID < valueNum; ++dstID) dst[dstID] = sel.selReg(insn.getValue(dstID), ir::TYPE_U64); - sel.READ64(tmpAddr, dst.data(), valueNum, bti.bti[0]); + + if (sel.hasLongType()) { + vector<GenRegister> tmp(valueNum); + for (uint32_t valueID = 0; valueID < valueNum; ++valueID) { + tmp[valueID] = GenRegister::retype(sel.selReg(sel.reg(ir::FAMILY_QWORD), ir::TYPE_U64), GEN_TYPE_UL); + } + + sel.READ64(tmpAddr, dst.data(), tmp.data(), valueNum, bti.bti[0], true); + } else { + sel.READ64(tmpAddr, dst.data(), NULL, valueNum, bti.bti[0], false); + } } void readByteAsDWord(Selection::Opaque &sel, @@ -3247,7 +3304,16 @@ namespace gbe for (uint32_t valueID = 0; valueID < valueNum; ++valueID) src[valueID] = sel.selReg(insn.getValue(valueID), ir::TYPE_U64); - sel.WRITE64(addr, src.data(), valueNum, bti); + + if (sel.hasLongType()) { + vector<GenRegister> tmp(valueNum); + for (uint32_t valueID = 0; valueID < valueNum; ++valueID) { + tmp[valueID] = GenRegister::retype(sel.selReg(sel.reg(ir::FAMILY_QWORD), ir::TYPE_U64), GEN_TYPE_UL); + } + sel.WRITE64(addr, src.data(), tmp.data(), valueNum, bti, true); + } else { + sel.WRITE64(addr, src.data(), NULL, valueNum, bti, false); + } } void emitByteScatter(Selection::Opaque &sel, |