summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Junyan He <junyan.he@linux.intel.com> 2015-01-06 18:01:27 +0800
committer Zhigang Gong <zhigang.gong@intel.com> 2015-01-20 16:30:38 +0800
commit 265b18ab819b5c89fb68d41dab0b755f87d2a130 (patch)
tree de3766210346d90e542b37a4a21173e5d7f02e4c
parent 97def1e1ac6065f559c8c6899df3dfd8401552c9 (diff)
Override the READ64 and WRITE64 functions for Gen8
We still read and write long (64-bit) data the old way, as two DWORD elements per value. After a read, or before a write, we use the pack/unpack functions to convert between the two-DWORD layout and the long layout. v2: the vector should be appended after the instruction is appended. Signed-off-by: Junyan He <junyan.he@linux.intel.com> Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
-rw-r--r-- backend/src/backend/gen8_context.cpp 38
-rw-r--r-- backend/src/backend/gen8_context.hpp 2
-rw-r--r-- backend/src/backend/gen_context.hpp 4
-rw-r--r-- backend/src/backend/gen_insn_selection.cpp 118
4 files changed, 134 insertions, 28 deletions
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index 85896df6..276b8c51 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -109,4 +109,42 @@ namespace gbe
p->pop();
}
+ void Gen8Context::emitRead64Instruction(const SelectionInstruction &insn) // Gen8 64-bit load: fetch each long as two DWORDs, then pack.
+ {
+ const uint32_t bti = insn.getbti();
+ const uint32_t elemNum = insn.extra.elem; // number of 64-bit elements requested
+ GBE_ASSERT(elemNum == 1); // only a single 64-bit element is handled by this path
+
+ const GenRegister addr = ra->genReg(insn.src(0));
+ const GenRegister tmp_dst = ra->genReg(insn.dst(0)); // dst(0) is the temporary that receives the raw DWORD data
+
+ /* BDW's 64-bit load and store send instructions require the bti to be surfaceless,
+ which we cannot accept, so we fall back to a 2-DWORD untyped read here. */
+ p->UNTYPED_READ(tmp_dst, addr, bti, elemNum*2); // elemNum*2: two DWORDs per 64-bit element
+
+ for (uint32_t elemID = 0; elemID < elemNum; elemID++) {
+ GenRegister long_tmp = ra->genReg(insn.dst(elemID)); // temporary filled by the untyped read above
+ GenRegister the_long = ra->genReg(insn.dst(elemID + elemNum)); // real 64-bit destination, stored after the temporaries
+ this->packLongVec(long_tmp, the_long, p->curr.execWidth); // NOTE(review): presumably packs the DWORD pairs in long_tmp into the_long -- confirm against packLongVec
+ }
+ }
+
+ void Gen8Context::emitWrite64Instruction(const SelectionInstruction &insn) // Gen8 64-bit store: unpack each long, then write it as two DWORDs.
+ {
+ const uint32_t bti = insn.getbti();
+ const uint32_t elemNum = insn.extra.elem; // number of 64-bit elements to store
+ GBE_ASSERT(elemNum == 1); // only a single 64-bit element is handled by this path
+
+ const GenRegister addr = ra->genReg(insn.src(elemNum)); // the address source sits right after the elemNum value sources
+
+ /* BDW's 64-bit load and store send instructions require the bti to be surfaceless,
+ which we cannot accept, so we fall back to a 2-DWORD untyped write here. */
+ for (uint32_t elemID = 0; elemID < elemNum; elemID++) {
+ GenRegister the_long = ra->genReg(insn.src(elemID)); // 64-bit value to store
+ GenRegister long_tmp = ra->genReg(insn.src(elemNum + 1 + elemID)); // temporary source placed after the address
+ this->unpackLongVec(the_long, long_tmp, p->curr.execWidth); // NOTE(review): presumably splits the_long into DWORD pairs in long_tmp -- confirm against unpackLongVec
+ }
+
+ p->UNTYPED_WRITE(addr, bti, elemNum*2); // payload is not passed explicitly; NOTE(review): presumably the temporaries directly follow addr in registers -- confirm
+ }
}
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index 1b9125ba..d9e547b9 100644
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -48,6 +48,8 @@ namespace gbe
/*! Get the pointer argument size for curbe alloc */
virtual uint32_t getPointerSize(void) { return 8; }
+ virtual void emitWrite64Instruction(const SelectionInstruction &insn); // Gen8 version of the 64-bit write emission
+ virtual void emitRead64Instruction(const SelectionInstruction &insn); // Gen8 version of the 64-bit read emission
protected:
virtual GenEncoder* generateEncoder(void) {
return GBE_NEW(Gen8Encoder, this->simdWidth, 8, deviceID);
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 45347b9c..317257bd 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -148,8 +148,8 @@ namespace gbe
void emitBarrierInstruction(const SelectionInstruction &insn);
void emitFenceInstruction(const SelectionInstruction &insn);
void emitMathInstruction(const SelectionInstruction &insn);
- void emitRead64Instruction(const SelectionInstruction &insn);
- void emitWrite64Instruction(const SelectionInstruction &insn);
+ virtual void emitRead64Instruction(const SelectionInstruction &insn); // virtual: Gen8Context supplies its own implementation
+ virtual void emitWrite64Instruction(const SelectionInstruction &insn); // virtual: Gen8Context supplies its own implementation
void emitUntypedReadInstruction(const SelectionInstruction &insn);
void emitUntypedWriteInstruction(const SelectionInstruction &insn);
void emitAtomicInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 11fd5fe6..1dcdc41e 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -569,9 +569,9 @@ namespace gbe
/*! Atomic instruction */
void ATOMIC(Reg dst, uint32_t function, uint32_t srcNum, Reg src0, Reg src1, Reg src2, uint32_t bti);
/*! Read 64 bits float/int array */
- void READ64(Reg addr, const GenRegister *dst, uint32_t elemNum, uint32_t bti);
+ void READ64(Reg addr, const GenRegister *dst, const GenRegister *tmp, uint32_t elemNum, uint32_t bti, bool native_long); // tmp is only read when native_long is true (callers pass NULL otherwise)
/*! Write 64 bits float/int array */
- void WRITE64(Reg addr, const GenRegister *src, uint32_t srcNum, uint32_t bti);
+ void WRITE64(Reg addr, const GenRegister *src, const GenRegister *tmp, uint32_t srcNum, uint32_t bti, bool native_long); // tmp is only read when native_long is true (callers pass NULL otherwise)
/*! Untyped read (up to 4 elements) */
void UNTYPED_READ(Reg addr, const GenRegister *dst, uint32_t elemNum, uint32_t bti);
/*! Untyped write (up to 4 elements) */
@@ -1127,16 +1127,34 @@ namespace gbe
void Selection::Opaque::READ64(Reg addr,
const GenRegister *dst,
+ const GenRegister *tmp,
uint32_t elemNum,
- uint32_t bti)
+ uint32_t bti,
+ bool native_long)
{
- SelectionInstruction *insn = this->appendInsn(SEL_OP_READ64, elemNum, 1);
- SelectionVector *srcVector = this->appendVector();
- SelectionVector *dstVector = this->appendVector();
+ SelectionInstruction *insn = NULL;
+ SelectionVector *srcVector = NULL;
+ SelectionVector *dstVector = NULL;
+
+ if (!native_long) {
+ insn = this->appendInsn(SEL_OP_READ64, elemNum, 1);
+ srcVector = this->appendVector();
+ dstVector = this->appendVector();
+ // Regular instruction to encode
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+ insn->dst(elemID) = dst[elemID];
+ } else {
+ insn = this->appendInsn(SEL_OP_READ64, elemNum*2, 1);
+ srcVector = this->appendVector();
+ dstVector = this->appendVector();
+
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+ insn->dst(elemID) = tmp[elemID];
+
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+ insn->dst(elemID + elemNum) = dst[elemID];
+ }
- // Regular instruction to encode
- for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
- insn->dst(elemID) = dst[elemID];
insn->src(0) = addr;
insn->setbti(bti);
insn->extra.elem = elemNum;
@@ -1179,23 +1197,52 @@ namespace gbe
void Selection::Opaque::WRITE64(Reg addr,
const GenRegister *src,
+ const GenRegister *tmp,
uint32_t srcNum,
- uint32_t bti)
+ uint32_t bti,
+ bool native_long)
{
- SelectionInstruction *insn = this->appendInsn(SEL_OP_WRITE64, 0, srcNum + 1);
- SelectionVector *vector = this->appendVector();
-
- // Regular instruction to encode
- insn->src(0) = addr;
- for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
- insn->src(elemID + 1) = src[elemID];
-
- insn->setbti(bti);
- insn->extra.elem = srcNum;
-
- vector->regNum = srcNum + 1;
- vector->reg = &insn->src(0);
- vector->isSrc = 1;
+ SelectionVector *vector = NULL;
+ SelectionInstruction *insn = NULL;
+
+ if (!native_long) { // legacy layout: src(0) is the address, src(1..srcNum) are the values
+ insn = this->appendInsn(SEL_OP_WRITE64, 0, srcNum + 1);
+ vector = this->appendVector(); // v2 of the patch: the vector is appended after the instruction
+ // Regular instruction to encode
+ insn->src(0) = addr;
+ for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
+ insn->src(elemID + 1) = src[elemID];
+
+ insn->setbti(bti);
+ insn->extra.elem = srcNum;
+
+ vector->regNum = srcNum + 1; // the address plus the srcNum values form one source vector
+ vector->reg = &insn->src(0);
+ vector->isSrc = 1;
+ } else { // handle the native long case
+ insn = this->appendInsn(SEL_OP_WRITE64, srcNum, srcNum*2 + 1); // srcNum dsts (temporaries); srcs are srcNum values + address + srcNum temporaries
+ vector = this->appendVector();
+
+ insn->src(0) = addr; // NOTE(review): dead store when srcNum > 0 -- src(0) is overwritten by the loop below
+ for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
+ insn->src(elemID) = src[elemID]; // values occupy src(0..srcNum-1)
+
+ insn->src(srcNum) = addr; // the address follows the values
+ for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
+ insn->src(srcNum + 1 + elemID) = tmp[0]; // NOTE(review): always tmp[0], not tmp[elemID]; equivalent only while srcNum == 1 -- confirm intent
+
+ /* We also need to add the tmp register to dst, in order
+ to avoid the post schedule error. */
+ for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
+ insn->dst(elemID) = tmp[0]; // NOTE(review): same tmp[0] indexing as above -- confirm intent for srcNum > 1
+
+ insn->setbti(bti);
+ insn->extra.elem = srcNum;
+
+ vector->regNum = srcNum + 1; // the vector covers the address and the temporaries after it, not the original values
+ vector->reg = &insn->src(srcNum);
+ vector->isSrc = 1;
+ }
}
void Selection::Opaque::UNTYPED_WRITE(Reg addr,
@@ -2932,7 +2979,17 @@ namespace gbe
GenRegister tmpAddr = getRelativeAddress(sel, addr, bti.bti[0]);
for ( uint32_t dstID = 0; dstID < valueNum; ++dstID)
dst[dstID] = sel.selReg(insn.getValue(dstID), ir::TYPE_U64);
- sel.READ64(tmpAddr, dst.data(), valueNum, bti.bti[0]);
+
+ if (sel.hasLongType()) {
+ vector<GenRegister> tmp(valueNum);
+ for (uint32_t valueID = 0; valueID < valueNum; ++valueID) {
+ tmp[valueID] = GenRegister::retype(sel.selReg(sel.reg(ir::FAMILY_QWORD), ir::TYPE_U64), GEN_TYPE_UL);
+ }
+
+ sel.READ64(tmpAddr, dst.data(), tmp.data(), valueNum, bti.bti[0], true);
+ } else {
+ sel.READ64(tmpAddr, dst.data(), NULL, valueNum, bti.bti[0], false);
+ }
}
void readByteAsDWord(Selection::Opaque &sel,
@@ -3247,7 +3304,16 @@ namespace gbe
for (uint32_t valueID = 0; valueID < valueNum; ++valueID)
src[valueID] = sel.selReg(insn.getValue(valueID), ir::TYPE_U64);
- sel.WRITE64(addr, src.data(), valueNum, bti);
+
+ if (sel.hasLongType()) {
+ vector<GenRegister> tmp(valueNum);
+ for (uint32_t valueID = 0; valueID < valueNum; ++valueID) {
+ tmp[valueID] = GenRegister::retype(sel.selReg(sel.reg(ir::FAMILY_QWORD), ir::TYPE_U64), GEN_TYPE_UL);
+ }
+ sel.WRITE64(addr, src.data(), tmp.data(), valueNum, bti, true);
+ } else {
+ sel.WRITE64(addr, src.data(), NULL, valueNum, bti, false);
+ }
}
void emitByteScatter(Selection::Opaque &sel,