summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYang Rong <rong.r.yang@intel.com>2015-04-29 17:06:44 +0800
committerZhigang Gong <zhigang.gong@intel.com>2015-05-04 09:41:08 +0800
commit9dedb7bb929cc3580292a0db8334537dc2493fd8 (patch)
treec7e137359de5db265269b8dc2c7313597fb15378
parent7f5ba4e71c64c9bfb5efc6a7255a9f600ff2ea37 (diff)
Chv: Add chv backend support.
The chv backend is almost the same as bdw's, but chv has some restrictions on long (64-bit) registers: 1. ARF registers must never be used with a 64b datatype. 2. Source and destination horizontal stride must be aligned to the same qword. 3. Source and destination offset must be the same, except in the case of a scalar source. Add ChvContext in gen8_context.cpp to handle them. The chv encoder is the same as Gen8Encoder. V2: Fix a typo in sz in function ChvContext::setA0Content introduced when rebasing. Signed-off-by: Yang Rong <rong.r.yang@intel.com> Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
-rw-r--r--backend/src/backend/gen8_context.cpp124
-rw-r--r--backend/src/backend/gen8_context.hpp23
-rw-r--r--backend/src/backend/gen_insn_selection.cpp47
-rw-r--r--backend/src/backend/gen_insn_selection.hpp7
-rw-r--r--backend/src/backend/gen_program.cpp2
5 files changed, 193 insertions, 10 deletions
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index 2cdb2482..bf5d9c7f 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -328,7 +328,7 @@ namespace gbe
return GenRegister::unpacked_ud(reg.nr, reg.subnr + offset);
}
- static void calculateFullU64MUL(GenEncoder* p, GenRegister src0, GenRegister src1, GenRegister dst_h,
+ void Gen8Context::calculateFullU64MUL(GenRegister src0, GenRegister src1, GenRegister dst_h,
GenRegister dst_l, GenRegister s0l_s1h, GenRegister s0h_s1l)
{
src0.type = src1.type = GEN_TYPE_UD;
@@ -374,7 +374,7 @@ namespace gbe
p->ADD(dst_h, dst_h, tmp);
}
- static void calculateFullS64MUL(GenEncoder* p, GenRegister src0, GenRegister src1, GenRegister dst_h,
+ void Gen8Context::calculateFullS64MUL(GenRegister src0, GenRegister src1, GenRegister dst_h,
GenRegister dst_l, GenRegister s0_abs, GenRegister s1_abs,
GenRegister tmp0, GenRegister tmp1, GenRegister sign, GenRegister flagReg)
{
@@ -392,7 +392,7 @@ namespace gbe
s0_abs.type = s1_abs.type = GEN_TYPE_L;
p->MOV(s0_abs, GenRegister::abs(src0));
p->MOV(s1_abs, GenRegister::abs(src1));
- calculateFullU64MUL(p, s0_abs, s1_abs, dst_h, dst_l, tmp0, tmp1);
+ calculateFullU64MUL(s0_abs, s1_abs, dst_h, dst_l, tmp0, tmp1);
p->push();
p->curr.predicate = GEN_PREDICATE_NONE;
@@ -429,11 +429,11 @@ namespace gbe
if(src0.type == GEN_TYPE_UL) {
GBE_ASSERT(src1.type == GEN_TYPE_UL);
- calculateFullU64MUL(p, src0, src1, dst_h, dst_l, tmp0, tmp1);
+ calculateFullU64MUL(src0, src1, dst_h, dst_l, tmp0, tmp1);
} else {
GBE_ASSERT(src0.type == GEN_TYPE_L);
GBE_ASSERT(src1.type == GEN_TYPE_L);
- calculateFullS64MUL(p, src0, src1, dst_h, dst_l, s0_abs, s1_abs, tmp0,
+ calculateFullS64MUL(src0, src1, dst_h, dst_l, s0_abs, s1_abs, tmp0,
tmp1, sign, flagReg);
}
}
@@ -458,7 +458,7 @@ namespace gbe
GBE_ASSERT(src2.type == GEN_TYPE_UL);
dst_l.type = dst_h.type = GEN_TYPE_UL;
tmp0.type = tmp1.type = GEN_TYPE_UL;
- calculateFullU64MUL(p, src0, src1, dst_h, dst_l, tmp0, tmp1);
+ calculateFullU64MUL(src0, src1, dst_h, dst_l, tmp0, tmp1);
/* Inplement the logic:
dst_l += src2;
@@ -493,7 +493,7 @@ namespace gbe
GBE_ASSERT(src1.type == GEN_TYPE_L);
GBE_ASSERT(src2.type == GEN_TYPE_L);
- calculateFullS64MUL(p, src0, src1, dst_h, dst_l, s0_abs, s1_abs, tmp0,
+ calculateFullS64MUL(src0, src1, dst_h, dst_l, s0_abs, s1_abs, tmp0,
tmp1, sign, flagReg);
GenRegister sum = sign;
@@ -876,4 +876,114 @@ namespace gbe
}
p->pop();
}
+
+ void ChvContext::newSelection(void) { // installs a SelectionChv as this context's instruction selector
+ this->sel = GBE_NEW(SelectionChv, *this);
+ }
+
+ void ChvContext::calculateFullU64MUL(GenRegister src0, GenRegister src1, GenRegister dst_h,
+ GenRegister dst_l, GenRegister s0l_s1h, GenRegister s0h_s1l)
+ { // Unsigned 64x64 multiply: the upper 64 bits of the product end up in dst_h, the lower 64 bits in dst_l; s0l_s1h and s0h_s1l are scratch.
+ src0.type = src1.type = GEN_TYPE_UD;
+ dst_h.type = dst_l.type = GEN_TYPE_UL;
+ s0l_s1h.type = s0h_s1l.type = GEN_TYPE_UL;
+
+ //GenRegister tmp;
+
+ GenRegister s0l = unpacked_ud(src0);
+ GenRegister s1l = unpacked_ud(src1);
+ GenRegister s0h = unpacked_ud(s0l_s1h); // s0h is only needed before s0l_s1h is written, so it borrows s0l_s1h's storage
+ GenRegister s1h = unpacked_ud(dst_l); // s1h is only needed before dst_l is written, so it borrows dst_l's storage
+
+ p->MOV(s0h, GenRegister::offset(s0l, 0, 4)); // copy the dword 4 bytes past s0l (presumably the high half of src0 - confirm) into its own region
+ p->MOV(s1h, GenRegister::offset(s1l, 0, 4)); // same for src1
+
+ /* High 32 bits X High 32 bits. */
+ p->MUL(dst_h, s0h, s1h);
+ /* High 32 bits X low 32 bits. */
+ p->MUL(s0h_s1l, s0h, s1l); // last read of s0h
+ /* Low 32 bits X high 32 bits. */
+ p->MUL(s0l_s1h, s0l, s1h); // overwrites s0h's storage; last read of s1h
+ /* Low 32 bits X low 32 bits. */
+ p->MUL(dst_l, s0l, s1l); // overwrites s1h's storage, hence this MUL must come last
+
+ /* The largest 32x32 product is (2^N - 1) * (2^N - 1) = 2^2N + 1 - 2^(N+1), with N = 32.
+ Adding two 32-bit integers to it gives at most
+ 2^2N + 1 - 2^(N+1) + 2*(2^N - 1) = 2^2N - 1
+ so adding dst_l's high 32 bits and then s0l_s1h's low 32 bits to the product s0h_s1l cannot
+ overflow and produces no carry.
+ This way we avoid using the acc register, which has a lot of restrictions. */
+
+ GenRegister s0l_s1h_l = unpacked_ud(s0l_s1h);
+ p->ADD(s0h_s1l, s0h_s1l, s0l_s1h_l);
+
+ p->SHR(s0l_s1h, s0l_s1h, GenRegister::immud(32)); // keep only the upper 32 bits of s0l*s1h
+ GenRegister s0l_s1h_h = unpacked_ud(s0l_s1h);
+ p->ADD(dst_h, dst_h, s0l_s1h_h);
+
+ GenRegister dst_l_h = unpacked_ud(s0l_s1h); // s0l_s1h is reused as scratch for the dword at index 1 of dst_l
+ p->MOV(dst_l_h, unpacked_ud(dst_l, 1));
+ p->ADD(s0h_s1l, s0h_s1l, dst_l_h);
+
+ // s0l_s1h is no longer needed; reuse it as a temporary
+ GenRegister tmp = s0l_s1h;
+
+ p->SHL(tmp, s0h_s1l, GenRegister::immud(32)); // move the low 32 bits of the middle sum into the upper dword of tmp
+ GenRegister tmp_unpacked = unpacked_ud(tmp, 1);
+ p->MOV(unpacked_ud(dst_l, 1), tmp_unpacked); // and store them as the dword at index 1 of dst_l
+
+ p->SHR(tmp, s0h_s1l, GenRegister::immud(32)); // the upper 32 bits of the middle sum carry into dst_h
+ p->ADD(dst_h, dst_h, tmp);
+ }
+
+ void ChvContext::emitI64MULInstruction(const SelectionInstruction &insn)
+ { // Emits dst = s0l*s1l + ((s0l*s1h) << 32) + ((s0h*s1l) << 32); insn.dst(1) serves as the scratch register `res`.
+ GenRegister src0 = ra->genReg(insn.src(0));
+ GenRegister src1 = ra->genReg(insn.src(1));
+ GenRegister dst = ra->genReg(insn.dst(0));
+ GenRegister res = ra->genReg(insn.dst(1));
+
+ src0.type = src1.type = GEN_TYPE_UD;
+ dst.type = GEN_TYPE_UL;
+ res.type = GEN_TYPE_UL;
+
+ /* Low 32 bits X low 32 bits. */
+ GenRegister s0l = unpacked_ud(src0);
+ GenRegister s1l = unpacked_ud(src1);
+ p->MUL(dst, s0l, s1l);
+
+ /* Low 32 bits X high 32 bits. */
+ GenRegister s1h = unpacked_ud(res); // s1h lives in res's storage
+ p->MOV(s1h, unpacked_ud(src1, 1)); // copy the dword at index 1 of src1 into res first, then multiply from there
+
+ p->MUL(res, s0l, s1h);
+ p->SHL(res, res, GenRegister::immud(32)); // only the low 32 bits of this cross product reach the 64-bit result
+ p->ADD(dst, dst, res);
+
+ /* High 32 bits X low 32 bits. */
+ GenRegister s0h = unpacked_ud(res); // res is free again, so s0h reuses it
+ p->MOV(s0h, unpacked_ud(src0, 1));
+
+ p->MUL(res, s0h, s1l);
+ p->SHL(res, res, GenRegister::immud(32));
+ p->ADD(dst, dst, res);
+ }
+
+ void ChvContext::setA0Content(uint16_t new_a0[16], uint16_t max_offset, int sz) { // writes new_a0 into the registers returned by GenRegister::addr1(); max_offset is not read in this override
+ if (sz == 0) // sz == 0 selects all 16 entries
+ sz = 16;
+ GBE_ASSERT(sz%4 == 0);
+ GBE_ASSERT(new_a0[0] < 4096); // new_a0 is unsigned, so a ">= 0" test would always be true
+
+ p->push();
+ p->curr.execWidth = 1; // each MOV below is a single-lane, unpredicated, unmasked write
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.noMask = 1;
+ for (int i = 0; i < sz/2; i++) { // entries 2i and 2i+1 are packed into one UD immediate per iteration
+ p->MOV(GenRegister::retype(GenRegister::addr1(i*2), GEN_TYPE_UD),
+ GenRegister::immud(static_cast<uint32_t>(new_a0[i*2 + 1]) << 16 | new_a0[i*2])); // widen before shifting so a value with bit 15 set cannot overflow a signed int
+ }
+ p->pop();
+ }
+
}
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index b296a3d9..88279553 100644
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -82,6 +82,29 @@ namespace gbe
virtual void newSelection(void);
void packLongVec(GenRegister unpacked, GenRegister packed, uint32_t simd);
void unpackLongVec(GenRegister packed, GenRegister unpacked, uint32_t simd);
+ void calculateFullS64MUL(GenRegister src0, GenRegister src1, GenRegister dst_h,
+ GenRegister dst_l, GenRegister s0_abs, GenRegister s1_abs,
+ GenRegister tmp0, GenRegister tmp1, GenRegister sign, GenRegister flagReg);
+ virtual void calculateFullU64MUL(GenRegister src0, GenRegister src1, GenRegister dst_h,
+ GenRegister dst_l, GenRegister s0l_s1h, GenRegister s0h_s1l);
+ };
+
+ class ChvContext : public Gen8Context // Gen8Context variant created for IS_CHERRYVIEW devices
+ {
+ public:
+ virtual ~ChvContext(void) { }
+ ChvContext(const ir::Unit &unit, const std::string &name, uint32_t deviceID, bool relaxMath = false)
+ : Gen8Context(unit, name, deviceID, relaxMath) { // forwards every argument unchanged to Gen8Context
+ };
+ virtual void emitI64MULInstruction(const SelectionInstruction &insn); // 64-bit multiply that stages high dwords through a scratch register
+
+ protected:
+ virtual void setA0Content(uint16_t new_a0[16], uint16_t max_offset = 0, int sz = 0); // sz == 0 is treated as 16 by the implementation
+
+ private:
+ virtual void newSelection(void); // creates a SelectionChv
+ virtual void calculateFullU64MUL(GenRegister src0, GenRegister src1, GenRegister dst_h,
+ GenRegister dst_l, GenRegister s0l_s1h, GenRegister s0h_s1l); // replaces the Gen8Context version declared virtual in the base class
 };
}
#endif /* __GBE_GEN8_CONTEXT_HPP__ */
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 19a3c24b..9e15ae07 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -361,6 +361,8 @@ namespace gbe
void setHas32X32Mul(bool b) { bHas32X32Mul = b; }
bool hasLongType() const { return bHasLongType; }
void setHasLongType(bool b) { bHasLongType = b; }
+ bool hasLongRegRestrict() { return bLongRegRestrict; }
+ void setLongRegRestrict(bool b) { bLongRegRestrict = b; }
void setLdMsgOrder(uint32_t type) { ldMsgOrder = type; }
uint32_t getLdMsgOrder() const { return ldMsgOrder; }
/*! indicate whether a register is a scalar/uniform register. */
@@ -720,6 +722,7 @@ namespace gbe
uint32_t currAuxLabel;
bool bHas32X32Mul;
bool bHasLongType;
+ bool bLongRegRestrict;
uint32_t ldMsgOrder;
INLINE ir::LabelIndex newAuxLabel()
{
@@ -760,7 +763,7 @@ namespace gbe
curr(ctx.getSimdWidth()), file(ctx.getFunction().getRegisterFile()),
maxInsnNum(ctx.getFunction().getLargestBlockSize()), dagPool(maxInsnNum),
stateNum(0), vectorNum(0), bwdCodeGeneration(false), currAuxLabel(ctx.getFunction().labelNum()),
- bHas32X32Mul(false), bHasLongType(false), ldMsgOrder(LD_MSG_ORDER_IVB)
+ bHas32X32Mul(false), bHasLongType(false), bLongRegRestrict(false), ldMsgOrder(LD_MSG_ORDER_IVB)
{
const ir::Function &fn = ctx.getFunction();
this->regNum = fn.regNum();
@@ -1918,6 +1921,12 @@ namespace gbe
this->opaque->setHasLongType(true);
}
+ SelectionChv::SelectionChv(GenContext &ctx) : Selection(ctx) { // selector used by ChvContext
+ this->opaque->setHas32X32Mul(true);
+ this->opaque->setHasLongType(true);
+ this->opaque->setLongRegRestrict(true); // enables the hasLongRegRestrict() conversion path in instruction selection
+ }
+
Selection9::Selection9(GenContext &ctx) : Selection(ctx) {
this->opaque->setHas32X32Mul(true);
this->opaque->setHasLongType(true);
@@ -4137,7 +4146,41 @@ namespace gbe
sel.MOV(dst, unpacked);
}
}
- } else if ((dst.isdf() && srcType == ir::TYPE_FLOAT) ||
+ } else if (sel.hasLongType() && sel.hasLongRegRestrict() && dstFamily == FAMILY_QWORD && srcFamily != FAMILY_QWORD) {
+ // Convert i32/i16/i8/float to i64/double if hasLongRegRestrict(src and dst hstride must be aligned to the same qword).
+ GenRegister unpacked;
+ GenRegister unpacked_src = src;
+
+ sel.push();
+ if (sel.isScalarReg(insn.getSrc(0))) {
+ sel.curr.execWidth = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ }
+
+ if(srcType == ir::TYPE_FLOAT) {
+ unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
+ unpacked = GenRegister::retype(unpacked, GEN_TYPE_F);
+ } else if(srcFamily == FAMILY_DWORD) {
+ unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
+ unpacked = GenRegister::retype(unpacked, dstType == TYPE_U64 ? GEN_TYPE_UD : GEN_TYPE_D);
+ } else if(srcFamily == FAMILY_WORD) {
+ unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
+ unpacked = GenRegister::retype(unpacked, dstType == TYPE_U64 ? GEN_TYPE_UW : GEN_TYPE_W);
+ } else if(srcFamily == FAMILY_BYTE) {
+ GenRegister tmp = sel.selReg(sel.reg(FAMILY_WORD, sel.isScalarReg(insn.getSrc(0))));
+ tmp = GenRegister::retype(tmp, dstType == TYPE_U64 ? GEN_TYPE_UW : GEN_TYPE_W);
+ unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
+ unpacked = GenRegister::retype(unpacked, dstType == TYPE_U64 ? GEN_TYPE_UW : GEN_TYPE_W);
+ sel.MOV(tmp, src);
+ unpacked_src = tmp;
+ } else
+ GBE_ASSERT(0);
+
+ sel.MOV(unpacked, unpacked_src);
+ sel.pop();
+ sel.MOV(dst, unpacked);
+ }else if ((dst.isdf() && srcType == ir::TYPE_FLOAT) ||
(src.isdf() && dstType == ir::TYPE_FLOAT)) { // float and double conversion
ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
sel.MOV_DF(dst, src, sel.selReg(r, TYPE_U64));
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 7c9bce5e..dee35bbe 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -293,6 +293,13 @@ namespace gbe
Selection8(GenContext &ctx);
};
+ class SelectionChv: public Selection // selection variant whose constructor also sets the long-register restriction flag
+ {
+ public:
+ /*! Initialize internal structures used for the selection */
+ SelectionChv(GenContext &ctx);
+ };
+
class Selection9: public Selection
{
public:
diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp
index f53d5fb6..c761a2f4 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -167,7 +167,7 @@ namespace gbe {
} else if (IS_BROADWELL(deviceID)) {
ctx = GBE_NEW(Gen8Context, unit, name, deviceID, relaxMath);
} else if (IS_CHERRYVIEW(deviceID)) {
- ctx = GBE_NEW(Gen8Context, unit, name, deviceID, relaxMath);
+ ctx = GBE_NEW(ChvContext, unit, name, deviceID, relaxMath);
} else if (IS_SKYLAKE(deviceID)) {
ctx = GBE_NEW(Gen9Context, unit, name, deviceID, relaxMath);
}