diff options
author | Junyan He <junyan.he@linux.intel.com> | 2015-01-06 18:02:07 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2015-01-20 16:30:56 +0800 |
commit | 7149dc959988c9c8e29cd407eb585377f646db3a (patch) | |
tree | 444781d6563725c7438d5a067dc30ebb50dae10f | |
parent | 9c7d2346783638e0aa80ff8d6477e6387b12073e (diff) |
Overload I64MUL function.
BDW supports a 32-bit by 32-bit multiply instruction, so we can use it
to refine the 64-bit (long) MUL implementation.
Signed-off-by: Junyan He <junyan.he@linux.intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
-rw-r--r-- | backend/src/backend/gen8_context.cpp | 31 | ||||
-rw-r--r-- | backend/src/backend/gen8_context.hpp | 1 | ||||
-rw-r--r-- | backend/src/backend/gen_context.hpp | 2 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.cpp | 34 |
4 files changed, 58 insertions, 10 deletions
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp index 18a34256..85b72a5b 100644 --- a/backend/src/backend/gen8_context.cpp +++ b/backend/src/backend/gen8_context.cpp @@ -102,6 +102,37 @@ namespace gbe } } + void Gen8Context::emitI64MULInstruction(const SelectionInstruction &insn) + { + GenRegister src0 = ra->genReg(insn.src(0)); + GenRegister src1 = ra->genReg(insn.src(1)); + GenRegister dst = ra->genReg(insn.dst(0)); + GenRegister res = ra->genReg(insn.dst(1)); + + src0.type = src1.type = GEN_TYPE_UD; + dst.type = GEN_TYPE_UL; + res.type = GEN_TYPE_UL; + + /* Low 32 bits X low 32 bits. */ + GenRegister s0l = src0.hstride == GEN_HORIZONTAL_STRIDE_0 ? + GenRegister::retype(src0, GEN_TYPE_UD) : GenRegister::unpacked_ud(src0.nr, src0.subnr); + GenRegister s1l = src1.hstride == GEN_HORIZONTAL_STRIDE_0 ? + GenRegister::retype(src1, GEN_TYPE_UD) : GenRegister::unpacked_ud(src1.nr, src1.subnr); + p->MUL(dst, s0l, s1l); + + /* Low 32 bits X high 32 bits. */ + GenRegister s1h = GenRegister::offset(s1l, 0, 4); + p->MUL(res, s0l, s1h); + p->SHL(res, res, GenRegister::immud(32)); + p->ADD(dst, dst, res); + + /* High 32 bits X low 32 bits. 
*/ + GenRegister s0h = GenRegister::offset(s0l, 0, 4); + p->MUL(res, s0h, s1l); + p->SHL(res, res, GenRegister::immud(32)); + p->ADD(dst, dst, res); + } + void Gen8Context::packLongVec(GenRegister unpacked, GenRegister packed, uint32_t simd) { GBE_ASSERT(packed.subnr == 0); diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp index 54cc29d7..7e6d3b35 100644 --- a/backend/src/backend/gen8_context.hpp +++ b/backend/src/backend/gen8_context.hpp @@ -54,6 +54,7 @@ namespace gbe virtual void emitBinaryWithTempInstruction(const SelectionInstruction &insn); virtual void emitWrite64Instruction(const SelectionInstruction &insn); virtual void emitRead64Instruction(const SelectionInstruction &insn); + virtual void emitI64MULInstruction(const SelectionInstruction &insn); protected: virtual GenEncoder* generateEncoder(void) { return GBE_NEW(Gen8Encoder, this->simdWidth, 8, deviceID); diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp index 3593d66b..2c97092d 100644 --- a/backend/src/backend/gen_context.hpp +++ b/backend/src/backend/gen_context.hpp @@ -163,7 +163,7 @@ namespace gbe void emitSpillRegInstruction(const SelectionInstruction &insn); void emitUnSpillRegInstruction(const SelectionInstruction &insn); void emitGetImageInfoInstruction(const SelectionInstruction &insn); - void emitI64MULInstruction(const SelectionInstruction &insn); + virtual void emitI64MULInstruction(const SelectionInstruction &insn); void emitI64DIVREMInstruction(const SelectionInstruction &insn); void scratchWrite(const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode); void scratchRead(const GenRegister dst, const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode); diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 1fdc5a2c..817111d4 100644 --- 
a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -615,7 +615,7 @@ namespace gbe /*! Get image information */ void GET_IMAGE_INFO(uint32_t type, GenRegister *dst, uint32_t dst_num, uint32_t bti); /*! Multiply 64-bit integers */ - void I64MUL(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]); + void I64MUL(Reg dst, Reg src0, Reg src1, GenRegister *tmp, bool native_long); /*! 64-bit integer division */ void I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]); /*! 64-bit integer remainder of division */ @@ -1361,13 +1361,23 @@ namespace gbe insn->extra.function = function; } - void Selection::Opaque::I64MUL(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) { - SelectionInstruction *insn = this->appendInsn(SEL_OP_I64MUL, 7, 2); + void Selection::Opaque::I64MUL(Reg dst, Reg src0, Reg src1, GenRegister *tmp, bool native_long) { + SelectionInstruction *insn = NULL; + if (native_long) + insn = this->appendInsn(SEL_OP_I64MUL, 2, 2); + else + insn = this->appendInsn(SEL_OP_I64MUL, 7, 2); + insn->dst(0) = dst; insn->src(0) = src0; insn->src(1) = src1; - for(int i = 0; i < 6; i++) - insn->dst(i + 1) = tmp[i]; + + if (native_long) { + insn->dst(1) = tmp[0]; + } else { + for (int i = 0; i < 6; i++) + insn->dst(i + 1) = tmp[i]; + } } void Selection::Opaque::I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]) { @@ -2395,10 +2405,16 @@ namespace gbe sel.pop(); return false; } else if (type == TYPE_S64 || type == TYPE_U64) { - GenRegister tmp[6]; - for(int i = 0; i < 6; i++) - tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); - sel.I64MUL(dst, src0, src1, tmp); + if (sel.hasLongType()) { + GenRegister tmp; + tmp = sel.selReg(sel.reg(FAMILY_QWORD), ir::TYPE_U64); + sel.I64MUL(dst, src0, src1, &tmp, true); + } else { + GenRegister tmp[6]; + for(int i = 0; i < 6; i++) + tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); + sel.I64MUL(dst, src0, src1, tmp, false); + } } else sel.MUL(dst, src0, src1); break; |