diff options
-rw-r--r-- | backend/src/backend/gen8_context.cpp | 31 | ||||
-rw-r--r-- | backend/src/backend/gen8_context.hpp | 2 | ||||
-rw-r--r-- | backend/src/backend/gen_context.hpp | 2 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.cpp | 43 |
4 files changed, 66 insertions, 12 deletions
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp index 8960d5be..daa4182b 100644 --- a/backend/src/backend/gen8_context.cpp +++ b/backend/src/backend/gen8_context.cpp @@ -531,6 +531,37 @@ namespace gbe p->ADD(dst, dst, tmp_dst); } + void Gen8Context::emitI64DIVREMInstruction(const SelectionInstruction &cnst_insn) + { + SelectionInstruction* insn = const_cast<SelectionInstruction*>(&cnst_insn); + GenRegister packed_src0 = ra->genReg(insn->src(0)); + GenRegister packed_src1 = ra->genReg(insn->src(1)); + GenRegister dst = ra->genReg(insn->dst(0)); + int tmp_reg_n = 14; + + if (packed_src0.hstride != GEN_HORIZONTAL_STRIDE_0) { + GenRegister unpacked_src0 = ra->genReg(insn->dst(tmp_reg_n)); + unpackLongVec(packed_src0, unpacked_src0, p->curr.execWidth); + tmp_reg_n++; + insn->src(0) = unpacked_src0; + } + if (packed_src1.hstride != GEN_HORIZONTAL_STRIDE_0) { + GenRegister unpacked_src1 = ra->genReg(insn->dst(tmp_reg_n)); + unpackLongVec(packed_src1, unpacked_src1, p->curr.execWidth); + tmp_reg_n++; + insn->src(1) = unpacked_src1; + } + GBE_ASSERT(tmp_reg_n <= insn->dstNum); + + GenContext::emitI64DIVREMInstruction(*insn); + + if (dst.hstride != GEN_HORIZONTAL_STRIDE_0) { + GenRegister dst_packed = ra->genReg(insn->dst(14)); + packLongVec(dst, dst_packed, p->curr.execWidth); + p->MOV(dst, dst_packed); + } + } + void Gen8Context::packLongVec(GenRegister unpacked, GenRegister packed, uint32_t simd) { GBE_ASSERT(packed.subnr == 0); diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp index 9f1d7498..bea78b65 100644 --- a/backend/src/backend/gen8_context.hpp +++ b/backend/src/backend/gen8_context.hpp @@ -60,6 +60,8 @@ namespace gbe virtual void emitWrite64Instruction(const SelectionInstruction &insn); virtual void emitRead64Instruction(const SelectionInstruction &insn); virtual void emitI64MULInstruction(const SelectionInstruction &insn); + virtual void emitI64DIVREMInstruction(const 
SelectionInstruction &insn); + protected: virtual GenEncoder* generateEncoder(void) { return GBE_NEW(Gen8Encoder, this->simdWidth, 8, deviceID); diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp index a366e7f2..3d01f2b1 100644 --- a/backend/src/backend/gen_context.hpp +++ b/backend/src/backend/gen_context.hpp @@ -164,7 +164,7 @@ namespace gbe void emitUnSpillRegInstruction(const SelectionInstruction &insn); void emitGetImageInfoInstruction(const SelectionInstruction &insn); virtual void emitI64MULInstruction(const SelectionInstruction &insn); - void emitI64DIVREMInstruction(const SelectionInstruction &insn); + virtual void emitI64DIVREMInstruction(const SelectionInstruction &insn); void scratchWrite(const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode); void scratchRead(const GenRegister dst, const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode); diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index d4e33c5a..27292a3b 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -617,9 +617,9 @@ namespace gbe /*! Multiply 64-bit integers */ void I64MUL(Reg dst, Reg src0, Reg src1, GenRegister *tmp, bool native_long); /*! 64-bit integer division */ - void I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]); + void I64DIV(Reg dst, Reg src0, Reg src1, GenRegister *tmp, int tmp_int); /*! 64-bit integer remainder of division */ - void I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]); + void I64REM(Reg dst, Reg src0, Reg src1, GenRegister *tmp, int tmp_int); /* common functions for both binary instruction and sel_cmp and compare instruction. It will handle the IMM or normal register assignment, and will try to avoid LOADI as much as possible. 
*/ @@ -1380,21 +1380,21 @@ namespace gbe } } - void Selection::Opaque::I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]) { - SelectionInstruction *insn = this->appendInsn(SEL_OP_I64DIV, 14, 2); + void Selection::Opaque::I64DIV(Reg dst, Reg src0, Reg src1, GenRegister* tmp, int tmp_num) { + SelectionInstruction *insn = this->appendInsn(SEL_OP_I64DIV, tmp_num + 1, 2); insn->dst(0) = dst; insn->src(0) = src0; insn->src(1) = src1; - for(int i = 0; i < 13; i++) + for(int i = 0; i < tmp_num; i++) insn->dst(i + 1) = tmp[i]; } - void Selection::Opaque::I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]) { - SelectionInstruction *insn = this->appendInsn(SEL_OP_I64REM, 14, 2); + void Selection::Opaque::I64REM(Reg dst, Reg src0, Reg src1, GenRegister* tmp, int tmp_num) { + SelectionInstruction *insn = this->appendInsn(SEL_OP_I64REM, tmp_num + 1, 2); insn->dst(0) = dst; insn->src(0) = src0; insn->src(1) = src1; - for(int i = 0; i < 13; i++) + for(int i = 0; i < tmp_num; i++) insn->dst(i + 1) = tmp[i]; } @@ -2201,18 +2201,39 @@ namespace gbe GBE_ASSERT(op != OP_REM); sel.MATH(dst, GEN_MATH_FUNCTION_FDIV, src0, src1); } else if (type == TYPE_S64 || type == TYPE_U64) { - GenRegister tmp[13]; + GenRegister tmp[15]; + int tmp_num = 13; for(int i=0; i < 13; i++) { tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); tmp[i].type = GEN_TYPE_UD; } + + if (sel.hasLongType()) { + if (!sel.isScalarReg(insn.getSrc(0))) { + tmp[tmp_num] = GenRegister::retype(sel.selReg(sel.reg(FAMILY_QWORD)), src0.type); + tmp_num++; + } + + if (!sel.isScalarReg(insn.getSrc(1))) { + tmp[tmp_num] = GenRegister::retype(sel.selReg(sel.reg(FAMILY_QWORD)), src1.type); + tmp_num++; + } + + /* We need at least one tmp register to convert if dst is not scalar. 
*/ + if (!sel.isScalarReg(insn.getDst(0)) && sel.isScalarReg(insn.getSrc(0)) + && sel.isScalarReg(insn.getSrc(1))) { + GBE_ASSERT(tmp_num == 13); + tmp[tmp_num] = sel.selReg(sel.reg(FAMILY_QWORD), ir::TYPE_U64); + tmp_num++; + } + } sel.push(); sel.curr.flag = 0; sel.curr.subFlag = 1; if(op == OP_DIV) - sel.I64DIV(dst, src0, src1, tmp); + sel.I64DIV(dst, src0, src1, tmp, tmp_num); else - sel.I64REM(dst, src0, src1, tmp); + sel.I64REM(dst, src0, src1, tmp, tmp_num); sel.pop(); } markAllChildren(dag); |