diff options
Diffstat (limited to 'backend')
-rw-r--r-- | backend/src/backend/gen8_context.cpp | 63 | ||||
-rw-r--r-- | backend/src/backend/gen8_context.hpp | 1 | ||||
-rw-r--r-- | backend/src/backend/gen_context.hpp | 2 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.cpp | 29 |
4 files changed, 83 insertions, 12 deletions
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp index 65c82583..e2f705fd 100644 --- a/backend/src/backend/gen8_context.cpp +++ b/backend/src/backend/gen8_context.cpp @@ -249,6 +249,69 @@ namespace gbe p->ADD(dst, dst, res); } + /* Emits the 64-bit HADD sequence for one selection instruction: src(0) and src(1) are added as 32-bit halves with ADDC, the values read from acc0 after each ADDC are collected in tmp0/tmp1, tmp_dst and dst are handed to packLongVec() as its unpacked/packed operands, and finally dst = (dst >> 1) + (tmp1 << 63). dst(1), dst(2) and dst(3) of the instruction are used as scratch registers (tmp0, tmp1, tmp_dst). */ void Gen8Context::emitI64HADDInstruction(const SelectionInstruction &insn) + { + GenRegister src0 = ra->genReg(insn.src(0)); + GenRegister src1 = ra->genReg(insn.src(1)); + GenRegister dst = ra->genReg(insn.dst(0)); + GenRegister tmp0 = ra->genReg(insn.dst(1)); + GenRegister tmp1 = ra->genReg(insn.dst(2)); + GenRegister tmp_dst = ra->genReg(insn.dst(3)); + int execWidth = p->curr.execWidth; + + /* Src0 and Src1 are always unsigned long type.*/ + GBE_ASSERT(src0.type == GEN_TYPE_UL && src1.type == GEN_TYPE_UL); + dst.type = src0.type; + tmp0.type = tmp1.type = GEN_TYPE_UD; + tmp_dst.type = GEN_TYPE_UL; + + GBE_ASSERT(tmp_dst.subnr == 0); + /* dl / dh: UD views of tmp_dst that receive the low / high 32-bit partial sums. When tmp_dst is not GEN_HORIZONTAL_STRIDE_0, dh starts execWidth / 8 registers after dl. */ GenRegister dl = tmp_dst.hstride == GEN_HORIZONTAL_STRIDE_0 ? GenRegister::retype(tmp_dst, GEN_TYPE_UD) : + GenRegister::retype(GenRegister::ud16grf(tmp_dst.nr, tmp_dst.subnr), GEN_TYPE_UD); + GenRegister dh = tmp_dst.hstride == GEN_HORIZONTAL_STRIDE_0 ? + GenRegister::retype(GenRegister::offset(tmp_dst, 0, 4), GEN_TYPE_UD) : + GenRegister::retype(GenRegister::ud16grf(tmp_dst.nr + execWidth / 8, tmp_dst.subnr), GEN_TYPE_UD); + /* s0l/s0h and s1l/s1h: UD views of each source taken at subnr and subnr + 1, or via offset(src, 0, 4) when the source is GEN_HORIZONTAL_STRIDE_0; presumably the low and high dwords of each 64-bit element (confirm against GenRegister::unpacked_ud). */ GenRegister s0l = src0.hstride == GEN_HORIZONTAL_STRIDE_0 ? + GenRegister::retype(src0, GEN_TYPE_UD) : GenRegister::unpacked_ud(src0.nr, src0.subnr); + GenRegister s0h = src0.hstride == GEN_HORIZONTAL_STRIDE_0 ? + GenRegister::retype(GenRegister::offset(src0, 0, 4), GEN_TYPE_UD) : + GenRegister::unpacked_ud(src0.nr, src0.subnr + 1); + GenRegister s1l = src1.hstride == GEN_HORIZONTAL_STRIDE_0 ? + GenRegister::retype(src1, GEN_TYPE_UD) : GenRegister::unpacked_ud(src1.nr, src1.subnr); + GenRegister s1h = src1.hstride == GEN_HORIZONTAL_STRIDE_0 ? 
+ GenRegister::retype(GenRegister::offset(src1, 0, 4), GEN_TYPE_UD) : + GenRegister::unpacked_ud(src1.nr, src1.subnr + 1); + + GenRegister acc0 = GenRegister::retype(GenRegister::acc(), GEN_TYPE_D); + /* The add chain is issued with execWidth forced to 8 between push() and pop(). acc0 is read after every ADDC; presumably it holds the carry-out of that ADDC (confirm against the ISA reference). tmp1 ends up as the sum of the two acc0 values read after the high-half ADDCs. */ p->push(); + p->curr.execWidth = 8; + p->ADDC(dl, s0l, s1l); + p->MOV(tmp0, acc0); + p->ADDC(dh, s0h, s1h); + p->MOV(tmp1, acc0); + p->ADDC(dh, dh, tmp0); + p->MOV(tmp0, acc0); + p->ADD(tmp1, tmp0, tmp1); + + /* 16-wide case: repeat the same sequence on the Qn(..., 1) views with quarterControl set to 1. */ if (execWidth == 16) { + p->curr.quarterControl = 1; + p->ADDC(GenRegister::Qn(dl, 1), GenRegister::Qn(s0l, 1), GenRegister::Qn(s1l, 1)); + p->MOV(GenRegister::Qn(tmp0, 1), acc0); + p->ADDC(GenRegister::Qn(dh, 1), GenRegister::Qn(s0h, 1), GenRegister::Qn(s1h, 1)); + p->MOV(GenRegister::Qn(tmp1, 1), acc0); + p->ADDC(GenRegister::Qn(dh, 1), GenRegister::Qn(dh, 1), GenRegister::Qn(tmp0, 1)); + p->MOV(GenRegister::Qn(tmp0, 1), acc0); + p->ADD(GenRegister::Qn(tmp1, 1), GenRegister::Qn(tmp0, 1), GenRegister::Qn(tmp1, 1)); + } + p->pop(); + + /* tmp_dst is passed as the unpacked operand and dst as the packed operand of packLongVec(). */ packLongVec(GenRegister::retype(tmp_dst, GEN_TYPE_UD), GenRegister::retype(dst, GEN_TYPE_UD), execWidth); + + /* dst = (dst >> 1) + (tmp1 << 63); tmp_dst is reused as scratch for the shifted tmp1. */ p->SHR(dst, dst, GenRegister::immud(1)); + p->SHL(tmp_dst, tmp1, GenRegister::immud(63)); + p->ADD(dst, dst, tmp_dst); + } + void Gen8Context::packLongVec(GenRegister unpacked, GenRegister packed, uint32_t simd) { GBE_ASSERT(packed.subnr == 0); diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp index 7e0a9ce5..9204d106 100644 --- a/backend/src/backend/gen8_context.hpp +++ b/backend/src/backend/gen8_context.hpp @@ -53,6 +53,7 @@ namespace gbe virtual void emitBinaryInstruction(const SelectionInstruction &insn); virtual void emitBinaryWithTempInstruction(const SelectionInstruction &insn); virtual void emitI64MULHIInstruction(const SelectionInstruction &insn); + /* Gen8 version of the I64HADD emitter; the declaration in gen_context.hpp is made virtual by this same patch. */ virtual void emitI64HADDInstruction(const SelectionInstruction &insn); virtual void emitWrite64Instruction(const SelectionInstruction &insn); virtual void emitRead64Instruction(const SelectionInstruction &insn); virtual void 
emitI64MULInstruction(const SelectionInstruction &insn); diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp index 1e6e5ab0..e3506763 100644 --- a/backend/src/backend/gen_context.hpp +++ b/backend/src/backend/gen_context.hpp @@ -131,7 +131,7 @@ namespace gbe void emitTernaryInstruction(const SelectionInstruction &insn); virtual void emitI64MULHIInstruction(const SelectionInstruction &insn); void emitI64MADSATInstruction(const SelectionInstruction &insn); - void emitI64HADDInstruction(const SelectionInstruction &insn); + /* Now virtual; gen8_context.hpp declares its own emitI64HADDInstruction in this patch. */ virtual void emitI64HADDInstruction(const SelectionInstruction &insn); void emitI64RHADDInstruction(const SelectionInstruction &insn); void emitI64ShiftInstruction(const SelectionInstruction &insn); void emitI64CompareInstruction(const SelectionInstruction &insn); diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index cac50dc9..fb56f8a2 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -531,7 +531,7 @@ namespace gbe /*! High 64bit of x*y */ void I64_MUL_HI(Reg dst, Reg src0, Reg src1, GenRegister *tmp, int tmp_num); /*! (x+y)>>1 without mod. overflow */ - void I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]); + /* tmp points to tmp_num temporaries; the definition stores them in dst(1)..dst(tmp_num) of the appended instruction. */ void I64HADD(Reg dst, Reg src0, Reg src1, GenRegister *tmp, int tmp_num); /*! (x+y+1)>>1 without mod. overflow */ void I64RHADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]); /*! 
Shift a 64-bit integer */ @@ -1496,12 +1496,12 @@ namespace gbe insn->dst(i + 1) = tmp[i]; } - void Selection::Opaque::I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]) { - SelectionInstruction *insn = this->appendInsn(SEL_OP_I64HADD, 5, 2); + /* Appends a SEL_OP_I64HADD instruction whose destinations are dst followed by tmp[0..tmp_num-1] and whose sources are src0 and src1. */ void Selection::Opaque::I64HADD(Reg dst, Reg src0, Reg src1, GenRegister *tmp, int tmp_num) { + SelectionInstruction *insn = this->appendInsn(SEL_OP_I64HADD, tmp_num + 1, 2); insn->dst(0) = dst; insn->src(0) = src0; insn->src(1) = src1; - for(int i = 0; i < 4; i ++) + for(int i = 0; i < tmp_num; i ++) insn->dst(i + 1) = tmp[i]; } @@ -2438,13 +2438,20 @@ namespace gbe break; } case OP_I64HADD: - { - GenRegister tmp[4]; - for(int i=0; i<4; i++) - tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); - sel.I64HADD(dst, src0, src1, tmp); - break; - } + { + /* When sel.hasLongType() is false: four FAMILY_DWORD temporaries, as before. Otherwise: two FAMILY_DWORD temporaries and one FAMILY_QWORD temporary, i.e. dst(1)..dst(3), which is what Gen8Context::emitI64HADDInstruction reads. NOTE(review): tmp[0] and tmp[1] are FAMILY_DWORD registers created with ir::TYPE_U64, while that emitter overwrites their type with GEN_TYPE_UD; confirm the U64 tag is intended. */ GenRegister tmp[4]; + if (!sel.hasLongType()) { + for(int i=0; i<4; i++) + tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); + sel.I64HADD(dst, src0, src1, tmp, 4); + } else { + tmp[0] = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U64); + tmp[1] = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U64); + tmp[2] = sel.selReg(sel.reg(FAMILY_QWORD), ir::TYPE_U64); + sel.I64HADD(dst, src0, src1, tmp, 3); + } + break; + } case OP_I64RHADD: { GenRegister tmp[4]; |