diff options
author | Junyan He <junyan.he@linux.intel.com> | 2015-01-06 18:02:39 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2015-01-20 16:31:05 +0800 |
commit | 72aa942c654cd6dee5f9214b9db6bdba0ae550b9 (patch) | |
tree | 7aea9312776cec028d96c1161851bfbc7635cdb1 | |
parent | b0a375a20a85c2e2b3f96b2e4400e14249fe58a0 (diff) |
Overload the I64MADSAT function.
Signed-off-by: Junyan He <junyan.he@linux.intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
-rw-r--r-- | backend/src/backend/gen8_context.cpp | 156 | ||||
-rw-r--r-- | backend/src/backend/gen8_context.hpp | 2 | ||||
-rw-r--r-- | backend/src/backend/gen_context.hpp | 2 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.cpp | 40 |
4 files changed, 184 insertions, 16 deletions
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index a8167809..8960d5be 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -218,6 +218,162 @@ namespace gbe
     }
   }
 
+  /*! Saturated 64-bit x*y + z (src0 * src1 + src2) for Gen8. The result is
+   *  written to dst(0); dst(1)..dst(6) are the temporaries dst_h, s0_abs,
+   *  s1_abs, tmp0, tmp1 and sign handed to the full-multiply helpers. */
+  void Gen8Context::emitI64MADSATInstruction(const SelectionInstruction &insn)
+  {
+    GenRegister src0 = ra->genReg(insn.src(0));
+    GenRegister src1 = ra->genReg(insn.src(1));
+    GenRegister src2 = ra->genReg(insn.src(2));
+    GenRegister dst_l = ra->genReg(insn.dst(0));
+    GenRegister dst_h = ra->genReg(insn.dst(1));
+    GenRegister s0_abs = ra->genReg(insn.dst(2));
+    GenRegister s1_abs = ra->genReg(insn.dst(3));
+    GenRegister tmp0 = ra->genReg(insn.dst(4));
+    GenRegister tmp1 = ra->genReg(insn.dst(5));
+    GenRegister sign = ra->genReg(insn.dst(6));
+    GenRegister flagReg = GenRegister::flag(insn.state.flag, insn.state.subFlag);
+
+    if (src0.type == GEN_TYPE_UL) {
+      /* Always should be the same long type. */
+      GBE_ASSERT(src1.type == GEN_TYPE_UL);
+      GBE_ASSERT(src2.type == GEN_TYPE_UL);
+      dst_l.type = dst_h.type = GEN_TYPE_UL;
+      tmp0.type = tmp1.type = GEN_TYPE_UL;
+      calculateFullU64MUL(p, src0, src1, dst_h, dst_l, tmp0, tmp1);
+
+      /* Implement the logic:
+         dst_l += src2;
+         if (dst_h)
+           dst_l = 0xFFFFFFFFFFFFFFFFULL;
+         if (dst_l < src2) // carry if overflow
+           dst_l = 0xFFFFFFFFFFFFFFFFULL;
+      */
+      p->ADD(dst_l, dst_l, src2);
+
+      p->push();
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->curr.noMask = 1;
+      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+      p->CMP(GEN_CONDITIONAL_NZ, dst_h, GenRegister::immud(0), tmp0);
+      p->curr.noMask = 0;
+      /* Predicate the MOV on the CMP result (as the signed path does); with
+         GEN_PREDICATE_NONE it would saturate dst_l unconditionally. */
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->MOV(dst_l, GenRegister::immuint64(0xFFFFFFFFFFFFFFFF));
+      p->pop();
+
+      p->push();
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->curr.noMask = 1;
+      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+      p->CMP(GEN_CONDITIONAL_L, dst_l, src2, tmp0);
+      p->curr.noMask = 0;
+      /* Same predication for the carry-out check. */
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->MOV(dst_l, GenRegister::immuint64(0xFFFFFFFFFFFFFFFF));
+      p->pop();
+    } else {
+      GBE_ASSERT(src0.type == GEN_TYPE_L);
+      GBE_ASSERT(src1.type == GEN_TYPE_L);
+      GBE_ASSERT(src2.type == GEN_TYPE_L);
+
+      calculateFullS64MUL(p, src0, src1, dst_h, dst_l, s0_abs, s1_abs, tmp0,
+                          tmp1, sign, flagReg);
+
+      GenRegister sum = sign;
+      sum.type = GEN_TYPE_UL;
+      src2.type = GEN_TYPE_L;
+      dst_l.type = GEN_TYPE_UL;
+      p->NOP();
+      p->ADD(sum, src2, dst_l);
+
+      /* Implement this logic:
+         if(src2 >= 0) {
+           if(dst_l > sum) {
+             dst_h++;
+             if(CL_LONG_MIN == dst_h) {
+               dst_h = CL_LONG_MAX;
+               sum = CL_ULONG_MAX;
+             }
+           }
+         } */
+      p->push();
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->curr.noMask = 1;
+      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+      p->CMP(GEN_CONDITIONAL_GE, src2, GenRegister::immud(0), tmp1);
+      p->curr.noMask = 0;
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->CMP(GEN_CONDITIONAL_G, dst_l, sum, tmp1);
+      p->ADD(dst_h, dst_h, GenRegister::immud(1));
+      p->MOV(tmp0, GenRegister::immint64(-0x7FFFFFFFFFFFFFFFLL - 1LL));
+      p->CMP(GEN_CONDITIONAL_EQ, dst_h, tmp0, tmp1);
+      p->MOV(dst_h, GenRegister::immint64(0x7FFFFFFFFFFFFFFFLL));
+      p->MOV(sum, GenRegister::immuint64(0xFFFFFFFFFFFFFFFFULL));
+      p->pop();
+      p->NOP();
+
+      /* Implement this logic:
+         else {
+           if(dst_l < sum) {
+             dst_h--;
+             if(CL_LONG_MAX == dst_h) {
+               dst_h = CL_LONG_MIN;
+               sum = 0;
+             }
+           }
+         } */
+      p->push();
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->curr.noMask = 1;
+      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+      p->CMP(GEN_CONDITIONAL_L, src2, GenRegister::immud(0), tmp1);
+      p->curr.noMask = 0;
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->CMP(GEN_CONDITIONAL_L, dst_l, sum, tmp1);
+      p->ADD(dst_h, dst_h, GenRegister::immd(-1));
+      p->MOV(tmp0, GenRegister::immint64(0x7FFFFFFFFFFFFFFFLL));
+      p->CMP(GEN_CONDITIONAL_EQ, dst_h, tmp0, tmp1);
+      p->MOV(dst_h, GenRegister::immint64(-0x7FFFFFFFFFFFFFFFLL - 1LL));
+      p->MOV(sum, GenRegister::immud(0));
+      p->pop();
+      p->NOP();
+
+      /* saturate logic:
+         if(dst_h > 0)
+           sum = CL_LONG_MAX;
+         else if(dst_h < -1)
+           sum = CL_LONG_MIN;
+         cl_long result = (cl_long) sum; */
+      p->MOV(dst_l, sum);
+
+      dst_h.type = GEN_TYPE_L;
+      p->push();
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->curr.noMask = 1;
+      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+      p->CMP(GEN_CONDITIONAL_G, dst_h, GenRegister::immud(0), tmp1);
+      p->curr.noMask = 0;
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->MOV(dst_l, GenRegister::immint64(0x7FFFFFFFFFFFFFFFLL));
+      p->pop();
+      p->NOP();
+
+      p->push();
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->curr.noMask = 1;
+      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+      p->CMP(GEN_CONDITIONAL_L, dst_h, GenRegister::immd(-1), tmp1);
+      p->curr.noMask = 0;
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->MOV(dst_l, GenRegister::immint64(-0x7FFFFFFFFFFFFFFFLL - 1LL));
+      p->pop();
+      p->NOP();
+    }
+  }
+
   void Gen8Context::emitI64MULInstruction(const SelectionInstruction &insn)
   {
     GenRegister src0 = ra->genReg(insn.src(0));
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index 6dc8afc7..9f1d7498 100644
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -55,6 +55,8 @@ namespace gbe
     virtual void emitI64MULHIInstruction(const SelectionInstruction &insn);
     virtual void emitI64RHADDInstruction(const SelectionInstruction &insn);
     virtual void emitI64HADDInstruction(const SelectionInstruction &insn);
+    virtual void emitI64MADSATInstruction(const SelectionInstruction &insn);
+
     virtual void emitWrite64Instruction(const SelectionInstruction &insn);
     virtual void emitRead64Instruction(const SelectionInstruction &insn);
     virtual void emitI64MULInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 49d60174..a366e7f2 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -130,7 +130,7 @@ namespace gbe
     virtual void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
     void emitTernaryInstruction(const SelectionInstruction &insn);
     virtual void emitI64MULHIInstruction(const SelectionInstruction &insn);
-    void emitI64MADSATInstruction(const SelectionInstruction &insn);
+    virtual void emitI64MADSATInstruction(const SelectionInstruction &insn);
     virtual void emitI64HADDInstruction(const SelectionInstruction &insn);
     virtual void emitI64RHADDInstruction(const SelectionInstruction &insn);
     void emitI64ShiftInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 8615adc8..d4e33c5a 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -527,7 +527,7 @@ namespace gbe
     /*! Convert 64-bit integer to 32-bit float */
     void CONVF_TO_I64(Reg dst, Reg src, GenRegister tmp[2]);
     /*! Saturated 64bit x*y + z */
-    void I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister tmp[9]);
+    void I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister* tmp, int tmp_num);
     /*! High 64bit of x*y */
     void I64_MUL_HI(Reg dst, Reg src0, Reg src1, GenRegister *tmp, int tmp_num);
     /*! (x+y)>>1 without mod. overflow */
@@ -1477,13 +1477,13 @@ namespace gbe
       insn->dst(i + 1) = tmp[i];
   }
 
-  void Selection::Opaque::I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister tmp[9]) {
-    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64MADSAT, 10, 3);
+  void Selection::Opaque::I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister *tmp, int tmp_num) {
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64MADSAT, tmp_num + 1, 3);
     insn->dst(0) = dst;
     insn->src(0) = src0;
     insn->src(1) = src1;
     insn->src(2) = src2;
-    for(int i = 0; i < 9; i ++)
+    for(int i = 0; i < tmp_num; i ++)
       insn->dst(i + 1) = tmp[i];
   }
 
@@ -4060,17 +4060,27 @@ namespace gbe
       switch(insn.getOpcode()) {
         case OP_I64MADSAT:
         {
-          GenRegister tmp[9];
-          for(int i=0; i<9; i++) {
-            tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
-            tmp[i].type = GEN_TYPE_UD;
-          }
-          sel.push();
-          sel.curr.flag = 0;
-          sel.curr.subFlag = 1;
-          sel.I64MADSAT(dst, src0, src1, src2, tmp);
-          sel.pop();
-          break;
+          GenRegister tmp[9];
+          int tmp_num;
+          if (!sel.hasLongType()) {
+            tmp_num = 9;
+            for(int i=0; i<9; i++) {
+              tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
+              tmp[i].type = GEN_TYPE_UD;
+            }
+          } else {
+            tmp_num = 6;
+            for(int i=0; i<6; i++) {
+              tmp[i] = sel.selReg(sel.reg(FAMILY_QWORD), ir::TYPE_U64);
+              tmp[i].type = GEN_TYPE_UL;
+            }
+          }
+          sel.push();
+          sel.curr.flag = 0;
+          sel.curr.subFlag = 1;
+          sel.I64MADSAT(dst, src0, src1, src2, tmp, tmp_num);
+          sel.pop();
+          break;
         }
         case OP_MAD:
         {