diff options
author | Zhigang Gong <zhigang.gong@intel.com> | 2014-04-02 14:36:19 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2014-04-08 16:21:33 +0800 |
commit | 745fb36d9f756ebce120d49498f8ab04a9f14848 (patch) | |
tree | 2a25719f49e0d90d8c52da27b27b5fab700a98bf | |
parent | 858414e497141b54c0df18510cb764d3770a2d9c (diff) |
GBE: fix the flag usage of the long/64-bit instructions.
Make the flag allocation aware that the long/64-bit instructions
use flag f0.1, and don't hard-code f0.1 at the gen_context
stage.
Signed-off-by: Zhigang Gong <zhigang.gong@intel.com>
Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
Reviewed-by: "Song, Ruiling" <ruiling.song@intel.com>
-rw-r--r-- | backend/src/backend/gen_context.cpp | 50 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.cpp | 173 | ||||
-rw-r--r-- | backend/src/backend/gen_reg_allocation.cpp | 5 |
3 files changed, 122 insertions, 106 deletions
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 3224c065..50f10c5f 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -566,9 +566,8 @@ namespace gbe GenRegister g = ra->genReg(insn.dst(7)); GenRegister h = ra->genReg(insn.dst(8)); GenRegister i = ra->genReg(insn.dst(9)); - //GenRegister flagReg = checkFlagRegister(ra->genReg(insn.dst(10))); - // We just simply use the temporary flag here. - GenRegister flagReg = GenRegister::flag(0, 1); + GBE_ASSERT(insn.state.flag == 0 && insn.state.subFlag == 1); + GenRegister flagReg = GenRegister::flag(insn.state.flag, insn.state.subFlag); loadTopHalf(a, x); loadBottomHalf(b, x); loadTopHalf(c, y); @@ -615,9 +614,8 @@ namespace gbe GenRegister g = ra->genReg(insn.dst(7)); GenRegister h = ra->genReg(insn.dst(8)); GenRegister i = ra->genReg(insn.dst(9)); - //GenRegister flagReg = checkFlagRegister(ra->genReg(insn.dst(10))); - // We just simply use the temporary flag here. - GenRegister flagReg = GenRegister::flag(0, 1); + GBE_ASSERT(insn.state.flag == 0 && insn.state.subFlag == 1); + GenRegister flagReg = GenRegister::flag(insn.state.flag, insn.state.subFlag); GenRegister zero = GenRegister::immud(0), one = GenRegister::immud(1); loadTopHalf(a, x); loadBottomHalf(b, x); @@ -801,9 +799,8 @@ namespace gbe GenRegister e = ra->genReg(insn.dst(5)); GenRegister f = ra->genReg(insn.dst(6)); a.type = b.type = c.type = d.type = e.type = f.type = GEN_TYPE_UD; - //GenRegister flagReg = checkFlagRegister(ra->genReg(insn.dst(7))); - // We just simply use the temporary flag here. 
- GenRegister flagReg = GenRegister::flag(0, 1); + GBE_ASSERT(insn.state.flag == 0 && insn.state.subFlag == 1); + GenRegister flagReg = GenRegister::flag(insn.state.flag, insn.state.subFlag); GenRegister zero = GenRegister::immud(0); switch(insn.opcode) { case SEL_OP_I64SHL: @@ -1007,19 +1004,18 @@ namespace gbe GenRegister mantissa = ra->genReg(insn.dst(4)); GenRegister tmp = ra->genReg(insn.dst(5)); GenRegister tmp_high = ra->genReg(insn.dst(6)); - //GenRegister f0 = checkFlagRegister(ra->genReg(insn.dst(7))); - // We just simply use the temporary flag here. - GenRegister f0 = GenRegister::flag(0, 1); + GBE_ASSERT(insn.state.flag == 0 && insn.state.subFlag == 1); + GenRegister flagReg = GenRegister::flag(insn.state.flag, insn.state.subFlag); loadTopHalf(high, src); loadBottomHalf(low, src); if(!src.is_signed_int()) { - UnsignedI64ToFloat(dest, high, low, exp, mantissa, tmp, f0); + UnsignedI64ToFloat(dest, high, low, exp, mantissa, tmp, flagReg); } else { p->MOV(tmp_high, high); p->push(); p->curr.predicate = GEN_PREDICATE_NONE; p->curr.noMask = 1; - p->curr.useFlag(f0.flag_nr(), f0.flag_subnr()); + p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr()); p->CMP(GEN_CONDITIONAL_GE, tmp_high, GenRegister::immud(0x80000000)); p->curr.predicate = GEN_PREDICATE_NORMAL; p->NOT(high, high); @@ -1028,11 +1024,11 @@ namespace gbe addWithCarry(low, low, tmp); p->ADD(high, high, tmp); p->pop(); - UnsignedI64ToFloat(dest, high, low, exp, mantissa, tmp, f0); + UnsignedI64ToFloat(dest, high, low, exp, mantissa, tmp, flagReg); p->push(); p->curr.predicate = GEN_PREDICATE_NONE; p->curr.noMask = 1; - p->curr.useFlag(f0.flag_nr(), f0.flag_subnr()); + p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr()); p->CMP(GEN_CONDITIONAL_GE, tmp_high, GenRegister::immud(0x80000000)); p->curr.predicate = GEN_PREDICATE_NORMAL; dest.type = GEN_TYPE_UD; @@ -1047,9 +1043,8 @@ namespace gbe GenRegister dst = ra->genReg(insn.dst(0)); GenRegister high = ra->genReg(insn.dst(1)); GenRegister tmp = 
ra->genReg(insn.dst(2)); - //GenRegister flag0 = checkFlagRegister(ra->genReg(insn.dst(3))); - // We just simply use the temporary flag here. - GenRegister flag0 = GenRegister::flag(0, 1); + GBE_ASSERT(insn.state.flag == 0 && insn.state.subFlag == 1); + GenRegister flagReg = GenRegister::flag(insn.state.flag, insn.state.subFlag); if(dst.is_signed_int()) high = GenRegister::retype(high, GEN_TYPE_D); @@ -1068,7 +1063,7 @@ namespace gbe p->push(); p->curr.predicate = GEN_PREDICATE_NONE; p->curr.noMask = 1; - p->curr.useFlag(flag0.flag_nr(), flag0.flag_subnr()); + p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr()); p->CMP(GEN_CONDITIONAL_L, src, GenRegister::immf(0x0)); p->curr.predicate = GEN_PREDICATE_NORMAL; p->CMP(GEN_CONDITIONAL_NEQ, low, GenRegister::immud(0x0)); @@ -1170,9 +1165,8 @@ namespace gbe GenRegister c = ra->genReg(insn.dst(3)); GenRegister d = ra->genReg(insn.dst(4)); GenRegister e = ra->genReg(insn.dst(5)); - //GenRegister flagReg = checkFlagRegister(ra->genReg(insn.dst(6))); - // We just simply use the temporary flag here. - GenRegister flagReg = GenRegister::flag(0, 1); + GBE_ASSERT(insn.state.flag == 0 && insn.state.subFlag == 1); + GenRegister flagReg = GenRegister::flag(insn.state.flag, insn.state.subFlag); loadTopHalf(a, x); loadBottomHalf(b, x); loadTopHalf(c, y); @@ -1220,9 +1214,8 @@ namespace gbe GenRegister c = ra->genReg(insn.dst(3)); GenRegister d = ra->genReg(insn.dst(4)); GenRegister e = ra->genReg(insn.dst(5)); - //GenRegister flagReg = checkFlagRegister(ra->genReg(insn.dst(6))); - // We just simply use the temporary flag here. 
- GenRegister flagReg = GenRegister::flag(0, 1); + GBE_ASSERT(insn.state.flag == 0 && insn.state.subFlag == 1); + GenRegister flagReg = GenRegister::flag(insn.state.flag, insn.state.subFlag); loadTopHalf(a, x); loadBottomHalf(b, x); loadTopHalf(c, y); @@ -1428,9 +1421,8 @@ namespace gbe GenRegister k = ra->genReg(insn.dst(11)); GenRegister l = ra->genReg(insn.dst(12)); GenRegister m = ra->genReg(insn.dst(13)); - //GenRegister flagReg = checkFlagRegister(ra->genReg(insn.dst(14))); - // We just simply use the temporary flag here. - GenRegister flagReg = GenRegister::flag(0, 1); + GBE_ASSERT(insn.state.flag == 0 && insn.state.subFlag == 1); + GenRegister flagReg = GenRegister::flag(insn.state.flag, insn.state.subFlag); GenRegister zero = GenRegister::immud(0), one = GenRegister::immud(1), imm31 = GenRegister::immud(31); diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 975c71a2..d0e3d0b7 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -423,7 +423,7 @@ namespace gbe #define ALU3(OP) \ INLINE void OP(Reg dst, Reg src0, Reg src1, Reg src2) { ALU3(SEL_OP_##OP, dst, src0, src1, src2); } #define I64Shift(OP) \ - INLINE void OP(Reg dst, Reg src0, Reg src1, GenRegister tmp[7]) { I64Shift(SEL_OP_##OP, dst, src0, src1, tmp); } + INLINE void OP(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) { I64Shift(SEL_OP_##OP, dst, src0, src1, tmp); } ALU1(MOV) ALU1WithTemp(MOV_DF) ALU1WithTemp(LOAD_DF_IMM) @@ -477,13 +477,13 @@ namespace gbe #undef ALU3 #undef I64Shift /*! Convert 64-bit integer to 32-bit float */ - void CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[7]); + void CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[6]); /*! Convert 64-bit integer to 32-bit float */ - void CONVF_TO_I64(Reg dst, Reg src, GenRegister tmp[3]); + void CONVF_TO_I64(Reg dst, Reg src, GenRegister tmp[2]); /*! 
Saturated 64bit x*y + z */ - void I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister tmp[10]); + void I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister tmp[9]); /*! High 64bit of x*y */ - void I64_MUL_HI(Reg dst, Reg src0, Reg src1, GenRegister tmp[10]); + void I64_MUL_HI(Reg dst, Reg src0, Reg src1, GenRegister tmp[9]); /*! (x+y)>>1 without mod. overflow */ void I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]); /*! (x+y+1)>>1 without mod. overflow */ @@ -493,9 +493,9 @@ namespace gbe /*! Compare 64-bit integer */ void I64CMP(uint32_t conditional, Reg src0, Reg src1, GenRegister tmp[3]); /*! Saturated addition of 64-bit integer */ - void I64SATADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]); + void I64SATADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[5]); /*! Saturated subtraction of 64-bit integer */ - void I64SATSUB(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]); + void I64SATSUB(Reg dst, Reg src0, Reg src1, GenRegister tmp[5]); /*! Encode a barrier instruction */ void BARRIER(GenRegister src, GenRegister fence, uint32_t barrierType); /*! Encode a barrier instruction */ @@ -567,9 +567,9 @@ namespace gbe /*! Multiply 64-bit integers */ void I64MUL(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]); /*! 64-bit integer division */ - void I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[14]); + void I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]); /*! 64-bit integer remainder of division */ - void I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[14]); + void I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]); /*! 
Use custom allocators */ GBE_CLASS(Opaque); friend class SelectionBlock; @@ -1195,21 +1195,21 @@ namespace gbe insn->dst(i + 1) = tmp[i]; } - void Selection::Opaque::I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[14]) { - SelectionInstruction *insn = this->appendInsn(SEL_OP_I64DIV, 15, 2); + void Selection::Opaque::I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]) { + SelectionInstruction *insn = this->appendInsn(SEL_OP_I64DIV, 14, 2); insn->dst(0) = dst; insn->src(0) = src0; insn->src(1) = src1; - for(int i = 0; i < 14; i++) + for(int i = 0; i < 13; i++) insn->dst(i + 1) = tmp[i]; } - void Selection::Opaque::I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[14]) { - SelectionInstruction *insn = this->appendInsn(SEL_OP_I64REM, 15, 2); + void Selection::Opaque::I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]) { + SelectionInstruction *insn = this->appendInsn(SEL_OP_I64REM, 14, 2); insn->dst(0) = dst; insn->src(0) = src0; insn->src(1) = src1; - for(int i = 0; i < 14; i++) + for(int i = 0; i < 13; i++) insn->dst(i + 1) = tmp[i]; } @@ -1258,47 +1258,47 @@ namespace gbe insn->extra.function = conditional; } - void Selection::Opaque::I64SATADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) { + void Selection::Opaque::I64SATADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[5]) { SelectionInstruction *insn = this->appendInsn(SEL_OP_I64SATADD, 7, 2); insn->dst(0) = dst; insn->src(0) = src0; insn->src(1) = src1; - for(int i=0; i<6; i++) + for(int i=0; i<5; i++) insn->dst(i + 1) = tmp[i]; } - void Selection::Opaque::I64SATSUB(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) { - SelectionInstruction *insn = this->appendInsn(SEL_OP_I64SATSUB, 7, 2); + void Selection::Opaque::I64SATSUB(Reg dst, Reg src0, Reg src1, GenRegister tmp[5]) { + SelectionInstruction *insn = this->appendInsn(SEL_OP_I64SATSUB, 6, 2); insn->dst(0) = dst; insn->src(0) = src0; insn->src(1) = src1; - for(int i=0; i<6; i++) + for(int i=0; i<5; i++) insn->dst(i + 1) = tmp[i]; } - void 
Selection::Opaque::CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[7]) { - SelectionInstruction *insn = this->appendInsn(SEL_OP_CONVI64_TO_F, 8, 1); + void Selection::Opaque::CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[6]) { + SelectionInstruction *insn = this->appendInsn(SEL_OP_CONVI64_TO_F, 7, 1); insn->dst(0) = dst; insn->src(0) = src; - for(int i = 0; i < 7; i ++) + for(int i = 0; i < 6; i ++) insn->dst(i + 1) = tmp[i]; } - void Selection::Opaque::CONVF_TO_I64(Reg dst, Reg src, GenRegister tmp[3]) { - SelectionInstruction *insn = this->appendInsn(SEL_OP_CONVF_TO_I64, 4, 1); + void Selection::Opaque::CONVF_TO_I64(Reg dst, Reg src, GenRegister tmp[2]) { + SelectionInstruction *insn = this->appendInsn(SEL_OP_CONVF_TO_I64, 3, 1); insn->dst(0) = dst; insn->src(0) = src; - for(int i = 0; i < 3; i ++) + for(int i = 0; i < 2; i ++) insn->dst(i + 1) = tmp[i]; } - void Selection::Opaque::I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister tmp[10]) { - SelectionInstruction *insn = this->appendInsn(SEL_OP_I64MADSAT, 11, 3); + void Selection::Opaque::I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister tmp[9]) { + SelectionInstruction *insn = this->appendInsn(SEL_OP_I64MADSAT, 10, 3); insn->dst(0) = dst; insn->src(0) = src0; insn->src(1) = src1; insn->src(2) = src2; - for(int i = 0; i < 10; i ++) + for(int i = 0; i < 9; i ++) insn->dst(i + 1) = tmp[i]; } @@ -1329,12 +1329,12 @@ namespace gbe insn->dst(i + 1) = tmp[i]; } - void Selection::Opaque::I64Shift(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, GenRegister tmp[7]) { - SelectionInstruction *insn = this->appendInsn(opcode, 8, 2); + void Selection::Opaque::I64Shift(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) { + SelectionInstruction *insn = this->appendInsn(opcode, 7, 2); insn->dst(0) = dst; insn->src(0) = src0; insn->src(1) = src1; - for(int i = 0; i < 7; i ++) + for(int i = 0; i < 6; i ++) insn->dst(i + 1) = tmp[i]; } @@ -1784,16 +1784,19 @@ namespace gbe 
GBE_ASSERT(op != OP_REM); sel.MATH(dst, GEN_MATH_FUNCTION_FDIV, src0, src1); } else if (type == TYPE_S64 || type == TYPE_U64) { - GenRegister tmp[14]; - for(int i=0; i<13; i++) { + GenRegister tmp[13]; + for(int i=0; i < 13; i++) { tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); tmp[i].type = GEN_TYPE_UD; } - tmp[13] = sel.selReg(sel.reg(FAMILY_BOOL, true)); - if(op == OP_DIV) - sel.I64DIV(dst, src0, src1, tmp); - else - sel.I64REM(dst, src0, src1, tmp); + sel.push(); + sel.curr.flag = 0; + sel.curr.subFlag = 1; + if(op == OP_DIV) + sel.I64DIV(dst, src0, src1, tmp); + else + sel.I64REM(dst, src0, src1, tmp); + sel.pop(); } markAllChildren(dag); return true; @@ -1883,13 +1886,16 @@ namespace gbe break; case OP_ADDSAT: if (type == Type::TYPE_U64 || type == Type::TYPE_S64) { - GenRegister tmp[6]; + GenRegister tmp[5]; for(int i=0; i<5; i++) { tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); tmp[i].type = GEN_TYPE_UD; } - tmp[5] = sel.selReg(sel.reg(FAMILY_BOOL, true)); - sel.I64SATADD(dst, src0, src1, tmp); + sel.push(); + sel.curr.flag = 0; + sel.curr.subFlag = 1; + sel.I64SATADD(dst, src0, src1, tmp); + sel.pop(); break; } sel.push(); @@ -1924,13 +1930,16 @@ namespace gbe break; case OP_SUBSAT: if (type == Type::TYPE_U64 || type == Type::TYPE_S64) { - GenRegister tmp[6]; + GenRegister tmp[5]; for(int i=0; i<5; i++) { tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); tmp[i].type = GEN_TYPE_UD; } - tmp[5] = sel.selReg(sel.reg(FAMILY_BOOL, true)); - sel.I64SATSUB(dst, src0, src1, tmp); + sel.push(); + sel.curr.flag = 0; + sel.curr.subFlag = 1; + sel.I64SATSUB(dst, src0, src1, tmp); + sel.pop(); break; } sel.push(); @@ -1940,31 +1949,40 @@ namespace gbe break; case OP_SHL: if (type == TYPE_S64 || type == TYPE_U64) { - GenRegister tmp[7]; + GenRegister tmp[6]; for(int i = 0; i < 6; i ++) tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); - tmp[6] = sel.selReg(sel.reg(FAMILY_BOOL, true)); - sel.I64SHL(dst, src0, src1, tmp); + sel.push(); + sel.curr.flag = 0; + sel.curr.subFlag = 1; + 
sel.I64SHL(dst, src0, src1, tmp); + sel.pop(); } else sel.SHL(dst, src0, src1); break; case OP_SHR: if (type == TYPE_S64 || type == TYPE_U64) { - GenRegister tmp[7]; + GenRegister tmp[6]; for(int i = 0; i < 6; i ++) tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); - tmp[6] = sel.selReg(sel.reg(FAMILY_BOOL, true)); - sel.I64SHR(dst, src0, src1, tmp); + sel.push(); + sel.curr.flag = 0; + sel.curr.subFlag = 1; + sel.I64SHR(dst, src0, src1, tmp); + sel.pop(); } else sel.SHR(dst, src0, src1); break; case OP_ASR: if (type == TYPE_S64 || type == TYPE_U64) { - GenRegister tmp[7]; + GenRegister tmp[6]; for(int i = 0; i < 6; i ++) tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); - tmp[6] = sel.selReg(sel.reg(FAMILY_BOOL, true)); - sel.I64ASR(dst, src0, src1, tmp); + sel.push(); + sel.curr.flag = 0; + sel.curr.subFlag = 1; + sel.I64ASR(dst, src0, src1, tmp); + sel.pop(); } else sel.ASR(dst, src0, src1); break; @@ -1975,13 +1993,16 @@ namespace gbe } case OP_I64_MUL_HI: { - GenRegister temp[10]; + GenRegister temp[9]; for(int i=0; i<9; i++) { temp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); temp[i].type = GEN_TYPE_UD; } - temp[9] = sel.selReg(sel.reg(FAMILY_BOOL, true)); - sel.I64_MUL_HI(dst, src0, src1, temp); + sel.push(); + sel.curr.flag = 0; + sel.curr.subFlag = 1; + sel.I64_MUL_HI(dst, src0, src1, temp); + sel.pop(); break; } case OP_MUL: @@ -2752,17 +2773,6 @@ namespace gbe sel.curr.flagGen = 1; sel.CMP(getGenCompare(opcode), src0, src1, tmpDst); } -#if 0 - if((type == TYPE_S64 || type == TYPE_U64 || - type == TYPE_DOUBLE || type == TYPE_FLOAT || - type == TYPE_U32 || type == TYPE_S32) /*&& - needStoreBool*/) { - sel.curr.predicate = GEN_PREDICATE_NORMAL; - sel.SEL(sel.selReg(dst, TYPE_U16), - sel.selReg(ir::ocl::one, TYPE_U16), - sel.selReg(ir::ocl::zero, TYPE_U16)); - } -#endif sel.pop(); return true; } @@ -2893,12 +2903,15 @@ namespace gbe } else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) && srcFamily == FAMILY_QWORD) { sel.CONVI64_TO_I(dst, src); } else if (dstType 
== ir::TYPE_FLOAT && srcFamily == FAMILY_QWORD) { - GenRegister tmp[7]; + GenRegister tmp[6]; for(int i=0; i<6; i++) { tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); } - tmp[6] = sel.selReg(sel.reg(FAMILY_BOOL, true), TYPE_BOOL); - sel.CONVI64_TO_F(dst, src, tmp); + sel.push(); + sel.curr.flag = 0; + sel.curr.subFlag = 1; + sel.CONVI64_TO_F(dst, src, tmp); + sel.pop(); } else if (dst.isdf()) { ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD); sel.MOV_DF(dst, src, sel.selReg(r)); @@ -2906,11 +2919,14 @@ namespace gbe switch(src.type) { case GEN_TYPE_F: { - GenRegister tmp[3]; + GenRegister tmp[2]; tmp[0] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); tmp[1] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_FLOAT); - tmp[2] = sel.selReg(sel.reg(FAMILY_BOOL, true), TYPE_BOOL); - sel.CONVF_TO_I64(dst, src, tmp); + sel.push(); + sel.curr.flag = 0; + sel.curr.subFlag = 1; + sel.CONVF_TO_I64(dst, src, tmp); + sel.pop(); break; } case GEN_TYPE_DF: @@ -3018,13 +3034,16 @@ namespace gbe switch(insn.getOpcode()) { case OP_I64MADSAT: { - GenRegister tmp[10]; + GenRegister tmp[9]; for(int i=0; i<9; i++) { tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); tmp[i].type = GEN_TYPE_UD; } - tmp[9] = sel.selReg(sel.reg(FAMILY_BOOL, true)); - sel.I64MADSAT(dst, src0, src1, src2, tmp); + sel.push(); + sel.curr.flag = 0; + sel.curr.subFlag = 1; + sel.I64MADSAT(dst, src0, src1, src2, tmp); + sel.pop(); break; } case OP_MAD: diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp index c6d7d586..937f5b22 100644 --- a/backend/src/backend/gen_reg_allocation.cpp +++ b/backend/src/backend/gen_reg_allocation.cpp @@ -584,6 +584,11 @@ namespace gbe interval1.minID = std::min(interval1.minID, (int32_t)insn.ID); interval1.maxID = std::max(interval1.maxID, (int32_t)insn.ID); } + } else { + // If the instruction use the temporary flag register manually, + // we should invalidate the temp flag reg here. 
+ if (insn.state.flag == 0 && insn.state.subFlag == 1) + validTempFlagReg = 0; } } } |