summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZhigang Gong <zhigang.gong@intel.com>2014-04-02 14:36:19 +0800
committerZhigang Gong <zhigang.gong@intel.com>2014-04-08 16:21:33 +0800
commit745fb36d9f756ebce120d49498f8ab04a9f14848 (patch)
tree2a25719f49e0d90d8c52da27b27b5fab700a98bf
parent858414e497141b54c0df18510cb764d3770a2d9c (diff)
GBE: fix the flag usage of the long/64-bit instructions.
Make the flag allocation aware that the long/64-bit instructions use flag f0.1, and don't hard-code f0.1 at the gen_context stage. Signed-off-by: Zhigang Gong <zhigang.gong@intel.com> Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com> Reviewed-by: "Song, Ruiling" <ruiling.song@intel.com>
-rw-r--r--backend/src/backend/gen_context.cpp50
-rw-r--r--backend/src/backend/gen_insn_selection.cpp173
-rw-r--r--backend/src/backend/gen_reg_allocation.cpp5
3 files changed, 122 insertions, 106 deletions
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 3224c065..50f10c5f 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -566,9 +566,8 @@ namespace gbe
GenRegister g = ra->genReg(insn.dst(7));
GenRegister h = ra->genReg(insn.dst(8));
GenRegister i = ra->genReg(insn.dst(9));
- //GenRegister flagReg = checkFlagRegister(ra->genReg(insn.dst(10)));
- // We just simply use the temporary flag here.
- GenRegister flagReg = GenRegister::flag(0, 1);
+ GBE_ASSERT(insn.state.flag == 0 && insn.state.subFlag == 1);
+ GenRegister flagReg = GenRegister::flag(insn.state.flag, insn.state.subFlag);
loadTopHalf(a, x);
loadBottomHalf(b, x);
loadTopHalf(c, y);
@@ -615,9 +614,8 @@ namespace gbe
GenRegister g = ra->genReg(insn.dst(7));
GenRegister h = ra->genReg(insn.dst(8));
GenRegister i = ra->genReg(insn.dst(9));
- //GenRegister flagReg = checkFlagRegister(ra->genReg(insn.dst(10)));
- // We just simply use the temporary flag here.
- GenRegister flagReg = GenRegister::flag(0, 1);
+ GBE_ASSERT(insn.state.flag == 0 && insn.state.subFlag == 1);
+ GenRegister flagReg = GenRegister::flag(insn.state.flag, insn.state.subFlag);
GenRegister zero = GenRegister::immud(0), one = GenRegister::immud(1);
loadTopHalf(a, x);
loadBottomHalf(b, x);
@@ -801,9 +799,8 @@ namespace gbe
GenRegister e = ra->genReg(insn.dst(5));
GenRegister f = ra->genReg(insn.dst(6));
a.type = b.type = c.type = d.type = e.type = f.type = GEN_TYPE_UD;
- //GenRegister flagReg = checkFlagRegister(ra->genReg(insn.dst(7)));
- // We just simply use the temporary flag here.
- GenRegister flagReg = GenRegister::flag(0, 1);
+ GBE_ASSERT(insn.state.flag == 0 && insn.state.subFlag == 1);
+ GenRegister flagReg = GenRegister::flag(insn.state.flag, insn.state.subFlag);
GenRegister zero = GenRegister::immud(0);
switch(insn.opcode) {
case SEL_OP_I64SHL:
@@ -1007,19 +1004,18 @@ namespace gbe
GenRegister mantissa = ra->genReg(insn.dst(4));
GenRegister tmp = ra->genReg(insn.dst(5));
GenRegister tmp_high = ra->genReg(insn.dst(6));
- //GenRegister f0 = checkFlagRegister(ra->genReg(insn.dst(7)));
- // We just simply use the temporary flag here.
- GenRegister f0 = GenRegister::flag(0, 1);
+ GBE_ASSERT(insn.state.flag == 0 && insn.state.subFlag == 1);
+ GenRegister flagReg = GenRegister::flag(insn.state.flag, insn.state.subFlag);
loadTopHalf(high, src);
loadBottomHalf(low, src);
if(!src.is_signed_int()) {
- UnsignedI64ToFloat(dest, high, low, exp, mantissa, tmp, f0);
+ UnsignedI64ToFloat(dest, high, low, exp, mantissa, tmp, flagReg);
} else {
p->MOV(tmp_high, high);
p->push();
p->curr.predicate = GEN_PREDICATE_NONE;
p->curr.noMask = 1;
- p->curr.useFlag(f0.flag_nr(), f0.flag_subnr());
+ p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
p->CMP(GEN_CONDITIONAL_GE, tmp_high, GenRegister::immud(0x80000000));
p->curr.predicate = GEN_PREDICATE_NORMAL;
p->NOT(high, high);
@@ -1028,11 +1024,11 @@ namespace gbe
addWithCarry(low, low, tmp);
p->ADD(high, high, tmp);
p->pop();
- UnsignedI64ToFloat(dest, high, low, exp, mantissa, tmp, f0);
+ UnsignedI64ToFloat(dest, high, low, exp, mantissa, tmp, flagReg);
p->push();
p->curr.predicate = GEN_PREDICATE_NONE;
p->curr.noMask = 1;
- p->curr.useFlag(f0.flag_nr(), f0.flag_subnr());
+ p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
p->CMP(GEN_CONDITIONAL_GE, tmp_high, GenRegister::immud(0x80000000));
p->curr.predicate = GEN_PREDICATE_NORMAL;
dest.type = GEN_TYPE_UD;
@@ -1047,9 +1043,8 @@ namespace gbe
GenRegister dst = ra->genReg(insn.dst(0));
GenRegister high = ra->genReg(insn.dst(1));
GenRegister tmp = ra->genReg(insn.dst(2));
- //GenRegister flag0 = checkFlagRegister(ra->genReg(insn.dst(3)));
- // We just simply use the temporary flag here.
- GenRegister flag0 = GenRegister::flag(0, 1);
+ GBE_ASSERT(insn.state.flag == 0 && insn.state.subFlag == 1);
+ GenRegister flagReg = GenRegister::flag(insn.state.flag, insn.state.subFlag);
if(dst.is_signed_int())
high = GenRegister::retype(high, GEN_TYPE_D);
@@ -1068,7 +1063,7 @@ namespace gbe
p->push();
p->curr.predicate = GEN_PREDICATE_NONE;
p->curr.noMask = 1;
- p->curr.useFlag(flag0.flag_nr(), flag0.flag_subnr());
+ p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
p->CMP(GEN_CONDITIONAL_L, src, GenRegister::immf(0x0));
p->curr.predicate = GEN_PREDICATE_NORMAL;
p->CMP(GEN_CONDITIONAL_NEQ, low, GenRegister::immud(0x0));
@@ -1170,9 +1165,8 @@ namespace gbe
GenRegister c = ra->genReg(insn.dst(3));
GenRegister d = ra->genReg(insn.dst(4));
GenRegister e = ra->genReg(insn.dst(5));
- //GenRegister flagReg = checkFlagRegister(ra->genReg(insn.dst(6)));
- // We just simply use the temporary flag here.
- GenRegister flagReg = GenRegister::flag(0, 1);
+ GBE_ASSERT(insn.state.flag == 0 && insn.state.subFlag == 1);
+ GenRegister flagReg = GenRegister::flag(insn.state.flag, insn.state.subFlag);
loadTopHalf(a, x);
loadBottomHalf(b, x);
loadTopHalf(c, y);
@@ -1220,9 +1214,8 @@ namespace gbe
GenRegister c = ra->genReg(insn.dst(3));
GenRegister d = ra->genReg(insn.dst(4));
GenRegister e = ra->genReg(insn.dst(5));
- //GenRegister flagReg = checkFlagRegister(ra->genReg(insn.dst(6)));
- // We just simply use the temporary flag here.
- GenRegister flagReg = GenRegister::flag(0, 1);
+ GBE_ASSERT(insn.state.flag == 0 && insn.state.subFlag == 1);
+ GenRegister flagReg = GenRegister::flag(insn.state.flag, insn.state.subFlag);
loadTopHalf(a, x);
loadBottomHalf(b, x);
loadTopHalf(c, y);
@@ -1428,9 +1421,8 @@ namespace gbe
GenRegister k = ra->genReg(insn.dst(11));
GenRegister l = ra->genReg(insn.dst(12));
GenRegister m = ra->genReg(insn.dst(13));
- //GenRegister flagReg = checkFlagRegister(ra->genReg(insn.dst(14)));
- // We just simply use the temporary flag here.
- GenRegister flagReg = GenRegister::flag(0, 1);
+ GBE_ASSERT(insn.state.flag == 0 && insn.state.subFlag == 1);
+ GenRegister flagReg = GenRegister::flag(insn.state.flag, insn.state.subFlag);
GenRegister zero = GenRegister::immud(0),
one = GenRegister::immud(1),
imm31 = GenRegister::immud(31);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 975c71a2..d0e3d0b7 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -423,7 +423,7 @@ namespace gbe
#define ALU3(OP) \
INLINE void OP(Reg dst, Reg src0, Reg src1, Reg src2) { ALU3(SEL_OP_##OP, dst, src0, src1, src2); }
#define I64Shift(OP) \
- INLINE void OP(Reg dst, Reg src0, Reg src1, GenRegister tmp[7]) { I64Shift(SEL_OP_##OP, dst, src0, src1, tmp); }
+ INLINE void OP(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) { I64Shift(SEL_OP_##OP, dst, src0, src1, tmp); }
ALU1(MOV)
ALU1WithTemp(MOV_DF)
ALU1WithTemp(LOAD_DF_IMM)
@@ -477,13 +477,13 @@ namespace gbe
#undef ALU3
#undef I64Shift
/*! Convert 64-bit integer to 32-bit float */
- void CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[7]);
+ void CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[6]);
/*! Convert 64-bit integer to 32-bit float */
- void CONVF_TO_I64(Reg dst, Reg src, GenRegister tmp[3]);
+ void CONVF_TO_I64(Reg dst, Reg src, GenRegister tmp[2]);
/*! Saturated 64bit x*y + z */
- void I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister tmp[10]);
+ void I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister tmp[9]);
/*! High 64bit of x*y */
- void I64_MUL_HI(Reg dst, Reg src0, Reg src1, GenRegister tmp[10]);
+ void I64_MUL_HI(Reg dst, Reg src0, Reg src1, GenRegister tmp[9]);
/*! (x+y)>>1 without mod. overflow */
void I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]);
/*! (x+y+1)>>1 without mod. overflow */
@@ -493,9 +493,9 @@ namespace gbe
/*! Compare 64-bit integer */
void I64CMP(uint32_t conditional, Reg src0, Reg src1, GenRegister tmp[3]);
/*! Saturated addition of 64-bit integer */
- void I64SATADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]);
+ void I64SATADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[5]);
/*! Saturated subtraction of 64-bit integer */
- void I64SATSUB(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]);
+ void I64SATSUB(Reg dst, Reg src0, Reg src1, GenRegister tmp[5]);
/*! Encode a barrier instruction */
void BARRIER(GenRegister src, GenRegister fence, uint32_t barrierType);
/*! Encode a barrier instruction */
@@ -567,9 +567,9 @@ namespace gbe
/*! Multiply 64-bit integers */
void I64MUL(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]);
/*! 64-bit integer division */
- void I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[14]);
+ void I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]);
/*! 64-bit integer remainder of division */
- void I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[14]);
+ void I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]);
/*! Use custom allocators */
GBE_CLASS(Opaque);
friend class SelectionBlock;
@@ -1195,21 +1195,21 @@ namespace gbe
insn->dst(i + 1) = tmp[i];
}
- void Selection::Opaque::I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[14]) {
- SelectionInstruction *insn = this->appendInsn(SEL_OP_I64DIV, 15, 2);
+ void Selection::Opaque::I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_I64DIV, 14, 2);
insn->dst(0) = dst;
insn->src(0) = src0;
insn->src(1) = src1;
- for(int i = 0; i < 14; i++)
+ for(int i = 0; i < 13; i++)
insn->dst(i + 1) = tmp[i];
}
- void Selection::Opaque::I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[14]) {
- SelectionInstruction *insn = this->appendInsn(SEL_OP_I64REM, 15, 2);
+ void Selection::Opaque::I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_I64REM, 14, 2);
insn->dst(0) = dst;
insn->src(0) = src0;
insn->src(1) = src1;
- for(int i = 0; i < 14; i++)
+ for(int i = 0; i < 13; i++)
insn->dst(i + 1) = tmp[i];
}
@@ -1258,47 +1258,47 @@ namespace gbe
insn->extra.function = conditional;
}
- void Selection::Opaque::I64SATADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) {
+ void Selection::Opaque::I64SATADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[5]) {
SelectionInstruction *insn = this->appendInsn(SEL_OP_I64SATADD, 7, 2);
insn->dst(0) = dst;
insn->src(0) = src0;
insn->src(1) = src1;
- for(int i=0; i<6; i++)
+ for(int i=0; i<5; i++)
insn->dst(i + 1) = tmp[i];
}
- void Selection::Opaque::I64SATSUB(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) {
- SelectionInstruction *insn = this->appendInsn(SEL_OP_I64SATSUB, 7, 2);
+ void Selection::Opaque::I64SATSUB(Reg dst, Reg src0, Reg src1, GenRegister tmp[5]) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_I64SATSUB, 6, 2);
insn->dst(0) = dst;
insn->src(0) = src0;
insn->src(1) = src1;
- for(int i=0; i<6; i++)
+ for(int i=0; i<5; i++)
insn->dst(i + 1) = tmp[i];
}
- void Selection::Opaque::CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[7]) {
- SelectionInstruction *insn = this->appendInsn(SEL_OP_CONVI64_TO_F, 8, 1);
+ void Selection::Opaque::CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[6]) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_CONVI64_TO_F, 7, 1);
insn->dst(0) = dst;
insn->src(0) = src;
- for(int i = 0; i < 7; i ++)
+ for(int i = 0; i < 6; i ++)
insn->dst(i + 1) = tmp[i];
}
- void Selection::Opaque::CONVF_TO_I64(Reg dst, Reg src, GenRegister tmp[3]) {
- SelectionInstruction *insn = this->appendInsn(SEL_OP_CONVF_TO_I64, 4, 1);
+ void Selection::Opaque::CONVF_TO_I64(Reg dst, Reg src, GenRegister tmp[2]) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_CONVF_TO_I64, 3, 1);
insn->dst(0) = dst;
insn->src(0) = src;
- for(int i = 0; i < 3; i ++)
+ for(int i = 0; i < 2; i ++)
insn->dst(i + 1) = tmp[i];
}
- void Selection::Opaque::I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister tmp[10]) {
- SelectionInstruction *insn = this->appendInsn(SEL_OP_I64MADSAT, 11, 3);
+ void Selection::Opaque::I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister tmp[9]) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_I64MADSAT, 10, 3);
insn->dst(0) = dst;
insn->src(0) = src0;
insn->src(1) = src1;
insn->src(2) = src2;
- for(int i = 0; i < 10; i ++)
+ for(int i = 0; i < 9; i ++)
insn->dst(i + 1) = tmp[i];
}
@@ -1329,12 +1329,12 @@ namespace gbe
insn->dst(i + 1) = tmp[i];
}
- void Selection::Opaque::I64Shift(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, GenRegister tmp[7]) {
- SelectionInstruction *insn = this->appendInsn(opcode, 8, 2);
+ void Selection::Opaque::I64Shift(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) {
+ SelectionInstruction *insn = this->appendInsn(opcode, 7, 2);
insn->dst(0) = dst;
insn->src(0) = src0;
insn->src(1) = src1;
- for(int i = 0; i < 7; i ++)
+ for(int i = 0; i < 6; i ++)
insn->dst(i + 1) = tmp[i];
}
@@ -1784,16 +1784,19 @@ namespace gbe
GBE_ASSERT(op != OP_REM);
sel.MATH(dst, GEN_MATH_FUNCTION_FDIV, src0, src1);
} else if (type == TYPE_S64 || type == TYPE_U64) {
- GenRegister tmp[14];
- for(int i=0; i<13; i++) {
+ GenRegister tmp[13];
+ for(int i=0; i < 13; i++) {
tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
tmp[i].type = GEN_TYPE_UD;
}
- tmp[13] = sel.selReg(sel.reg(FAMILY_BOOL, true));
- if(op == OP_DIV)
- sel.I64DIV(dst, src0, src1, tmp);
- else
- sel.I64REM(dst, src0, src1, tmp);
+ sel.push();
+ sel.curr.flag = 0;
+ sel.curr.subFlag = 1;
+ if(op == OP_DIV)
+ sel.I64DIV(dst, src0, src1, tmp);
+ else
+ sel.I64REM(dst, src0, src1, tmp);
+ sel.pop();
}
markAllChildren(dag);
return true;
@@ -1883,13 +1886,16 @@ namespace gbe
break;
case OP_ADDSAT:
if (type == Type::TYPE_U64 || type == Type::TYPE_S64) {
- GenRegister tmp[6];
+ GenRegister tmp[5];
for(int i=0; i<5; i++) {
tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
tmp[i].type = GEN_TYPE_UD;
}
- tmp[5] = sel.selReg(sel.reg(FAMILY_BOOL, true));
- sel.I64SATADD(dst, src0, src1, tmp);
+ sel.push();
+ sel.curr.flag = 0;
+ sel.curr.subFlag = 1;
+ sel.I64SATADD(dst, src0, src1, tmp);
+ sel.pop();
break;
}
sel.push();
@@ -1924,13 +1930,16 @@ namespace gbe
break;
case OP_SUBSAT:
if (type == Type::TYPE_U64 || type == Type::TYPE_S64) {
- GenRegister tmp[6];
+ GenRegister tmp[5];
for(int i=0; i<5; i++) {
tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
tmp[i].type = GEN_TYPE_UD;
}
- tmp[5] = sel.selReg(sel.reg(FAMILY_BOOL, true));
- sel.I64SATSUB(dst, src0, src1, tmp);
+ sel.push();
+ sel.curr.flag = 0;
+ sel.curr.subFlag = 1;
+ sel.I64SATSUB(dst, src0, src1, tmp);
+ sel.pop();
break;
}
sel.push();
@@ -1940,31 +1949,40 @@ namespace gbe
break;
case OP_SHL:
if (type == TYPE_S64 || type == TYPE_U64) {
- GenRegister tmp[7];
+ GenRegister tmp[6];
for(int i = 0; i < 6; i ++)
tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
- tmp[6] = sel.selReg(sel.reg(FAMILY_BOOL, true));
- sel.I64SHL(dst, src0, src1, tmp);
+ sel.push();
+ sel.curr.flag = 0;
+ sel.curr.subFlag = 1;
+ sel.I64SHL(dst, src0, src1, tmp);
+ sel.pop();
} else
sel.SHL(dst, src0, src1);
break;
case OP_SHR:
if (type == TYPE_S64 || type == TYPE_U64) {
- GenRegister tmp[7];
+ GenRegister tmp[6];
for(int i = 0; i < 6; i ++)
tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
- tmp[6] = sel.selReg(sel.reg(FAMILY_BOOL, true));
- sel.I64SHR(dst, src0, src1, tmp);
+ sel.push();
+ sel.curr.flag = 0;
+ sel.curr.subFlag = 1;
+ sel.I64SHR(dst, src0, src1, tmp);
+ sel.pop();
} else
sel.SHR(dst, src0, src1);
break;
case OP_ASR:
if (type == TYPE_S64 || type == TYPE_U64) {
- GenRegister tmp[7];
+ GenRegister tmp[6];
for(int i = 0; i < 6; i ++)
tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
- tmp[6] = sel.selReg(sel.reg(FAMILY_BOOL, true));
- sel.I64ASR(dst, src0, src1, tmp);
+ sel.push();
+ sel.curr.flag = 0;
+ sel.curr.subFlag = 1;
+ sel.I64ASR(dst, src0, src1, tmp);
+ sel.pop();
} else
sel.ASR(dst, src0, src1);
break;
@@ -1975,13 +1993,16 @@ namespace gbe
}
case OP_I64_MUL_HI:
{
- GenRegister temp[10];
+ GenRegister temp[9];
for(int i=0; i<9; i++) {
temp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
temp[i].type = GEN_TYPE_UD;
}
- temp[9] = sel.selReg(sel.reg(FAMILY_BOOL, true));
- sel.I64_MUL_HI(dst, src0, src1, temp);
+ sel.push();
+ sel.curr.flag = 0;
+ sel.curr.subFlag = 1;
+ sel.I64_MUL_HI(dst, src0, src1, temp);
+ sel.pop();
break;
}
case OP_MUL:
@@ -2752,17 +2773,6 @@ namespace gbe
sel.curr.flagGen = 1;
sel.CMP(getGenCompare(opcode), src0, src1, tmpDst);
}
-#if 0
- if((type == TYPE_S64 || type == TYPE_U64 ||
- type == TYPE_DOUBLE || type == TYPE_FLOAT ||
- type == TYPE_U32 || type == TYPE_S32) /*&&
- needStoreBool*/) {
- sel.curr.predicate = GEN_PREDICATE_NORMAL;
- sel.SEL(sel.selReg(dst, TYPE_U16),
- sel.selReg(ir::ocl::one, TYPE_U16),
- sel.selReg(ir::ocl::zero, TYPE_U16));
- }
-#endif
sel.pop();
return true;
}
@@ -2893,12 +2903,15 @@ namespace gbe
} else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) && srcFamily == FAMILY_QWORD) {
sel.CONVI64_TO_I(dst, src);
} else if (dstType == ir::TYPE_FLOAT && srcFamily == FAMILY_QWORD) {
- GenRegister tmp[7];
+ GenRegister tmp[6];
for(int i=0; i<6; i++) {
tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
}
- tmp[6] = sel.selReg(sel.reg(FAMILY_BOOL, true), TYPE_BOOL);
- sel.CONVI64_TO_F(dst, src, tmp);
+ sel.push();
+ sel.curr.flag = 0;
+ sel.curr.subFlag = 1;
+ sel.CONVI64_TO_F(dst, src, tmp);
+ sel.pop();
} else if (dst.isdf()) {
ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
sel.MOV_DF(dst, src, sel.selReg(r));
@@ -2906,11 +2919,14 @@ namespace gbe
switch(src.type) {
case GEN_TYPE_F:
{
- GenRegister tmp[3];
+ GenRegister tmp[2];
tmp[0] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
tmp[1] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_FLOAT);
- tmp[2] = sel.selReg(sel.reg(FAMILY_BOOL, true), TYPE_BOOL);
- sel.CONVF_TO_I64(dst, src, tmp);
+ sel.push();
+ sel.curr.flag = 0;
+ sel.curr.subFlag = 1;
+ sel.CONVF_TO_I64(dst, src, tmp);
+ sel.pop();
break;
}
case GEN_TYPE_DF:
@@ -3018,13 +3034,16 @@ namespace gbe
switch(insn.getOpcode()) {
case OP_I64MADSAT:
{
- GenRegister tmp[10];
+ GenRegister tmp[9];
for(int i=0; i<9; i++) {
tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
tmp[i].type = GEN_TYPE_UD;
}
- tmp[9] = sel.selReg(sel.reg(FAMILY_BOOL, true));
- sel.I64MADSAT(dst, src0, src1, src2, tmp);
+ sel.push();
+ sel.curr.flag = 0;
+ sel.curr.subFlag = 1;
+ sel.I64MADSAT(dst, src0, src1, src2, tmp);
+ sel.pop();
break;
}
case OP_MAD:
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index c6d7d586..937f5b22 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -584,6 +584,11 @@ namespace gbe
interval1.minID = std::min(interval1.minID, (int32_t)insn.ID);
interval1.maxID = std::max(interval1.maxID, (int32_t)insn.ID);
}
+ } else {
+ // If the instruction uses the temporary flag register manually,
+ // we should invalidate the temp flag reg here.
+ if (insn.state.flag == 0 && insn.state.subFlag == 1)
+ validTempFlagReg = 0;
}
}
}