summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunyan He <junyan.he@linux.intel.com>2015-01-06 18:02:39 +0800
committerZhigang Gong <zhigang.gong@intel.com>2015-01-20 16:31:05 +0800
commit72aa942c654cd6dee5f9214b9db6bdba0ae550b9 (patch)
tree7aea9312776cec028d96c1161851bfbc7635cdb1
parentb0a375a20a85c2e2b3f96b2e4400e14249fe58a0 (diff)
Overload the I64MADSAT function.
Signed-off-by: Junyan He <junyan.he@linux.intel.com> Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
-rw-r--r--backend/src/backend/gen8_context.cpp148
-rw-r--r--backend/src/backend/gen8_context.hpp2
-rw-r--r--backend/src/backend/gen_context.hpp2
-rw-r--r--backend/src/backend/gen_insn_selection.cpp40
4 files changed, 176 insertions, 16 deletions
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index a8167809..8960d5be 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -218,6 +218,154 @@ namespace gbe
}
}
+ /*! Emit the Gen8 instruction sequence for the saturated 64-bit
+  *  multiply-add src0 * src1 + src2.
+  *
+  *  Operands read from the selection instruction:
+  *   - src(0), src(1): the two multiplicands; src(2): the addend.
+  *   - dst(0) (dst_l): receives the final result.
+  *   - dst(1) (dst_h): second output of the full multiply helpers;
+  *     presumably the high 64 bits of the product (see the pseudo-code
+  *     below) -- confirm against calculateFullU64MUL/calculateFullS64MUL.
+  *   - dst(2)..dst(6): scratch registers (s0_abs, s1_abs, tmp0, tmp1, sign)
+  *     handed to the multiply helpers and reused for the fix-up steps.
+  *  The flag register selected by insn.state.flag/subFlag carries the
+  *  comparison results that drive the predicated fix-up instructions.
+  *
+  *  All three sources must share the same 64-bit type (UL or L).
+  */
+ void Gen8Context::emitI64MADSATInstruction(const SelectionInstruction &insn)
+ {
+   GenRegister src0 = ra->genReg(insn.src(0));
+   GenRegister src1 = ra->genReg(insn.src(1));
+   GenRegister src2 = ra->genReg(insn.src(2));
+   GenRegister dst_l = ra->genReg(insn.dst(0));
+   GenRegister dst_h = ra->genReg(insn.dst(1));
+   GenRegister s0_abs = ra->genReg(insn.dst(2));
+   GenRegister s1_abs = ra->genReg(insn.dst(3));
+   GenRegister tmp0 = ra->genReg(insn.dst(4));
+   GenRegister tmp1 = ra->genReg(insn.dst(5));
+   GenRegister sign = ra->genReg(insn.dst(6));
+   GenRegister flagReg = GenRegister::flag(insn.state.flag, insn.state.subFlag);
+
+   if (src0.type == GEN_TYPE_UL) {
+     /* Always should be the same long type. */
+     GBE_ASSERT(src1.type == GEN_TYPE_UL);
+     GBE_ASSERT(src2.type == GEN_TYPE_UL);
+     dst_l.type = dst_h.type = GEN_TYPE_UL;
+     tmp0.type = tmp1.type = GEN_TYPE_UL;
+     calculateFullU64MUL(p, src0, src1, dst_h, dst_l, tmp0, tmp1);
+
+     /* Implement the logic:
+        dst_l += src2;
+        if (dst_h)
+          dst_l = 0xFFFFFFFFFFFFFFFFULL;
+        if (dst_l < src2) // carry if overflow
+          dst_l = 0xFFFFFFFFFFFFFFFFULL;
+      */
+     p->ADD(dst_l, dst_l, src2);
+
+     /* Saturate when the product did not fit in 64 bits (dst_h != 0). */
+     p->push();
+     p->curr.predicate = GEN_PREDICATE_NONE;
+     p->curr.noMask = 1;
+     p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+     p->CMP(GEN_CONDITIONAL_NZ, dst_h, GenRegister::immud(0), tmp0);
+     p->curr.noMask = 0;
+     /* The MOV must be predicated on the CMP result, exactly as the signed
+        path below does; otherwise every channel would be saturated. */
+     p->curr.predicate = GEN_PREDICATE_NORMAL;
+     p->MOV(dst_l, GenRegister::immuint64(0xFFFFFFFFFFFFFFFF));
+     p->pop();
+
+     /* Saturate when adding src2 wrapped around (dst_l < src2). */
+     p->push();
+     p->curr.predicate = GEN_PREDICATE_NONE;
+     p->curr.noMask = 1;
+     p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+     p->CMP(GEN_CONDITIONAL_L, dst_l, src2, tmp0);
+     p->curr.noMask = 0;
+     /* Same as above: only the channels that overflowed get saturated. */
+     p->curr.predicate = GEN_PREDICATE_NORMAL;
+     p->MOV(dst_l, GenRegister::immuint64(0xFFFFFFFFFFFFFFFF));
+     p->pop();
+   } else {
+     GBE_ASSERT(src0.type == GEN_TYPE_L);
+     GBE_ASSERT(src1.type == GEN_TYPE_L);
+     GBE_ASSERT(src2.type == GEN_TYPE_L);
+
+     calculateFullS64MUL(p, src0, src1, dst_h, dst_l, s0_abs, s1_abs, tmp0,
+                         tmp1, sign, flagReg);
+
+     /* The sign scratch register is reused to hold the unsigned sum
+        src2 + dst_l. */
+     GenRegister sum = sign;
+     sum.type = GEN_TYPE_UL;
+     src2.type = GEN_TYPE_L;
+     dst_l.type = GEN_TYPE_UL;
+     p->NOP();
+     p->ADD(sum, src2, dst_l);
+
+     /* Implement this logic:
+        if(src2 >= 0) {
+          if(dst_l > sum) {
+            dst_h++;
+            if(CL_LONG_MIN == dst_h) {
+              dst_h = CL_LONG_MAX;
+              sum = CL_ULONG_MAX;
+            }
+          }
+        } */
+     p->push();
+     p->curr.predicate = GEN_PREDICATE_NONE;
+     p->curr.noMask = 1;
+     p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+     p->CMP(GEN_CONDITIONAL_GE, src2, GenRegister::immud(0), tmp1);
+     p->curr.noMask = 0;
+     /* Everything from here to pop() is predicated on the flag. */
+     p->curr.predicate = GEN_PREDICATE_NORMAL;
+     p->CMP(GEN_CONDITIONAL_G, dst_l, sum, tmp1);
+     p->ADD(dst_h, dst_h, GenRegister::immud(1));
+     p->MOV(tmp0, GenRegister::immint64(-0x7FFFFFFFFFFFFFFFLL - 1LL));
+     p->CMP(GEN_CONDITIONAL_EQ, dst_h, tmp0, tmp1);
+     p->MOV(dst_h, GenRegister::immint64(0x7FFFFFFFFFFFFFFFLL));
+     p->MOV(sum, GenRegister::immuint64(0xFFFFFFFFFFFFFFFFULL));
+     p->pop();
+     p->NOP();
+
+     /* Implement this logic:
+        else {
+          if(dst_l < sum) {
+            dst_h--;
+            if(CL_LONG_MAX == dst_h) {
+              dst_h = CL_LONG_MIN;
+              sum = 0;
+            }
+          }
+        } */
+     p->push();
+     p->curr.predicate = GEN_PREDICATE_NONE;
+     p->curr.noMask = 1;
+     p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+     p->CMP(GEN_CONDITIONAL_L, src2, GenRegister::immud(0), tmp1);
+     p->curr.noMask = 0;
+     p->curr.predicate = GEN_PREDICATE_NORMAL;
+     p->CMP(GEN_CONDITIONAL_L, dst_l, sum, tmp1);
+     p->ADD(dst_h, dst_h, GenRegister::immd(-1));
+     p->MOV(tmp0, GenRegister::immint64(0x7FFFFFFFFFFFFFFFLL));
+     p->CMP(GEN_CONDITIONAL_EQ, dst_h, tmp0, tmp1);
+     p->MOV(dst_h, GenRegister::immint64(-0x7FFFFFFFFFFFFFFFLL - 1LL));
+     p->MOV(sum, GenRegister::immud(0));
+     p->pop();
+     p->NOP();
+
+     /* saturate logic:
+        if(dst_h > 0)
+          sum = CL_LONG_MAX;
+        else if(dst_h < -1)
+          sum = CL_LONG_MIN;
+        cl_long result = (cl_long) sum; */
+     p->MOV(dst_l, sum);
+
+     /* dst_h is compared as a signed value from here on. */
+     dst_h.type = GEN_TYPE_L;
+     p->push();
+     p->curr.predicate = GEN_PREDICATE_NONE;
+     p->curr.noMask = 1;
+     p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+     p->CMP(GEN_CONDITIONAL_G, dst_h, GenRegister::immud(0), tmp1);
+     p->curr.noMask = 0;
+     p->curr.predicate = GEN_PREDICATE_NORMAL;
+     p->MOV(dst_l, GenRegister::immint64(0x7FFFFFFFFFFFFFFFLL));
+     p->pop();
+     p->NOP();
+
+     p->push();
+     p->curr.predicate = GEN_PREDICATE_NONE;
+     p->curr.noMask = 1;
+     p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+     p->CMP(GEN_CONDITIONAL_L, dst_h, GenRegister::immd(-1), tmp1);
+     p->curr.noMask = 0;
+     p->curr.predicate = GEN_PREDICATE_NORMAL;
+     p->MOV(dst_l, GenRegister::immint64(-0x7FFFFFFFFFFFFFFFLL - 1LL));
+     p->pop();
+     p->NOP();
+   }
+ }
+
void Gen8Context::emitI64MULInstruction(const SelectionInstruction &insn)
{
GenRegister src0 = ra->genReg(insn.src(0));
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index 6dc8afc7..9f1d7498 100644
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -55,6 +55,8 @@ namespace gbe
virtual void emitI64MULHIInstruction(const SelectionInstruction &insn);
virtual void emitI64RHADDInstruction(const SelectionInstruction &insn);
virtual void emitI64HADDInstruction(const SelectionInstruction &insn);
+ virtual void emitI64MADSATInstruction(const SelectionInstruction &insn);
+
virtual void emitWrite64Instruction(const SelectionInstruction &insn);
virtual void emitRead64Instruction(const SelectionInstruction &insn);
virtual void emitI64MULInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 49d60174..a366e7f2 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -130,7 +130,7 @@ namespace gbe
virtual void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
void emitTernaryInstruction(const SelectionInstruction &insn);
virtual void emitI64MULHIInstruction(const SelectionInstruction &insn);
- void emitI64MADSATInstruction(const SelectionInstruction &insn);
+ virtual void emitI64MADSATInstruction(const SelectionInstruction &insn);
virtual void emitI64HADDInstruction(const SelectionInstruction &insn);
virtual void emitI64RHADDInstruction(const SelectionInstruction &insn);
void emitI64ShiftInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 8615adc8..d4e33c5a 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -527,7 +527,7 @@ namespace gbe
/*! Convert 64-bit integer to 32-bit float */
void CONVF_TO_I64(Reg dst, Reg src, GenRegister tmp[2]);
/*! Saturated 64bit x*y + z */
- void I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister tmp[9]);
+ void I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister* tmp, int tmp_num);
/*! High 64bit of x*y */
void I64_MUL_HI(Reg dst, Reg src0, Reg src1, GenRegister *tmp, int tmp_num);
/*! (x+y)>>1 without mod. overflow */
@@ -1477,13 +1477,13 @@ namespace gbe
insn->dst(i + 1) = tmp[i];
}
- void Selection::Opaque::I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister tmp[9]) {
- SelectionInstruction *insn = this->appendInsn(SEL_OP_I64MADSAT, 10, 3);
+ void Selection::Opaque::I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister *tmp, int tmp_num) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_I64MADSAT, tmp_num + 1, 3);
insn->dst(0) = dst;
insn->src(0) = src0;
insn->src(1) = src1;
insn->src(2) = src2;
- for(int i = 0; i < 9; i ++)
+ for(int i = 0; i < tmp_num; i ++)
insn->dst(i + 1) = tmp[i];
}
@@ -4060,17 +4060,27 @@ namespace gbe
switch(insn.getOpcode()) {
case OP_I64MADSAT:
{
- GenRegister tmp[9];
- for(int i=0; i<9; i++) {
- tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
- tmp[i].type = GEN_TYPE_UD;
- }
- sel.push();
- sel.curr.flag = 0;
- sel.curr.subFlag = 1;
- sel.I64MADSAT(dst, src0, src1, src2, tmp);
- sel.pop();
- break;
+ GenRegister tmp[9];
+ int tmp_num;
+ if (!sel.hasLongType()) {
+ tmp_num = 9;
+ for(int i=0; i<9; i++) {
+ tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
+ tmp[i].type = GEN_TYPE_UD;
+ }
+ } else {
+ tmp_num = 6;
+ for(int i=0; i<6; i++) {
+ tmp[i] = sel.selReg(sel.reg(FAMILY_QWORD), ir::TYPE_U64);
+ tmp[i].type = GEN_TYPE_UL;
+ }
+ }
+ sel.push();
+ sel.curr.flag = 0;
+ sel.curr.subFlag = 1;
+ sel.I64MADSAT(dst, src0, src1, src2, tmp, tmp_num);
+ sel.pop();
+ break;
}
case OP_MAD:
{