summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunyan He <junyan.he@linux.intel.com>2015-01-06 18:02:46 +0800
committerZhigang Gong <zhigang.gong@intel.com>2015-01-20 16:31:08 +0800
commitc973c8262b754d6cc30780d52be1edf345f3de01 (patch)
tree063a992122cb58306bcdd4e5c4422b86c56f8fc1
parent72aa942c654cd6dee5f9214b9db6bdba0ae550b9 (diff)
Overload I64 Div and Rem function.
Because the shared math function does not support 64-bit div and rem, we can just unpack the I64 operands and use the old function to handle them. Signed-off-by: Junyan He <junyan.he@linux.intel.com> Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
-rw-r--r--backend/src/backend/gen8_context.cpp31
-rw-r--r--backend/src/backend/gen8_context.hpp2
-rw-r--r--backend/src/backend/gen_context.hpp2
-rw-r--r--backend/src/backend/gen_insn_selection.cpp43
4 files changed, 66 insertions, 12 deletions
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index 8960d5be..daa4182b 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -531,6 +531,37 @@ namespace gbe
p->ADD(dst, dst, tmp_dst);
}
+ void Gen8Context::emitI64DIVREMInstruction(const SelectionInstruction &cnst_insn) // Gen8 override: unpack the 64-bit sources, delegate to GenContext's implementation, then repack dst.
+ {
+ SelectionInstruction* insn = const_cast<SelectionInstruction*>(&cnst_insn); // drop const so src(0)/src(1) can be redirected below
+ GenRegister packed_src0 = ra->genReg(insn->src(0));
+ GenRegister packed_src1 = ra->genReg(insn->src(1));
+ GenRegister dst = ra->genReg(insn->dst(0));
+ int tmp_reg_n = 14; // first dst slot past the result (dst(0)) and the 13 DWORD temporaries that instruction selection stores at dst(1..13)
+ // Sources whose horizontal stride is not 0 are unpacked into the next extra temporary. NOTE(review): presumably stride 0 marks a scalar/uniform register - confirm.
+ if (packed_src0.hstride != GEN_HORIZONTAL_STRIDE_0) {
+ GenRegister unpacked_src0 = ra->genReg(insn->dst(tmp_reg_n)); // claim the next extra temporary for src0
+ unpackLongVec(packed_src0, unpacked_src0, p->curr.execWidth);
+ tmp_reg_n++;
+ insn->src(0) = unpacked_src0; // the base implementation will now read the unpacked copy
+ }
+ if (packed_src1.hstride != GEN_HORIZONTAL_STRIDE_0) {
+ GenRegister unpacked_src1 = ra->genReg(insn->dst(tmp_reg_n)); // slot 14 if src0 was not unpacked, otherwise slot 15
+ unpackLongVec(packed_src1, unpacked_src1, p->curr.execWidth);
+ tmp_reg_n++;
+ insn->src(1) = unpacked_src1; // the base implementation will now read the unpacked copy
+ }
+ GBE_ASSERT(tmp_reg_n <= insn->dstNum); // NOTE(review): this check runs only after dst(tmp_reg_n) has already been read above
+ // Delegate the actual div/rem sequence to the base-class implementation, using the (possibly redirected) sources.
+ GenContext::emitI64DIVREMInstruction(*insn);
+ // Repack the result when dst does not have horizontal stride 0.
+ if (dst.hstride != GEN_HORIZONTAL_STRIDE_0) {
+ GenRegister dst_packed = ra->genReg(insn->dst(14)); // always slot 14: the first extra temporary, which may already have held an unpacked source above
+ packLongVec(dst, dst_packed, p->curr.execWidth); // dst is passed as the unpacked input, dst_packed receives the packed form
+ p->MOV(dst, dst_packed); // copy the packed value back into the real destination
+ }
+ }
+
void Gen8Context::packLongVec(GenRegister unpacked, GenRegister packed, uint32_t simd)
{
GBE_ASSERT(packed.subnr == 0);
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index 9f1d7498..bea78b65 100644
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -60,6 +60,8 @@ namespace gbe
virtual void emitWrite64Instruction(const SelectionInstruction &insn);
virtual void emitRead64Instruction(const SelectionInstruction &insn);
virtual void emitI64MULInstruction(const SelectionInstruction &insn);
+ virtual void emitI64DIVREMInstruction(const SelectionInstruction &insn);
+
protected:
virtual GenEncoder* generateEncoder(void) {
return GBE_NEW(Gen8Encoder, this->simdWidth, 8, deviceID);
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index a366e7f2..3d01f2b1 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -164,7 +164,7 @@ namespace gbe
void emitUnSpillRegInstruction(const SelectionInstruction &insn);
void emitGetImageInfoInstruction(const SelectionInstruction &insn);
virtual void emitI64MULInstruction(const SelectionInstruction &insn);
- void emitI64DIVREMInstruction(const SelectionInstruction &insn);
+ virtual void emitI64DIVREMInstruction(const SelectionInstruction &insn);
void scratchWrite(const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
void scratchRead(const GenRegister dst, const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index d4e33c5a..27292a3b 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -617,9 +617,9 @@ namespace gbe
/*! Multiply 64-bit integers */
void I64MUL(Reg dst, Reg src0, Reg src1, GenRegister *tmp, bool native_long);
/*! 64-bit integer division */
- void I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]);
+ void I64DIV(Reg dst, Reg src0, Reg src1, GenRegister *tmp, int tmp_int);
/*! 64-bit integer remainder of division */
- void I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]);
+ void I64REM(Reg dst, Reg src0, Reg src1, GenRegister *tmp, int tmp_int);
/* common functions for both binary instruction and sel_cmp and compare instruction.
It will handle the IMM or normal register assignment, and will try to avoid LOADI
as much as possible. */
@@ -1380,21 +1380,21 @@ namespace gbe
}
}
- void Selection::Opaque::I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]) {
- SelectionInstruction *insn = this->appendInsn(SEL_OP_I64DIV, 14, 2);
+ void Selection::Opaque::I64DIV(Reg dst, Reg src0, Reg src1, GenRegister* tmp, int tmp_num) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_I64DIV, tmp_num + 1, 2);
insn->dst(0) = dst;
insn->src(0) = src0;
insn->src(1) = src1;
- for(int i = 0; i < 13; i++)
+ for(int i = 0; i < tmp_num; i++)
insn->dst(i + 1) = tmp[i];
}
- void Selection::Opaque::I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]) {
- SelectionInstruction *insn = this->appendInsn(SEL_OP_I64REM, 14, 2);
+ void Selection::Opaque::I64REM(Reg dst, Reg src0, Reg src1, GenRegister* tmp, int tmp_num) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_I64REM, tmp_num + 1, 2);
insn->dst(0) = dst;
insn->src(0) = src0;
insn->src(1) = src1;
- for(int i = 0; i < 13; i++)
+ for(int i = 0; i < tmp_num; i++)
insn->dst(i + 1) = tmp[i];
}
@@ -2201,18 +2201,39 @@ namespace gbe
GBE_ASSERT(op != OP_REM);
sel.MATH(dst, GEN_MATH_FUNCTION_FDIV, src0, src1);
} else if (type == TYPE_S64 || type == TYPE_U64) {
- GenRegister tmp[13];
+ GenRegister tmp[15];
+ int tmp_num = 13;
for(int i=0; i < 13; i++) {
tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
tmp[i].type = GEN_TYPE_UD;
}
+
+ if (sel.hasLongType()) {
+ if (!sel.isScalarReg(insn.getSrc(0))) {
+ tmp[tmp_num] = GenRegister::retype(sel.selReg(sel.reg(FAMILY_QWORD)), src0.type);
+ tmp_num++;
+ }
+
+ if (!sel.isScalarReg(insn.getSrc(1))) {
+ tmp[tmp_num] = GenRegister::retype(sel.selReg(sel.reg(FAMILY_QWORD)), src1.type);
+ tmp_num++;
+ }
+
+ /* We need at least one tmp register to convert if dst is not scalar. */
+ if (!sel.isScalarReg(insn.getDst(0)) && sel.isScalarReg(insn.getSrc(0))
+ && sel.isScalarReg(insn.getSrc(1))) {
+ GBE_ASSERT(tmp_num == 13);
+ tmp[tmp_num] = sel.selReg(sel.reg(FAMILY_QWORD), ir::TYPE_U64);
+ tmp_num++;
+ }
+ }
sel.push();
sel.curr.flag = 0;
sel.curr.subFlag = 1;
if(op == OP_DIV)
- sel.I64DIV(dst, src0, src1, tmp);
+ sel.I64DIV(dst, src0, src1, tmp, tmp_num);
else
- sel.I64REM(dst, src0, src1, tmp);
+ sel.I64REM(dst, src0, src1, tmp, tmp_num);
sel.pop();
}
markAllChildren(dag);