summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--backend/src/backend/gen8_context.cpp31
-rw-r--r--backend/src/backend/gen8_context.hpp2
-rw-r--r--backend/src/backend/gen_context.hpp2
-rw-r--r--backend/src/backend/gen_insn_selection.cpp43
4 files changed, 66 insertions, 12 deletions
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index 8960d5be..daa4182b 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -531,6 +531,37 @@ namespace gbe
p->ADD(dst, dst, tmp_dst);
}
+ void Gen8Context::emitI64DIVREMInstruction(const SelectionInstruction &cnst_insn)
+ {
+ SelectionInstruction* insn = const_cast<SelectionInstruction*>(&cnst_insn);
+ GenRegister packed_src0 = ra->genReg(insn->src(0));
+ GenRegister packed_src1 = ra->genReg(insn->src(1));
+ GenRegister dst = ra->genReg(insn->dst(0));
+ int tmp_reg_n = 14;
+
+ if (packed_src0.hstride != GEN_HORIZONTAL_STRIDE_0) {
+ GenRegister unpacked_src0 = ra->genReg(insn->dst(tmp_reg_n));
+ unpackLongVec(packed_src0, unpacked_src0, p->curr.execWidth);
+ tmp_reg_n++;
+ insn->src(0) = unpacked_src0;
+ }
+ if (packed_src1.hstride != GEN_HORIZONTAL_STRIDE_0) {
+ GenRegister unpacked_src1 = ra->genReg(insn->dst(tmp_reg_n));
+ unpackLongVec(packed_src1, unpacked_src1, p->curr.execWidth);
+ tmp_reg_n++;
+ insn->src(1) = unpacked_src1;
+ }
+ GBE_ASSERT(tmp_reg_n <= insn->dstNum);
+
+ GenContext::emitI64DIVREMInstruction(*insn);
+
+ if (dst.hstride != GEN_HORIZONTAL_STRIDE_0) {
+ GenRegister dst_packed = ra->genReg(insn->dst(14));
+ packLongVec(dst, dst_packed, p->curr.execWidth);
+ p->MOV(dst, dst_packed);
+ }
+ }
+
void Gen8Context::packLongVec(GenRegister unpacked, GenRegister packed, uint32_t simd)
{
GBE_ASSERT(packed.subnr == 0);
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index 9f1d7498..bea78b65 100644
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -60,6 +60,8 @@ namespace gbe
virtual void emitWrite64Instruction(const SelectionInstruction &insn);
virtual void emitRead64Instruction(const SelectionInstruction &insn);
virtual void emitI64MULInstruction(const SelectionInstruction &insn);
+ virtual void emitI64DIVREMInstruction(const SelectionInstruction &insn);
+
protected:
virtual GenEncoder* generateEncoder(void) {
return GBE_NEW(Gen8Encoder, this->simdWidth, 8, deviceID);
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index a366e7f2..3d01f2b1 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -164,7 +164,7 @@ namespace gbe
void emitUnSpillRegInstruction(const SelectionInstruction &insn);
void emitGetImageInfoInstruction(const SelectionInstruction &insn);
virtual void emitI64MULInstruction(const SelectionInstruction &insn);
- void emitI64DIVREMInstruction(const SelectionInstruction &insn);
+ virtual void emitI64DIVREMInstruction(const SelectionInstruction &insn);
void scratchWrite(const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
void scratchRead(const GenRegister dst, const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index d4e33c5a..27292a3b 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -617,9 +617,9 @@ namespace gbe
/*! Multiply 64-bit integers */
void I64MUL(Reg dst, Reg src0, Reg src1, GenRegister *tmp, bool native_long);
/*! 64-bit integer division */
- void I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]);
+ void I64DIV(Reg dst, Reg src0, Reg src1, GenRegister *tmp, int tmp_int);
/*! 64-bit integer remainder of division */
- void I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]);
+ void I64REM(Reg dst, Reg src0, Reg src1, GenRegister *tmp, int tmp_int);
/* common functions for both binary instruction and sel_cmp and compare instruction.
It will handle the IMM or normal register assignment, and will try to avoid LOADI
as much as possible. */
@@ -1380,21 +1380,21 @@ namespace gbe
}
}
- void Selection::Opaque::I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]) {
- SelectionInstruction *insn = this->appendInsn(SEL_OP_I64DIV, 14, 2);
+ void Selection::Opaque::I64DIV(Reg dst, Reg src0, Reg src1, GenRegister* tmp, int tmp_num) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_I64DIV, tmp_num + 1, 2);
insn->dst(0) = dst;
insn->src(0) = src0;
insn->src(1) = src1;
- for(int i = 0; i < 13; i++)
+ for(int i = 0; i < tmp_num; i++)
insn->dst(i + 1) = tmp[i];
}
- void Selection::Opaque::I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]) {
- SelectionInstruction *insn = this->appendInsn(SEL_OP_I64REM, 14, 2);
+ void Selection::Opaque::I64REM(Reg dst, Reg src0, Reg src1, GenRegister* tmp, int tmp_num) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_I64REM, tmp_num + 1, 2);
insn->dst(0) = dst;
insn->src(0) = src0;
insn->src(1) = src1;
- for(int i = 0; i < 13; i++)
+ for(int i = 0; i < tmp_num; i++)
insn->dst(i + 1) = tmp[i];
}
@@ -2201,18 +2201,39 @@ namespace gbe
GBE_ASSERT(op != OP_REM);
sel.MATH(dst, GEN_MATH_FUNCTION_FDIV, src0, src1);
} else if (type == TYPE_S64 || type == TYPE_U64) {
- GenRegister tmp[13];
+ GenRegister tmp[15];
+ int tmp_num = 13;
for(int i=0; i < 13; i++) {
tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
tmp[i].type = GEN_TYPE_UD;
}
+
+ if (sel.hasLongType()) {
+ if (!sel.isScalarReg(insn.getSrc(0))) {
+ tmp[tmp_num] = GenRegister::retype(sel.selReg(sel.reg(FAMILY_QWORD)), src0.type);
+ tmp_num++;
+ }
+
+ if (!sel.isScalarReg(insn.getSrc(1))) {
+ tmp[tmp_num] = GenRegister::retype(sel.selReg(sel.reg(FAMILY_QWORD)), src1.type);
+ tmp_num++;
+ }
+
+ /* We need at least one tmp register to convert if dst is not scalar. */
+ if (!sel.isScalarReg(insn.getDst(0)) && sel.isScalarReg(insn.getSrc(0))
+ && sel.isScalarReg(insn.getSrc(1))) {
+ GBE_ASSERT(tmp_num == 13);
+ tmp[tmp_num] = sel.selReg(sel.reg(FAMILY_QWORD), ir::TYPE_U64);
+ tmp_num++;
+ }
+ }
sel.push();
sel.curr.flag = 0;
sel.curr.subFlag = 1;
if(op == OP_DIV)
- sel.I64DIV(dst, src0, src1, tmp);
+ sel.I64DIV(dst, src0, src1, tmp, tmp_num);
else
- sel.I64REM(dst, src0, src1, tmp);
+ sel.I64REM(dst, src0, src1, tmp, tmp_num);
sel.pop();
}
markAllChildren(dag);