diff options
author | rander <rander.wang@intel.com> | 2017-07-04 15:20:08 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2017-07-06 17:51:59 +0800 |
commit | 49b63a260c66fc9438ad8b29344d8e1a4ba0285f (patch) | |
tree | 4be699915f8d298500aeb8b2aeb240eed0d069ec | |
parent | 3ece4846013dc213686a372ddf630e34e435ea5c (diff) |
backend: refine fdiv to rcp in some cases
When the src0 of fdiv is an immediate value and it is
exactly a power of 2, like 2.0f, 4.0f, 1.0/8.0f,
fdiv %0, imm, %1 can be converted to
rcp %0, %1
mul %0, %0, imm.
Since fdiv costs 8 cycles and rcp costs 4 cycles, this will save at least
3 cycles.
Passes the conformance test and utests.
V2: refine negation flag
V3: modify negation by negate
Signed-off-by: rander.wang <rander.wang@intel.com>
Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- | backend/src/backend/gen_insn_selection.cpp | 28 |
1 files changed, 28 insertions, 0 deletions
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 7498f382..c89a83e7 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -3279,6 +3279,34 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp sel.MATH(dst, function, src0, src1); } else if(type == TYPE_FLOAT) { GBE_ASSERT(op != OP_REM); + SelectionDAG *child0 = dag.child[0]; + if (child0 && child0->insn.getOpcode() == OP_LOADI) { + const auto &loadimm = cast<LoadImmInstruction>(child0->insn); + const Immediate imm = loadimm.getImmediate(); + float immVal = imm.getFloatValue(); + int* dwPtr = (int*)&immVal; + + //if immedia is a exactly pow of 2, it can be converted to RCP + if((*dwPtr & 0x7FFFFF) == 0) { + if(immVal == -1.0f) + { + GenRegister tmp = GenRegister::negate(src1); + sel.MATH(dst, GEN_MATH_FUNCTION_INV, tmp); + } + else { + sel.MATH(dst, GEN_MATH_FUNCTION_INV, src1); + if(immVal != 1.0f) { + GenRegister isrc = GenRegister::immf(immVal); + sel.MUL(dst, dst, isrc); + } + } + + if(dag.child[1]) + dag.child[1]->isRoot = 1; + return true; + } + } + sel.MATH(dst, GEN_MATH_FUNCTION_FDIV, src0, src1); } else if (type == TYPE_S64 || type == TYPE_U64) { GenRegister tmp[15]; |