summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorrander <rander.wang@intel.com>2017-07-04 15:20:08 +0800
committerYang Rong <rong.r.yang@intel.com>2017-07-06 17:51:59 +0800
commit49b63a260c66fc9438ad8b29344d8e1a4ba0285f (patch)
tree4be699915f8d298500aeb8b2aeb240eed0d069ec
parent3ece4846013dc213686a372ddf630e34e435ea5c (diff)
backend: refine fdiv to rcp in some cases
When src0 of an fdiv is an immediate value that is exactly a power of 2 (e.g. 2.0f, 4.0f, 1.0/8.0f), "fdiv %0, imm, %1" can be converted to "rcp %0, %1" followed by "mul %0, %0, imm". Since fdiv costs 8 cycles and rcp costs 4 cycles, this saves at least 3 cycles. Passes the conformance tests and utests. V2: refine the negation flag. V3: implement the negation with negate. Signed-off-by: rander.wang <rander.wang@intel.com> Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r--backend/src/backend/gen_insn_selection.cpp28
1 files changed, 28 insertions, 0 deletions
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 7498f382..c89a83e7 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -3279,6 +3279,34 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
sel.MATH(dst, function, src0, src1);
} else if(type == TYPE_FLOAT) {
GBE_ASSERT(op != OP_REM);
+ SelectionDAG *child0 = dag.child[0];
+ if (child0 && child0->insn.getOpcode() == OP_LOADI) {
+ const auto &loadimm = cast<LoadImmInstruction>(child0->insn);
+ const Immediate imm = loadimm.getImmediate();
+ float immVal = imm.getFloatValue();
+ int* dwPtr = (int*)&immVal;
+
+ //if immedia is a exactly pow of 2, it can be converted to RCP
+ if((*dwPtr & 0x7FFFFF) == 0) {
+ if(immVal == -1.0f)
+ {
+ GenRegister tmp = GenRegister::negate(src1);
+ sel.MATH(dst, GEN_MATH_FUNCTION_INV, tmp);
+ }
+ else {
+ sel.MATH(dst, GEN_MATH_FUNCTION_INV, src1);
+ if(immVal != 1.0f) {
+ GenRegister isrc = GenRegister::immf(immVal);
+ sel.MUL(dst, dst, isrc);
+ }
+ }
+
+ if(dag.child[1])
+ dag.child[1]->isRoot = 1;
+ return true;
+ }
+ }
+
sel.MATH(dst, GEN_MATH_FUNCTION_FDIV, src0, src1);
} else if (type == TYPE_S64 || type == TYPE_U64) {
GenRegister tmp[15];