author    Zhigang Gong <zhigang.gong@intel.com>  2015-08-31 16:18:32 +0800
committer Zhigang Gong <zhigang.gong@intel.com>  2015-09-01 17:43:11 +0800
commit    06710d7b5c7ba88a9b7de83251acd840029c1a44 (patch)
tree      fa8bfb98ec03950c5d6a4169c732906fbb08fe48
parent    5f83f22acc2416b95e74909754b3661fc6ed8011 (diff)
GBE: implement further phi mov optimization based on intra-BB interference analysis.
The previous phi mov optimization tries to coalesce the phi copy source register with the phi copy register when the phi copy source register is a normal SSA value. In some cases, however, many phi copy source registers are themselves phi copy values with multiple definitions. They can still all be reduced to one phi copy register if there is no interference in any of the involved BBs. Together with the previous patches, this patch reduces the number of spilled registers from 200+ to only 70 for a SGEMM kernel, and the performance improves by about 10x. Signed-off-by: Zhigang Gong <zhigang.gong@intel.com>
-rw-r--r--  backend/src/llvm/llvm_gen_backend.cpp  134
1 file changed, 128 insertions, 6 deletions
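
A note on the idea the commit message describes: a MOV inserted for PHI resolution can be dropped when the phi copy source register and the phi copy register never interfere, i.e. their live ranges do not overlap, so every def and use of the source can simply be rewritten to the phi copy register. The following is a minimal, self-contained sketch of that decision over a toy live-range model; Register, LiveRange, interfere() and the example ranges are illustrative assumptions, not the GBE ir::Liveness / ir::FunctionDAG API.

// Minimal sketch of interference-based phi-copy coalescing over a toy
// live-range model. Register, LiveRange and interfere() are hypothetical
// illustrations, not the GBE backend types.
#include <cstdio>
#include <map>
#include <algorithm>

using Register = int;

// A toy live range: [start, end) over a linear numbering of instructions.
struct LiveRange { int start, end; };

// Two registers interfere if their live ranges overlap.
static bool interfere(const LiveRange &a, const LiveRange &b) {
  return a.start < b.end && b.start < a.end;
}

int main() {
  // phiCopySrc %10 is dead before phiCopy %20 becomes live, so the
  // MOV %20 <- %10 inserted for PHI resolution is removable.
  std::map<Register, LiveRange> ranges = {
    { 10, { 2, 5 } },   // phiCopySrc
    { 20, { 5, 12 } },  // phiCopy
  };

  Register phiCopySrc = 10, phiCopy = 20;
  if (!interfere(ranges[phiCopySrc], ranges[phiCopy])) {
    // Coalesce: rewrite all defs/uses of phiCopySrc to phiCopy and merge
    // the live ranges, which is what lets the MOV be dropped.
    LiveRange merged = {
      std::min(ranges[phiCopySrc].start, ranges[phiCopy].start),
      std::max(ranges[phiCopySrc].end, ranges[phiCopy].end)
    };
    ranges.erase(phiCopySrc);
    ranges[phiCopy] = merged;
    std::printf("coalesced %%%d into %%%d, live range [%d, %d)\n",
                phiCopySrc, phiCopy, merged.start, merged.end);
  } else {
    std::printf("registers interfere, keep the phi copy MOV\n");
  }
  return 0;
}

In the actual pass below, the interference test is dag->interfere(liveness, phiCopySrc, phiCopy) and the rewrite walks the register's DefSet/UseSet with replaceDst/replaceSrc.
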
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 38c63ce4..1d097277 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -629,7 +629,15 @@ namespace gbe
/*! Will try to remove MOVs due to PHI resolution */
void removeMOVs(const ir::Liveness &liveness, ir::Function &fn);
/*! Optimize phi move based on liveness information */
- void optimizePhiCopy(ir::Liveness &liveness, ir::Function &fn);
+ void optimizePhiCopy(ir::Liveness &liveness, ir::Function &fn,
+ map <ir::Register, ir::Register> &replaceMap,
+ map <ir::Register, ir::Register> &redundantPhiCopyMap);
+ /*! Further optimization after phi copy optimization.
* Global liveness interference checking based redundant phi value
* elimination. */
+ void postPhiCopyOptimization(ir::Liveness &liveness, ir::Function &fn,
+ map <ir::Register, ir::Register> &replaceMap,
+ map <ir::Register, ir::Register> &redundantPhiCopyMap);
/*! Will try to remove redundants LOADI in basic blocks */
void removeLOADIs(const ir::Liveness &liveness, ir::Function &fn);
/*! To avoid lost copy, we need two values for PHI. This function create a
@@ -2157,7 +2165,9 @@ namespace gbe
});
}
- void GenWriter::optimizePhiCopy(ir::Liveness &liveness, ir::Function &fn)
+ void GenWriter::optimizePhiCopy(ir::Liveness &liveness, ir::Function &fn,
+ map<ir::Register, ir::Register> &replaceMap,
+ map<ir::Register, ir::Register> &redundantPhiCopyMap)
{
// The overall idea behind is we check whether there is any interference
// between phi and phiCopy live range. If there is no point that
@@ -2168,7 +2178,6 @@ namespace gbe
using namespace ir;
ir::FunctionDAG *dag = new ir::FunctionDAG(liveness);
-
for (auto &it : phiMap) {
const Register phi = it.first;
const Register phiCopy = it.second;
@@ -2248,8 +2257,13 @@ namespace gbe
const Instruction *phiSrcUseInsn = s->getInstruction();
replaceSrc(const_cast<Instruction *>(phiSrcUseInsn), phiCopySrc, phiCopy);
}
+ replaceMap.insert(std::make_pair(phiCopySrc, phiCopy));
}
}
+ } else {
+ if (((*(phiCopySrcDef->begin()))->getType() == ValueDef::DEF_INSN_DST) &&
+ redundantPhiCopyMap.find(phiCopySrc) == redundantPhiCopyMap.end())
+ redundantPhiCopyMap.insert(std::make_pair(phiCopySrc, phiCopy));
}
// If phi is used in the same BB that define the phiCopy,
@@ -2281,7 +2295,7 @@ namespace gbe
}
}
- // coalease phi and phiCopy
+      // coalesce phi and phiCopy
if (isOpt) {
for (auto &x : *phiDef) {
const_cast<Instruction *>(x->getInstruction())->remove();
@@ -2289,8 +2303,112 @@ namespace gbe
for (auto &x : *phiUse) {
const Instruction *phiUseInsn = x->getInstruction();
replaceSrc(const_cast<Instruction *>(phiUseInsn), phi, phiCopy);
+ replaceMap.insert(std::make_pair(phi, phiCopy));
+ }
+ }
+ }
+ delete dag;
+ }
+
+ void GenWriter::postPhiCopyOptimization(ir::Liveness &liveness,
+ ir::Function &fn, map <ir::Register, ir::Register> &replaceMap,
+ map <ir::Register, ir::Register> &redundantPhiCopyMap)
+ {
+ // When doing the first pass of phi copy optimization, we skip all the phi src MOV cases
+ // whose phiCopySrcDef is also a phi value. We handle them here, after all phi copy optimizations
+ // have been done, so we no longer need to worry about reducible phi copies remaining.
+ // We only need to check whether those possible redundant phi copy pairs interfere with
+ // each other globally, by leveraging the DAG information.
+ using namespace ir;
+
+ // First, validate all entries in the possible redundant phi copy map and update the liveness
+ // information accordingly.
+ if (replaceMap.size() != 0) {
+ for (auto pair : replaceMap) {
+ if (redundantPhiCopyMap.find(pair.first) != redundantPhiCopyMap.end()) {
+ auto it = redundantPhiCopyMap.find(pair.first);
+ Register phiCopy = it->second;
+ Register newPhiCopySrc = pair.second;
+ redundantPhiCopyMap.erase(it);
+ redundantPhiCopyMap.insert(std::make_pair(newPhiCopySrc, phiCopy));
+ }
+ }
+ liveness.replaceRegs(replaceMap);
+ replaceMap.clear();
+ }
+ if (redundantPhiCopyMap.size() == 0)
+ return;
+ auto dag = new FunctionDAG(liveness);
+
+ map<Register, Register> newRedundant;
+ map<Register, Register> *curRedundant = &redundantPhiCopyMap;
+ map<Register, Register> *nextRedundant = &newRedundant, tmp;
+ map<Register, Register> replacedRegs, revReplacedRegs;
+ // Do multi-pass redundant phi copy elimination based on the global interference information.
+ // FIXME: we don't need to re-compute the whole DAG for each pass.
+ while (curRedundant->size() > 0) {
+ for (auto &pair : *curRedundant) {
+ auto phiCopySrc = pair.first;
+ auto phiCopy = pair.second;
+ if (replacedRegs.find(phiCopy) != replacedRegs.end() ||
+ revReplacedRegs.find(phiCopy) != revReplacedRegs.end() ||
+ revReplacedRegs.find(phiCopySrc) != revReplacedRegs.end())
+ continue;
+ if (!dag->interfere(liveness, phiCopySrc, phiCopy)) {
+ const ir::DefSet *phiCopySrcDef = dag->getRegDef(phiCopySrc);
+ const ir::UseSet *phiCopySrcUse = dag->getRegUse(phiCopySrc);
+ for (auto &s : *phiCopySrcDef) {
+ const Instruction *phiSrcDefInsn = s->getInstruction();
+ if (phiSrcDefInsn->getOpcode() == ir::OP_MOV &&
+ phiSrcDefInsn->getSrc(0) == phiCopy) {
+ const_cast<Instruction *>(phiSrcDefInsn)->remove();
+ continue;
+ }
+ replaceDst(const_cast<Instruction *>(phiSrcDefInsn), phiCopySrc, phiCopy);
+ }
+
+ for (auto &s : *phiCopySrcUse) {
+ const Instruction *phiSrcUseInsn = s->getInstruction();
+ if (phiSrcUseInsn->getOpcode() == ir::OP_MOV &&
+ phiSrcUseInsn->getDst(0) == phiCopy) {
+ const_cast<Instruction *>(phiSrcUseInsn)->remove();
+ continue;
+ }
+ replaceSrc(const_cast<Instruction *>(phiSrcUseInsn), phiCopySrc, phiCopy);
+ }
+
+ replacedRegs.insert(std::make_pair(phiCopySrc, phiCopy));
+ revReplacedRegs.insert(std::make_pair(phiCopy, phiCopySrc));
+ curRedundant->erase(phiCopySrc);
}
}
+
+ if (replacedRegs.size() != 0) {
+ liveness.replaceRegs(replacedRegs);
+ for (auto &pair : *curRedundant) {
+ auto from = pair.first;
+ auto to = pair.second;
+ bool revisit = false;
+ if (replacedRegs.find(pair.second) != replacedRegs.end()) {
+ to = replacedRegs.find(to)->second;
+ revisit = true;
+ }
+ if (revReplacedRegs.find(from) != revReplacedRegs.end() ||
+ revReplacedRegs.find(to) != revReplacedRegs.end())
+ revisit = true;
+ if (revisit)
+ nextRedundant->insert(std::make_pair(from, to));
+ }
+ std::swap(curRedundant, nextRedundant);
+ } else
+ break;
+
+ nextRedundant->clear();
+ replacedRegs.clear();
+ revReplacedRegs.clear();
+ delete dag;
+ dag = new ir::FunctionDAG(liveness);
}
delete dag;
}
@@ -2754,8 +2872,12 @@ namespace gbe
ir::Liveness liveness(fn);
if (OCL_OPTIMIZE_LOADI) this->removeLOADIs(liveness, fn);
- if (OCL_OPTIMIZE_PHI_MOVES) this->optimizePhiCopy(liveness, fn);
- if (OCL_OPTIMIZE_PHI_MOVES) this->removeMOVs(liveness, fn);
+ if (OCL_OPTIMIZE_PHI_MOVES) {
+ map <ir::Register, ir::Register> replaceMap, redundantPhiCopyMap;
+ this->optimizePhiCopy(liveness, fn, replaceMap, redundantPhiCopyMap);
+ this->postPhiCopyOptimization(liveness, fn, replaceMap, redundantPhiCopyMap);
+ this->removeMOVs(liveness, fn);
+ }
}
void GenWriter::regAllocateReturnInst(ReturnInst &I) {}
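
A note on the control flow of postPhiCopyOptimization added above: it keeps two maps of pending (phiCopySrc, phiCopy) pairs, coalesces what it can in one pass, carries the still-pending pairs forward with any renames applied, swaps the maps, recomputes the DAG from the updated liveness and repeats. The toy program below sketches only that two-map worklist pattern, with an assumed always-accept stand-in for the interference check; none of it is the GBE code itself.

// Standalone sketch of the two-map worklist pattern used by
// postPhiCopyOptimization: pairs not handled in this pass are carried into
// the next pass with the renames of this pass applied, then the maps are
// swapped. The "accept" rule here is a stand-in, not a real interference check.
#include <cstdio>
#include <map>
#include <utility>

using Register = int;

int main() {
  // Pending (from -> to) coalescing candidates; contents are illustrative.
  std::map<Register, Register> redundant = { { 3, 2 }, { 5, 3 } };
  std::map<Register, Register> next;
  std::map<Register, Register> *cur = &redundant, *nextp = &next;

  int pass = 0;
  while (!cur->empty() && pass < 8) {          // bound passes defensively
    std::map<Register, Register> replaced;     // renames applied this pass

    for (auto &pair : *cur) {
      // Stand-in for "no interference": accept only the first entry per pass.
      if (replaced.empty()) {
        replaced.insert(pair);                 // rename pair.first -> pair.second
        std::printf("pass %d: coalesce r%d into r%d\n", pass, pair.first, pair.second);
      }
    }

    // Carry over the remaining pairs, rewriting targets that were just
    // renamed, so they are revisited against the updated registers next pass.
    for (auto &pair : *cur) {
      if (replaced.count(pair.first))
        continue;                              // already handled this pass
      Register to = pair.second;
      auto it = replaced.find(to);
      if (it != replaced.end())
        to = it->second;
      nextp->insert(std::make_pair(pair.first, to));
    }

    cur->clear();
    std::swap(cur, nextp);
    ++pass;
  }
  return 0;
}

The real pass additionally rebuilds ir::FunctionDAG from the updated liveness between iterations, which is what the FIXME in the hunk above refers to.
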