diff options
author | Guo Yejun <yejun.guo@intel.com> | 2017-06-08 12:40:58 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2017-06-16 16:34:13 +0800 |
commit | f48ec5a6c46436b7221d3360b36bcff6b7897403 (patch) | |
tree | 83764dfc01e8e28a871c05af8a9e92fcfb5b9c55 | |
parent | 7f1c190c1a419d9eff946018638dfdc57b207799 (diff) |
do constant folding for kernel struct args
In the following GEN IR, %41 is a kernel argument (struct).
The first LOAD will be a mov, and the second LOAD will be an indirect mov
(see lowerFunctionArguments). This hurts performance,
and even impacts the correctness of register liveness for the indirect mov:
LOADI.uint64 %1114 72
ADD.int64 %78 %41 %1114
LOAD.int64.private.aligned {%79} %78 bti:255
LOADI.int64 %1115 8
ADD.int64 %1116 %78 %1115
LOAD.int64.private.aligned {%80} %1116 bti:255
This function folds the constants 72 and 8 together,
so that the second LOAD becomes a direct mov as well.
After folding, the GEN IR looks like:
LOADI.int64 %1115 80
ADD.int64 %1116 %41 %1115
Signed-off-by: Guo Yejun <yejun.guo@intel.com>
Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- | backend/src/CMakeLists.txt | 2 | ||||
-rw-r--r-- | backend/src/ir/constopt.cpp | 144 | ||||
-rw-r--r-- | backend/src/ir/constopt.hpp | 54 | ||||
-rw-r--r-- | backend/src/ir/context.cpp | 5 | ||||
-rw-r--r-- | backend/src/ir/instruction.cpp | 7 | ||||
-rw-r--r-- | backend/src/ir/instruction.hpp | 1 |
6 files changed, 213 insertions, 0 deletions
diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt index c9ff8336..74d7baba 100644 --- a/backend/src/CMakeLists.txt +++ b/backend/src/CMakeLists.txt @@ -73,6 +73,8 @@ set (GBE_SRC ir/value.hpp ir/lowering.cpp ir/lowering.hpp + ir/constopt.cpp + ir/constopt.hpp ir/profiling.cpp ir/profiling.hpp ir/printf.cpp diff --git a/backend/src/ir/constopt.cpp b/backend/src/ir/constopt.cpp new file mode 100644 index 00000000..24878b80 --- /dev/null +++ b/backend/src/ir/constopt.cpp @@ -0,0 +1,144 @@ +/* + * Copyright © 2017 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Guo Yejun <yejun.guo@intel.com> + */ + +#include <assert.h> +#include "ir/context.hpp" +#include "ir/value.hpp" +#include "ir/constopt.hpp" +#include "sys/set.hpp" + +namespace gbe { +namespace ir { + + class FunctionStructArgConstOffsetFolder : public Context + { + public: + /*! Build the helper structure */ + FunctionStructArgConstOffsetFolder(Unit &unit) : Context(unit) { + records.clear(); + loadImms.clear(); + } + /*! Free everything we needed */ + virtual ~FunctionStructArgConstOffsetFolder() { + for (size_t i = 0; i < records.size(); ++i) { + delete records[i]; + } + records.clear(); + loadImms.clear(); + } + /*! 
Perform all function arguments substitution if needed */ + void folding(const std::string &name); + + private: + class Record { //add dst, arg (kernel struct arg base reg), imm_value + public: + Record(Register dst, Register arg, int64_t immv) : + _dst(dst), _arg(arg), _immv(immv) { } + Register _dst; + Register _arg; + int64_t _immv; + }; + std::vector<Record*> records; + std::map<Register, LoadImmInstruction*> loadImms; //<ir reg, load reg imm> + + void AddRecord(Register dst, Register arg, int64_t immv) { + Record* rec = new Record(dst, arg, immv); + records.push_back(rec); + } + }; + + void FunctionStructArgConstOffsetFolder::folding(const std::string &name) { + Function *fn = unit.getFunction(name); + if (fn == NULL) + return; + + const uint32_t argNum = fn->argNum(); + for (uint32_t argID = 0; argID < argNum; ++argID) { + FunctionArgument &arg = fn->getArg(argID); + if (arg.type != FunctionArgument::STRUCTURE) + continue; + AddRecord(arg.reg, arg.reg, 0); + } + + fn->foreachInstruction([&](Instruction &insn) { + if (insn.getOpcode() == OP_LOADI) { + LoadImmInstruction *loadImm = cast<LoadImmInstruction>(&insn); + if(!loadImm) + return; + + //to avoid regression, limit for the case: LOADI.int64 %1164 32 + //we can loose the limit if necessary + if (loadImm->getImmediate().getType() != TYPE_S64 && + loadImm->getImmediate().getType() != TYPE_U64) + return; + + Register dst = insn.getDst(); + loadImms[dst] = loadImm; + return; + } + + //we will change imm of loadi directly, so it should not be dst + for (size_t i = 0; i < insn.getDstNum(); ++i) { + Register dst = insn.getDst(i); + assert(loadImms.find(dst) == loadImms.end()); + } + + if (insn.getOpcode() != OP_ADD) + return; + + Register src0 = insn.getSrc(0); + Register src1 = insn.getSrc(1); + Register dst = insn.getDst(); + + //check if src0 is derived from kernel struct arg + std::vector<Record*>::iterator it = + std::find_if(records.begin(), records.end(), [=](Record* rec){ + return rec->_dst == src0; + } ); 
+ if (it == records.end()) + return; + + //check if src1 is imm value + if (loadImms.find(src1) == loadImms.end()) + return; + + Record* rec = *it; + LoadImmInstruction *loadImm = loadImms[src1]; + Immediate imm = loadImm->getImmediate(); + int64_t newvalue = imm.getIntegerValue() + rec->_immv; + + if (rec->_dst != rec->_arg) { //directly dervied from arg if they are equal + //change src0 to be the kernel struct arg + insn.setSrc(0, rec->_arg); + + //change the value of src1 + ImmediateIndex immIndex = fn->newImmediate(newvalue); + loadImm->setImmediateIndex(immIndex); + } + AddRecord(dst, rec->_arg, newvalue); + }); + } + + void foldFunctionStructArgConstOffset(Unit &unit, const std::string &functionName) { + FunctionStructArgConstOffsetFolder folder(unit); + folder.folding(functionName); + } + +} /* namespace ir */ +} diff --git a/backend/src/ir/constopt.hpp b/backend/src/ir/constopt.hpp new file mode 100644 index 00000000..f272637d --- /dev/null +++ b/backend/src/ir/constopt.hpp @@ -0,0 +1,54 @@ +/* + * Copyright © 2017 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. 
+ * + * Author: Guo Yejun <yejun.guo@intel.com> + */ + +#ifndef __GBE_IR_CONSTOPT_HPP__ +#define __GBE_IR_CONSTOPT_HPP__ + +namespace gbe { +namespace ir { + + // Structure to update + class Unit; + + // TODO + void foldConstant(Unit &unit, const std::string &functionName); + void propagateConstant(Unit &unit, const std::string &functionName); + + // for the following GEN IR, %41 is kernel argument (struct) + // the first LOAD will be mov, and the second LOAD will be indirect move + // (see lowerFunctionArguments). It hurts performance, + // and even impacts the correctness of reg liveness of indriect mov + // + // LOADI.uint64 %1114 72 + // ADD.int64 %78 %41 %1114 + // LOAD.int64.private.aligned {%79} %78 bti:255 + // LOADI.int64 %1115 8 + // ADD.int64 %1116 %78 %1115 + // LOAD.int64.private.aligned {%80} %1116 bti:255 + // + // this function folds the constants of 72 and 8 together, + // and so it will be direct mov. + // the GEN IR looks like: + // LOADI.int64 %1115 80 + // ADD.int64 %1116 %41 %1115 + void foldFunctionStructArgConstOffset(Unit &unit, const std::string &functionName); +} /* namespace ir */ +} /* namespace gbe */ + +#endif /* __GBE_IR_LOWERING_HPP__ */ diff --git a/backend/src/ir/context.cpp b/backend/src/ir/context.cpp index e4aac088..f60d33f4 100644 --- a/backend/src/ir/context.cpp +++ b/backend/src/ir/context.cpp @@ -24,6 +24,7 @@ #include "ir/context.hpp" #include "ir/unit.hpp" #include "ir/lowering.hpp" +#include "ir/constopt.hpp" namespace gbe { namespace ir { @@ -82,6 +83,10 @@ namespace ir { fn->sortLabels(); fn->computeCFG(); + //TODO: do constant folding and propagation for GEN IR + //here as the first step, we just do constant folding for kernel struct args + foldFunctionStructArgConstOffset(unit, fn->getName()); + // Spill function argument to the stack if required and identify which // function arguments can use constant push lowerFunctionArguments(unit, fn->getName()); diff --git a/backend/src/ir/instruction.cpp 
b/backend/src/ir/instruction.cpp index a9156ffe..48590fd1 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -833,6 +833,9 @@ namespace ir { INLINE Immediate getImmediate(const Function &fn) const { return fn.getImmediate(immediateIndex); } + INLINE void setImmediateIndex(ImmediateIndex immIndex) { + immediateIndex = immIndex; + } INLINE Type getType(void) const { return this->type; } bool wellFormed(const Function &fn, std::string &why) const; INLINE void out(std::ostream &out, const Function &fn) const; @@ -2445,6 +2448,10 @@ DECL_MEM_FN(MemInstruction, void, setBtiReg(Register reg), setBtiReg(reg)) return reinterpret_cast<const internal::LoadImmInstruction*>(this)->getImmediate(fn); } + void LoadImmInstruction::setImmediateIndex(ImmediateIndex immIndex) { + reinterpret_cast<internal::LoadImmInstruction*>(this)->setImmediateIndex(immIndex); + } + /////////////////////////////////////////////////////////////////////////// // Implements the emission functions /////////////////////////////////////////////////////////////////////////// diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 8685dd4b..05c3e649 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -389,6 +389,7 @@ namespace ir { public: /*! Return the value stored in the instruction */ Immediate getImmediate(void) const; + void setImmediateIndex(ImmediateIndex immIndex); /*! Return the type of the stored value */ Type getType(void) const; /*! Return true if the given instruction is an instance of this class */ |