summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGuo Yejun <yejun.guo@intel.com>2017-06-08 12:40:58 +0800
committerYang Rong <rong.r.yang@intel.com>2017-06-16 16:34:13 +0800
commitf48ec5a6c46436b7221d3360b36bcff6b7897403 (patch)
tree83764dfc01e8e28a871c05af8a9e92fcfb5b9c55
parent7f1c190c1a419d9eff946018638dfdc57b207799 (diff)
do constant folding for kernel struct args
for the following GEN IR, %41 is kernel argument (struct) the first LOAD will be mov, and the second LOAD will be indirect move (see lowerFunctionArguments). It hurts performance, and even impacts the correctness of reg liveness of indriect mov LOADI.uint64 %1114 72 ADD.int64 %78 %41 %1114 LOAD.int64.private.aligned {%79} %78 bti:255 LOADI.int64 %1115 8 ADD.int64 %1116 %78 %1115 LOAD.int64.private.aligned {%80} %1116 bti:255 this function folds the constants of 72 and 8 together, and so it will be direct mov. the GEN IR looks like: LOADI.int64 %1115 80 ADD.int64 %1116 %41 %1115 Signed-off-by: Guo Yejun <yejun.guo@intel.com> Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r--backend/src/CMakeLists.txt2
-rw-r--r--backend/src/ir/constopt.cpp144
-rw-r--r--backend/src/ir/constopt.hpp54
-rw-r--r--backend/src/ir/context.cpp5
-rw-r--r--backend/src/ir/instruction.cpp7
-rw-r--r--backend/src/ir/instruction.hpp1
6 files changed, 213 insertions, 0 deletions
diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
index c9ff8336..74d7baba 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -73,6 +73,8 @@ set (GBE_SRC
ir/value.hpp
ir/lowering.cpp
ir/lowering.hpp
+ ir/constopt.cpp
+ ir/constopt.hpp
ir/profiling.cpp
ir/profiling.hpp
ir/printf.cpp
diff --git a/backend/src/ir/constopt.cpp b/backend/src/ir/constopt.cpp
new file mode 100644
index 00000000..24878b80
--- /dev/null
+++ b/backend/src/ir/constopt.cpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Guo Yejun <yejun.guo@intel.com>
+ */
+
+#include <assert.h>
+#include "ir/context.hpp"
+#include "ir/value.hpp"
+#include "ir/constopt.hpp"
+#include "sys/set.hpp"
+
+namespace gbe {
+namespace ir {
+
+ class FunctionStructArgConstOffsetFolder : public Context
+ {
+ public:
+ /*! Build the helper structure */
+ FunctionStructArgConstOffsetFolder(Unit &unit) : Context(unit) {
+ records.clear();
+ loadImms.clear();
+ }
+ /*! Free everything we needed */
+ virtual ~FunctionStructArgConstOffsetFolder() {
+ for (size_t i = 0; i < records.size(); ++i) {
+ delete records[i];
+ }
+ records.clear();
+ loadImms.clear();
+ }
+ /*! Perform all function arguments substitution if needed */
+ void folding(const std::string &name);
+
+ private:
+ class Record { //add dst, arg (kernel struct arg base reg), imm_value
+ public:
+ Record(Register dst, Register arg, int64_t immv) :
+ _dst(dst), _arg(arg), _immv(immv) { }
+ Register _dst;
+ Register _arg;
+ int64_t _immv;
+ };
+ std::vector<Record*> records;
+ std::map<Register, LoadImmInstruction*> loadImms; //<ir reg, load reg imm>
+
+ void AddRecord(Register dst, Register arg, int64_t immv) {
+ Record* rec = new Record(dst, arg, immv);
+ records.push_back(rec);
+ }
+ };
+
+ void FunctionStructArgConstOffsetFolder::folding(const std::string &name) {
+ Function *fn = unit.getFunction(name);
+ if (fn == NULL)
+ return;
+
+ const uint32_t argNum = fn->argNum();
+ for (uint32_t argID = 0; argID < argNum; ++argID) {
+ FunctionArgument &arg = fn->getArg(argID);
+ if (arg.type != FunctionArgument::STRUCTURE)
+ continue;
+ AddRecord(arg.reg, arg.reg, 0);
+ }
+
+ fn->foreachInstruction([&](Instruction &insn) {
+ if (insn.getOpcode() == OP_LOADI) {
+ LoadImmInstruction *loadImm = cast<LoadImmInstruction>(&insn);
+ if(!loadImm)
+ return;
+
+ //to avoid regression, limit for the case: LOADI.int64 %1164 32
+ //we can loose the limit if necessary
+ if (loadImm->getImmediate().getType() != TYPE_S64 &&
+ loadImm->getImmediate().getType() != TYPE_U64)
+ return;
+
+ Register dst = insn.getDst();
+ loadImms[dst] = loadImm;
+ return;
+ }
+
+ //we will change imm of loadi directly, so it should not be dst
+ for (size_t i = 0; i < insn.getDstNum(); ++i) {
+ Register dst = insn.getDst(i);
+ assert(loadImms.find(dst) == loadImms.end());
+ }
+
+ if (insn.getOpcode() != OP_ADD)
+ return;
+
+ Register src0 = insn.getSrc(0);
+ Register src1 = insn.getSrc(1);
+ Register dst = insn.getDst();
+
+ //check if src0 is derived from kernel struct arg
+ std::vector<Record*>::iterator it =
+ std::find_if(records.begin(), records.end(), [=](Record* rec){
+ return rec->_dst == src0;
+ } );
+ if (it == records.end())
+ return;
+
+ //check if src1 is imm value
+ if (loadImms.find(src1) == loadImms.end())
+ return;
+
+ Record* rec = *it;
+ LoadImmInstruction *loadImm = loadImms[src1];
+ Immediate imm = loadImm->getImmediate();
+ int64_t newvalue = imm.getIntegerValue() + rec->_immv;
+
+ if (rec->_dst != rec->_arg) { //directly dervied from arg if they are equal
+ //change src0 to be the kernel struct arg
+ insn.setSrc(0, rec->_arg);
+
+ //change the value of src1
+ ImmediateIndex immIndex = fn->newImmediate(newvalue);
+ loadImm->setImmediateIndex(immIndex);
+ }
+ AddRecord(dst, rec->_arg, newvalue);
+ });
+ }
+
+ void foldFunctionStructArgConstOffset(Unit &unit, const std::string &functionName) {
+ FunctionStructArgConstOffsetFolder folder(unit);
+ folder.folding(functionName);
+ }
+
+} /* namespace ir */
+}
diff --git a/backend/src/ir/constopt.hpp b/backend/src/ir/constopt.hpp
new file mode 100644
index 00000000..f272637d
--- /dev/null
+++ b/backend/src/ir/constopt.hpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Guo Yejun <yejun.guo@intel.com>
+ */
+
+#ifndef __GBE_IR_CONSTOPT_HPP__
+#define __GBE_IR_CONSTOPT_HPP__
+
+namespace gbe {
+namespace ir {
+
+ // Structure to update
+ class Unit;
+
+ // TODO
+ void foldConstant(Unit &unit, const std::string &functionName);
+ void propagateConstant(Unit &unit, const std::string &functionName);
+
+ // for the following GEN IR, %41 is kernel argument (struct)
+ // the first LOAD will be mov, and the second LOAD will be indirect move
+ // (see lowerFunctionArguments). It hurts performance,
+ // and even impacts the correctness of reg liveness of indriect mov
+ //
+ // LOADI.uint64 %1114 72
+ // ADD.int64 %78 %41 %1114
+ // LOAD.int64.private.aligned {%79} %78 bti:255
+ // LOADI.int64 %1115 8
+ // ADD.int64 %1116 %78 %1115
+ // LOAD.int64.private.aligned {%80} %1116 bti:255
+ //
+ // this function folds the constants of 72 and 8 together,
+ // and so it will be direct mov.
+ // the GEN IR looks like:
+ // LOADI.int64 %1115 80
+ // ADD.int64 %1116 %41 %1115
+ void foldFunctionStructArgConstOffset(Unit &unit, const std::string &functionName);
+} /* namespace ir */
+} /* namespace gbe */
+
+#endif /* __GBE_IR_LOWERING_HPP__ */
diff --git a/backend/src/ir/context.cpp b/backend/src/ir/context.cpp
index e4aac088..f60d33f4 100644
--- a/backend/src/ir/context.cpp
+++ b/backend/src/ir/context.cpp
@@ -24,6 +24,7 @@
#include "ir/context.hpp"
#include "ir/unit.hpp"
#include "ir/lowering.hpp"
+#include "ir/constopt.hpp"
namespace gbe {
namespace ir {
@@ -82,6 +83,10 @@ namespace ir {
fn->sortLabels();
fn->computeCFG();
+ //TODO: do constant folding and propagation for GEN IR
+ //here as the first step, we just do constant folding for kernel struct args
+ foldFunctionStructArgConstOffset(unit, fn->getName());
+
// Spill function argument to the stack if required and identify which
// function arguments can use constant push
lowerFunctionArguments(unit, fn->getName());
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index a9156ffe..48590fd1 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -833,6 +833,9 @@ namespace ir {
INLINE Immediate getImmediate(const Function &fn) const {
return fn.getImmediate(immediateIndex);
}
+ INLINE void setImmediateIndex(ImmediateIndex immIndex) {
+ immediateIndex = immIndex;
+ }
INLINE Type getType(void) const { return this->type; }
bool wellFormed(const Function &fn, std::string &why) const;
INLINE void out(std::ostream &out, const Function &fn) const;
@@ -2445,6 +2448,10 @@ DECL_MEM_FN(MemInstruction, void, setBtiReg(Register reg), setBtiReg(reg))
return reinterpret_cast<const internal::LoadImmInstruction*>(this)->getImmediate(fn);
}
+ void LoadImmInstruction::setImmediateIndex(ImmediateIndex immIndex) {
+ reinterpret_cast<internal::LoadImmInstruction*>(this)->setImmediateIndex(immIndex);
+ }
+
///////////////////////////////////////////////////////////////////////////
// Implements the emission functions
///////////////////////////////////////////////////////////////////////////
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 8685dd4b..05c3e649 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -389,6 +389,7 @@ namespace ir {
public:
/*! Return the value stored in the instruction */
Immediate getImmediate(void) const;
+ void setImmediateIndex(ImmediateIndex immIndex);
/*! Return the type of the stored value */
Type getType(void) const;
/*! Return true if the given instruction is an instance of this class */