summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZhigang Gong <zhigang.gong@intel.com>2014-09-19 18:41:06 +0800
committerZhigang Gong <zhigang.gong@intel.com>2014-09-19 18:41:16 +0800
commit31194fb5053bb48592c6702139fe3f9a13213ac8 (patch)
treedf3badf4774132cbd828e4b1c81f0be27df1bcd1
parentff3a1d864dcab170fb018fb47e7a0c45f110c950 (diff)
draft fix.loop_opt
./opencv_test_imgproc --gtest_filter=OCL_Filter/LaplacianTest.Accuracy/60 hang. Signed-off-by: Zhigang Gong <zhigang.gong@intel.com>
-rw-r--r--backend/src/ir/function.cpp4
-rw-r--r--backend/src/ir/function.hpp5
-rw-r--r--backend/src/ir/structural_analysis.cpp49
-rw-r--r--src/cl_api.c6
4 files changed, 55 insertions, 9 deletions
diff --git a/backend/src/ir/function.cpp b/backend/src/ir/function.cpp
index deb65ef9..e9400c11 100644
--- a/backend/src/ir/function.cpp
+++ b/backend/src/ir/function.cpp
@@ -324,7 +324,9 @@ namespace ir {
BasicBlock::BasicBlock(Function &fn) : needEndif(true), needIf(true), endifLabel(0),
matchingEndifLabel(0), matchingElseLabel(0),
thisElseLabel(0), belongToStructure(false),
- isStructureExit(false), matchingStructureEntry(NULL),
+ isStructureExit(false), isLoopExit(false),
+ hasExtraBra(false),
+ matchingStructureEntry(NULL),
fn(fn) {
this->nextBlock = this->prevBlock = NULL;
}
diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp
index d0b595ee..0f145c57 100644
--- a/backend/src/ir/function.hpp
+++ b/backend/src/ir/function.hpp
@@ -122,6 +122,11 @@ namespace ir {
* identified structure. so if isStructureExit is false then matchingStructureEntry
* is meaningless. */
bool isStructureExit;
+ /* This block is an exit point of a loop block. It may not be exit point of
+ the large structure block. */
+ bool isLoopExit;
+ /* This block has an extra branch in the end of the block. */
+ bool hasExtraBra;
BasicBlock *matchingStructureEntry;
/* variable liveout is for if-else structure liveness analysis. eg. we have an sequence of
* bbs of 0, 1, 2, 3, 4 and the CFG is as below:
diff --git a/backend/src/ir/structural_analysis.cpp b/backend/src/ir/structural_analysis.cpp
index 3210224c..cce3f6b1 100644
--- a/backend/src/ir/structural_analysis.cpp
+++ b/backend/src/ir/structural_analysis.cpp
@@ -59,20 +59,27 @@ namespace analysis
}
void ControlTree::handleSelfLoopNode(Node *loopnode, ir::LabelIndex& whileLabel)
{
+ //NodeList::iterator child_iter = (*it)->children.begin();
ir::BasicBlock *pbb = loopnode->getExit();
- ir::BranchInstruction* pinsn = static_cast<ir::BranchInstruction *>(pbb->getLastInstruction());
- ir::Register reg = pinsn->getPredicateIndex();
+ GBE_ASSERT(pbb->isLoopExit);
ir::BasicBlock::iterator it = pbb->end();
it--;
+ printf("block %d pbb %p node %p node2 %p \n", pbb->getLabelIndex(), pbb, loopnode, bbmap[pbb]);
+ if (pbb->hasExtraBra) {
+ printf("has extra BRA\n");
+ it--;
+ }
+ ir::BranchInstruction* pinsn = static_cast<ir::BranchInstruction *>(&*it);
+ ir::Register reg = pinsn->getPredicateIndex();
/* since this node is an while node, so we remove the BRA instruction at the bottom of the exit BB of 'node',
* and insert WHILE instead
*/
- pbb->erase(it);
whileLabel = pinsn->getLabelIndex();
ir::Instruction insn = ir::WHILE(whileLabel, reg);
ir::Instruction* p_new_insn = pbb->getParent().newInstruction(insn);
- pbb->append(*p_new_insn);
+ pbb->insertAt(it, *p_new_insn);
pbb->whileLabel = whileLabel;
+ pbb->erase(it);
}
/* recursive mark the bbs' variable needEndif, the bbs all belong to node.*/
@@ -257,12 +264,21 @@ namespace analysis
for(size_t i = 0; i < blocks.size(); ++i)
{
bbs[i] = blocks[i];
- if(bbs[i]->getLastInstruction()->getOpcode() != ir::OP_BRA && i != blocks.size() - 1)
+ if(i != blocks.size() -1 &&
+ (bbs[i]->getLastInstruction()->getOpcode() != ir::OP_BRA ||
+ (bbs[i]->isStructureExit && bbs[i]->isLoopExit)))
{
ir::Instruction insn = ir::BRA(bbs[i]->getNextBlock()->getLabelIndex());
ir::Instruction* pNewInsn = bbs[i]->getParent().newInstruction(insn);
bbs[i]->append(*pNewInsn);
+ printf("%d isexit %d type %d %d\n", bbs[i]->getLabelIndex(), bbs[i]->isStructureExit, bbs[i]->isLoopExit);
+ if (bbs[i]->isStructureExit && bbs[i]->isLoopExit) {
+ printf("create block %d \n", bbs[i]->getLabelIndex());
+ fflush(stdout);
+ bbs[i]->hasExtraBra = true;
+ }
}
+ printf("%d isexit %d type %d bpp %p\n", bbs[i]->getLabelIndex(), bbs[i]->isStructureExit, bbs[i]->isLoopExit, bbs[i]);
}
/* now, reorder the basic blocks to reduce the unconditional jump we inserted whose
@@ -337,6 +353,12 @@ namespace analysis
it--;
bbs[i]->erase(it);
+
+ if (bbs[i]->hasExtraBra) {
+ printf("erase block %d \n", bbs[i]->getLabelIndex());
+ fflush(stdout);
+ bbs[i]->hasExtraBra = false;
+ }
}
}
}
@@ -384,9 +406,9 @@ namespace analysis
case SelfLoop:
{
- NodeList::iterator child_iter = (*it)->children.begin();
ir::LabelIndex whilelabel;
- handleSelfLoopNode(*child_iter, whilelabel);
+ printf("node %p self loop children %p enxtry %p exit %p\n", *it, *(*it)->children.begin(), (*it)->getEntry(), (*it)->getExit());
+ handleSelfLoopNode(*it, whilelabel);
}
break;
@@ -739,6 +761,7 @@ namespace analysis
iter++;
}
+ printf("new blocks block %d %p node %p\n", p->getEntry()->getLabelIndex(), p->getEntry(), p);
return insertNode(p);
}
@@ -765,6 +788,8 @@ namespace analysis
if(node->canBeHandled == false || n->canBeHandled == false)
p->canBeHandled = false;
+// printf("new if then block %d %p node %p block %d\n", p->getEntry()->getLabelIndex(), p->getEntry(), p, node->getEntry()->getLabelIndex());
+ printf("1 block %d \n", node->getEntry()->getLabelIndex());
return insertNode(p);
}
@@ -785,6 +810,7 @@ namespace analysis
if(node->canBeHandled == false || m->canBeHandled == false)
p->canBeHandled = false;
+ printf("2 block %d m %d n %d, m succ %d\n", node->getEntry()->getLabelIndex(), m->getEntry()->getLabelIndex(), n->getEntry()->getLabelIndex(), (*(m->succs().begin()))->getEntry()->getLabelIndex());
return insertNode(p);
}
@@ -806,6 +832,7 @@ namespace analysis
n->canBeHandled == false)
p->canBeHandled = false;
+ printf("3 block %d \n", node->getEntry()->getLabelIndex());
return insertNode(p);
}
@@ -826,6 +853,7 @@ namespace analysis
m->canBeHandled == false ||
n->canBeHandled == false)
p->canBeHandled = false;
+ printf("4 block %d \n", node->getEntry()->getLabelIndex());
return insertNode(p);
}
}
@@ -867,7 +895,12 @@ namespace analysis
{
Node* p = new SelfLoopNode(node);
- p->canBeHandled = false;
+ node->getExit()->isLoopExit = true;
+ printf("loop node %p %d exit block %p \n", node, node->getExit()->getLabelIndex(), node->getExit());
+ if (node->getExit()->getLabelIndex() != 10 )
+ p->canBeHandled = false;
+ else
+ p->canBeHandled = true;
return insertNode(p);
}
diff --git a/src/cl_api.c b/src/cl_api.c
index 8a2e999b..133b524c 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -2972,6 +2972,8 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue,
data->type = EnqueueNDRangeKernel;
data->queue = command_queue;
+ printf("run kernel %s \n", cl_kernel_get_name(kernel));
+ fflush(stdout);
if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
event, data, CL_COMMAND_NDRANGE_KERNEL) == CL_ENQUEUE_EXECUTE_IMM) {
if (event && (*event)->type != CL_COMMAND_USER
@@ -2981,6 +2983,10 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue,
err = cl_command_queue_flush(command_queue);
}
+
+ clFinish(command_queue);
+ printf("end kernel %s \n", cl_kernel_get_name(kernel));
+ fflush(stdout);
if(b_output_kernel_perf)
{