diff options
Diffstat (limited to 'src/gallium/drivers/r600/sb/sb_gcm.cpp')
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_gcm.cpp | 748 |
1 files changed, 748 insertions, 0 deletions
diff --git a/src/gallium/drivers/r600/sb/sb_gcm.cpp b/src/gallium/drivers/r600/sb/sb_gcm.cpp new file mode 100644 index 0000000000..9b04a1ec2c --- /dev/null +++ b/src/gallium/drivers/r600/sb/sb_gcm.cpp @@ -0,0 +1,748 @@ +/* + * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Vadim Girlin + */ + +#define GCM_DEBUG 0 + +#if GCM_DEBUG +#define GCM_DUMP(a) do { a } while(0); +#else +#define GCM_DUMP(a) +#endif + +#include <iostream> +#include <map> + +#include "sb_context.h" +#include "sb_shader.h" + +#include "sb_pass.h" +#include "sb_gcm.h" + +#include "sb_dump.h" + +namespace r600_sb { + +using std::cerr; + +int gcm::run() { + + GCM_DUMP( cerr << "==== GCM ==== \n"; sh.dump_ir(); ); + + collect_instructions(sh.root, true); + + init_def_count(uses, pending); + + for (node_iterator N, I = pending.begin(), E = pending.end(); + I != E; I = N) { + N = I; + ++N; + node *o = *I; + + GCM_DUMP( + cerr << "pending : "; + dump::dump_op(o); + cerr << "\n"; + ); + + if (td_is_ready(o)) { + + GCM_DUMP( + cerr << " ready: "; + dump::dump_op(o); + cerr << "\n"; + ); + pending.remove_node(o); + ready.push_back(o); + } else { + } + } + + sched_early(sh.root); + + if (!pending.empty()) { + cerr << "##### gcm_sched_early_pass: unscheduled ops:\n"; + dump::dump_op(pending.front()); + } + + assert(pending.empty()); + + GCM_DUMP( sh.dump_ir(); ); + + GCM_DUMP( cerr << "\n\n ############## gcm late\n\n"; ); + + collect_instructions(sh.root, false); + + init_use_count(uses, pending); + + sched_late(sh.root); + if (!pending.empty()) { + cerr << "##### gcm_sched_late_pass: unscheduled ops:\n"; + dump::dump_op(pending.front()); + } + + assert(ucs_level == 0); + assert(pending.empty()); + + return 0; +} + + +void gcm::collect_instructions(container_node *c, bool early_pass) { + if (c->is_bb()) { + + if (early_pass) { + for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { + node *n = *I; + if (n->flags & NF_DONT_MOVE) { + op_info &o = op_map[n]; + o.top_bb = o.bottom_bb = static_cast<bb_node*>(c); + } + } + } + + pending.append_from(c); + return; + } + + for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { + if (I->is_container()) { + collect_instructions(static_cast<container_node*>(*I), early_pass); + } + } +} + +void gcm::sched_early(container_node *n) { + + region_node *r = + (n->type == NT_REGION) ? static_cast<region_node*>(n) : NULL; + + if (r && r->loop_phi) { + sched_early(r->loop_phi); + } + + for (node_iterator I = n->begin(), E = n->end(); I != E; ++I) { + if (I->type == NT_OP) { + node *op = *I; + if (op->subtype == NST_PHI) { + td_release_uses(op->dst); + } + } else if (I->is_container()) { + if (I->subtype == NST_BB) { + bb_node* bb = static_cast<bb_node*>(*I); + td_sched_bb(bb); + } else { + sched_early(static_cast<container_node*>(*I)); + } + } + } + + if (r && r->phi) { + sched_early(r->phi); + } +} + +void gcm::td_schedule(bb_node *bb, node *n) { + GCM_DUMP( + cerr << "scheduling : "; + dump::dump_op(n); + cerr << "\n"; + ); + td_release_uses(n->dst); + + bb->push_back(n); + + op_map[n].top_bb = bb; + +} + +void gcm::td_sched_bb(bb_node* bb) { + GCM_DUMP( + cerr << "td scheduling BB_" << bb->id << "\n"; + ); + + while (!ready.empty()) { + for (sq_iterator N, I = ready.begin(), E = ready.end(); I != E; + I = N) { + N = I; ++N; + td_schedule(bb, *I); + ready.erase(I); + } + } +} + +bool gcm::td_is_ready(node* n) { + return uses[n] == 0; +} + +void gcm::td_release_val(value *v) { + + GCM_DUMP( + cerr << "td checking uses: "; + dump::dump_val(v); + cerr << "\n"; + ); + + use_info *u = v->uses; + while (u) { + if (u->op->parent != &pending) { + u = u->next; + continue; + } + + GCM_DUMP( + cerr << "td used in "; + dump::dump_op(u->op); + cerr << "\n"; + ); + + if (--uses[u->op] == 0) { + GCM_DUMP( + cerr << "td released : "; + dump::dump_op(u->op); + cerr << "\n"; + ); + + pending.remove_node(u->op); + ready.push_back(u->op); + } + u = u->next; + } + +} + +void gcm::td_release_uses(vvec& v) { + for (vvec::iterator I = v.begin(), E = v.end(); I != E; ++I) { + value *v = *I; + if (!v) + continue; + + if (v->is_rel()) + td_release_uses(v->mdef); + else + td_release_val(v); + } +} + +void gcm::sched_late(container_node *n) { + + bool stack_pushed = false; + + if (n->is_depart()) { + depart_node *d = static_cast<depart_node*>(n); + push_uc_stack(); + stack_pushed = true; + bu_release_phi_defs(d->target->phi, d->dep_id); + } else if (n->is_repeat()) { + repeat_node *r = static_cast<repeat_node*>(n); + assert(r->target->loop_phi); + push_uc_stack(); + stack_pushed = true; + bu_release_phi_defs(r->target->loop_phi, r->rep_id); + } + + for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) { + if (I->is_container()) { + if (I->subtype == NST_BB) { + bb_node* bb = static_cast<bb_node*>(*I); + bu_sched_bb(bb); + } else { + sched_late(static_cast<container_node*>(*I)); + } + } + } + + if (n->type == NT_IF) { + if_node *f = static_cast<if_node*>(n); + if (f->cond) + pending_defs.push_back(f->cond); + } else if (n->type == NT_REGION) { + region_node *r = static_cast<region_node*>(n); + if (r->loop_phi) + bu_release_phi_defs(r->loop_phi, 0); + } + + if (stack_pushed) + pop_uc_stack(); + +} + +void gcm::bu_sched_bb(bb_node* bb) { + GCM_DUMP( + cerr << "bu scheduling BB_" << bb->id << "\n"; + ); + + bu_bb = bb; + + if (!pending_nodes.empty()) { + GCM_DUMP( + cerr << "pending nodes:\n"; + ); + + // TODO consider sorting the exports by array_base, + // possibly it can improve performance + + for (node_list::iterator I = pending_nodes.begin(), + E = pending_nodes.end(); I != E; ++I) { + bu_release_op(*I); + } + pending_nodes.clear(); + GCM_DUMP( + cerr << "pending nodes processed...\n"; + ); + } + + + if (!pending_defs.empty()) { + for (vvec::iterator I = pending_defs.begin(), E = pending_defs.end(); + I != E; ++I) { + bu_release_val(*I); + } + pending_defs.clear(); + } + + for (sched_queue::iterator N, I = ready_above.begin(), E = ready_above.end(); + I != E; I = N) { + N = I; + ++N; + node *n = *I; + if (op_map[n].bottom_bb == bb) { + add_ready(*I); + ready_above.erase(I); + } + } + + unsigned cnt_ready[SQ_NUM]; + + container_node *clause = NULL; + unsigned last_inst_type = ~0; + unsigned last_count = 0; + + bool s = true; + while (s) { + node *n; + + s = false; + + unsigned ready_mask = 0; + + for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) { + if (!bu_ready[sq].empty() || !bu_ready_next[sq].empty()) + ready_mask |= (1 << sq); + } + + if (!ready_mask) { + for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) { + if (!bu_ready_early[sq].empty()) { + node *n = bu_ready_early[sq].front(); + bu_ready_early[sq].pop_front(); + bu_ready[sq].push_back(n); + break; + } + } + } + + for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) { + + if (!bu_ready_next[sq].empty()) + bu_ready[sq].splice(bu_ready[sq].end(), bu_ready_next[sq]); + + cnt_ready[sq] = bu_ready[sq].size(); + + if ((sq == SQ_TEX || sq == SQ_VTX) && + cnt_ready[sq] < ctx.max_fetch/2 && + !bu_ready_next[SQ_ALU].empty()) { + sq = SQ_ALU; + --sq; + continue; + } + + while (!bu_ready[sq].empty()) { + + if (last_inst_type != sq) { + clause = NULL; + last_count = 0; + last_inst_type = sq; + } + + n = bu_ready[sq].front(); + + // real count (e.g. SAMPLE_G will be expanded to 3 instructions, + // 2 SET_GRAD_ + 1 SAMPLE_G + unsigned ncnt = 1; + if (n->is_fetch_inst() && n->src.size() == 12) { + ncnt = 3; + } + + if ((sq == SQ_TEX || sq == SQ_VTX) && + ((last_count >= ctx.max_fetch/2 && + check_alu_ready_count(24)) || + last_count + ncnt > ctx.max_fetch)) + break; + else if (sq == SQ_CF && last_count > 4 && + check_alu_ready_count(24)) + break; + + bu_ready[sq].pop_front(); + + if (sq != SQ_CF) { + if (!clause) { + clause = sh.create_clause(sq == SQ_ALU ? + NST_ALU_CLAUSE : + sq == SQ_TEX ? NST_TEX_CLAUSE : + NST_VTX_CLAUSE); + bb->push_front(clause); + } + } else { + clause = bb; + } + + bu_schedule(clause, n); + s = true; + last_count += ncnt; + } + } + } + + bu_bb = NULL; + + GCM_DUMP( + cerr << "bu finished scheduling BB_" << bb->id << "\n"; + ); +} + +void gcm::bu_release_defs(vvec& v, bool src) { + for (vvec::reverse_iterator I = v.rbegin(), E = v.rend(); I != E; ++I) { + value *v = *I; + if (!v || v->is_readonly()) + continue; + + if (v->is_rel()) { + if (!v->rel->is_readonly()) + bu_release_val(v->rel); + bu_release_defs(v->muse, true); + } else if (src) + bu_release_val(v); + } +} + +void gcm::push_uc_stack() { + GCM_DUMP( + cerr << "pushing use count stack prev_level " << ucs_level + << " new level " << (ucs_level + 1) << "\n"; + ); + ++ucs_level; + if (ucs_level == nuc_stk.size()) { + nuc_stk.resize(ucs_level + 1); + } + else { + nuc_stk[ucs_level].clear(); + } +} + +bool gcm::bu_is_ready(node* n) { + nuc_map &cm = nuc_stk[ucs_level]; + nuc_map::iterator F = cm.find(n); + unsigned uc = (F == cm.end() ? 0 : F->second); + return uc == uses[n]; +} + +void gcm::bu_schedule(container_node* c, node* n) { + GCM_DUMP( + cerr << "bu scheduling : "; + dump::dump_op(n); + cerr << "\n"; + ); + + assert(op_map[n].bottom_bb == bu_bb); + + bu_release_defs(n->src, true); + bu_release_defs(n->dst, false); + + c->push_front(n); +} + +void gcm::dump_uc_stack() { + cerr << "##### uc_stk start ####\n"; + for (unsigned l = 0; l <= ucs_level; ++l) { + nuc_map &m = nuc_stk[l]; + + cerr << "nuc_stk[" << l << "] : @" << &m << "\n"; + + for (nuc_map::iterator I = m.begin(), E = m.end(); I != E; ++I) { + cerr << " uc " << I->second << " for "; + dump::dump_op(I->first); + cerr << "\n"; + } + } + cerr << "##### uc_stk end ####\n"; +} + +void gcm::pop_uc_stack() { + nuc_map &pm = nuc_stk[ucs_level]; + --ucs_level; + nuc_map &cm = nuc_stk[ucs_level]; + + GCM_DUMP( + cerr << "merging use stack from level " << (ucs_level+1) + << " to " << ucs_level << "\n"; + ); + + for (nuc_map::iterator N, I = pm.begin(), E = pm.end(); I != E; ++I) { + node *n = I->first; + + GCM_DUMP( + cerr << " " << cm[n] << " += " << I->second << " for "; + dump::dump_op(n); + cerr << "\n"; + ); + + unsigned uc = cm[n] += I->second; + + if (n->parent == &pending && uc == uses[n]) { + cm.erase(n); + pending_nodes.push_back(n); + GCM_DUMP( + cerr << "pushed pending_node due to stack pop "; + dump::dump_op(n); + cerr << "\n"; + ); + } + } +} + +void gcm::bu_find_best_bb(node *n, op_info &oi) { + + GCM_DUMP( + cerr << " find best bb : "; + dump::dump_op(n); + cerr << "\n"; + ); + + if (oi.bottom_bb) + return; + + // don't hoist generated copies + if (n->flags & NF_DONT_HOIST) { + oi.bottom_bb = bu_bb; + return; + } + + bb_node* best_bb = bu_bb; + bb_node* top_bb = oi.top_bb; + assert(oi.top_bb && !oi.bottom_bb); + + node *c = best_bb; + + // FIXME top_bb may be located inside the loop so we'll never enter it + // in the loop below, and the instruction will be incorrectly placed at the + // beginning of the shader. + // For now just check if top_bb's loop_level is higher than of + // current bb and abort the search for better bb in such case, + // but this problem may require more complete (and more expensive) fix + if (top_bb->loop_level <= best_bb->loop_level) { + while (c && c != top_bb) { + + if (c->prev) { + c = c->prev; + } else { + c = c->parent; + if (!c) + break; + continue; + } + + if (c->subtype == NST_BB) { + bb_node *bb = static_cast<bb_node*>(c); + if (bb->loop_level < best_bb->loop_level) + best_bb = bb; + } + } + } + + oi.bottom_bb = best_bb; +} + +void gcm::add_ready(node *n) { + queue_id sq = sh.get_queue_id(n); + if (n->flags & NF_SCHEDULE_EARLY) + bu_ready_early[sq].push_back(n); + else + bu_ready_next[sq].push_back(n); +} + +void gcm::bu_release_op(node * n) { + op_info &oi = op_map[n]; + + GCM_DUMP( + cerr << " bu release op "; + dump::dump_op(n); + ); + + nuc_stk[ucs_level].erase(n); + pending.remove_node(n); + + bu_find_best_bb(n, oi); + + if (oi.bottom_bb == bu_bb) { + GCM_DUMP( cerr << " ready\n";); + add_ready(n); + } else { + GCM_DUMP( cerr << " ready_above\n";); + ready_above.push_back(n); + } +} + +void gcm::bu_release_phi_defs(container_node* p, unsigned op) +{ + for (node_riterator I = p->rbegin(), E = p->rend(); I != E; ++I) { + node *o = *I; + value *v = o->src[op]; + if (v && !v->is_readonly()) + pending_defs.push_back(o->src[op]); + + } +} + +unsigned gcm::get_uc_vec(vvec &vv) { + unsigned c = 0; + for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { + value *v = *I; + if (!v) + continue; + + if (v->is_rel()) + c += get_uc_vec(v->mdef); + else + c += v->use_count(); + } + return c; +} + +void gcm::init_use_count(nuc_map& m, container_node &s) { + m.clear(); + for (node_iterator I = s.begin(), E = s.end(); I != E; ++I) { + node *n = *I; + unsigned uc = get_uc_vec(n->dst); + GCM_DUMP( + cerr << "uc " << uc << " "; + dump::dump_op(n); + cerr << "\n"; + ); + if (!uc) { + pending_nodes.push_back(n); + GCM_DUMP( + cerr << "pushed pending_node in init "; + dump::dump_op(n); + cerr << "\n"; + ); + + } else + m[n] = uc; + } +} + +void gcm::bu_release_val(value* v) { + node *n = v->any_def(); + + if (n && n->parent == &pending) { + unsigned uc = ++nuc_stk[ucs_level][n]; + unsigned uc2 = uses[n]; + + GCM_DUMP( + cerr << "release val "; + dump::dump_val(v); + cerr << " for node "; + dump::dump_op(n); + cerr << " new uc=" << uc << ", total " << uc2 << "\n"; + ); + + if (uc == uc2) + bu_release_op(n); + } + +} + +void gcm::init_def_count(nuc_map& m, container_node& s) { + m.clear(); + for (node_iterator I = s.begin(), E = s.end(); I != E; ++I) { + node *n = *I; + unsigned dc = get_dc_vec(n->src, true) + get_dc_vec(n->dst, false); + m[n] = dc; + + GCM_DUMP( + cerr << "dc " << dc << " "; + dump::dump_op(n); + cerr << "\n"; + ); + } +} + +unsigned gcm::get_dc_vec(vvec& vv, bool src) { + unsigned c = 0; + for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { + value *v = *I; + if (!v || v->is_readonly()) + continue; + + if (v->is_rel()) { + c += v->rel->def != NULL; + c += get_dc_vec(v->muse, true); + } + else if (src) { + c += v->def != NULL; + c += v->adef != NULL; + } + } + return c; +} + +unsigned gcm::real_alu_count(sched_queue& q, unsigned max) { + sq_iterator I(q.begin()), E(q.end()); + unsigned c = 0; + + while (I != E && c < max) { + node *n = *I; + if (n->is_alu_inst()) { + if (!n->is_copy_mov() || !n->src[0]->is_any_gpr()) + ++c; + } else if (n->is_alu_packed()) { + c += static_cast<container_node*>(n)->count(); + } + ++I; + } + + return c; +} + +bool gcm::check_alu_ready_count(unsigned threshold) { + unsigned r = real_alu_count(bu_ready[SQ_ALU], threshold); + if (r >= threshold) + return true; + r += real_alu_count(bu_ready_next[SQ_ALU], threshold - r); + return r >= threshold; +} + +} // namespace r600_sb |