diff options
author | Vadim Girlin <vadimgirlin@gmail.com> | 2013-07-23 22:45:35 +0400 |
---|---|---|
committer | Vadim Girlin <vadimgirlin@gmail.com> | 2013-07-23 22:45:35 +0400 |
commit | 439e0cf1975e0b8bb8c0942da5b9ec6e24e61a24 (patch) | |
tree | 8ad28b27435682cd8cb5af50a99f635ac3278383 | |
parent | 759731de4331eb92ea47e005b79bd72fd44b3ab4 (diff) |
wip
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_bc_finalize.cpp | 27 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_bc_parser.cpp | 8 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_core.cpp | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_ir.h | 11 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_ra_init.cpp | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_sched.cpp | 78 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_sched.h | 11 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_shader.cpp | 239 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_shader.h | 11 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_ssa_builder.cpp | 5 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_tgsi.cpp | 591 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_tgsi.h | 21 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_valtable.cpp | 2 |
13 files changed, 725 insertions, 285 deletions
diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp index 21432912e4..ffeb08414b 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp @@ -32,6 +32,8 @@ #define FBC_DUMP(q) #endif +#include "cmath" + #include "sb_bc.h" #include "sb_shader.h" #include "sb_pass.h" @@ -337,13 +339,28 @@ void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) { literal lv = v->literal_value; src.chan = 0; + if (src.abs) { + lv.f = fabs(lv.f); + src.abs = 0; + } + if (src.neg) { + lv.f = -lv.f; + src.neg = 0; + } + if (lv == literal(0)) src.sel = ALU_SRC_0; else if (lv == literal(0.5f)) src.sel = ALU_SRC_0_5; - else if (lv == literal(1.0f)) + else if (lv == literal(-0.5f)) { + src.sel = ALU_SRC_0_5; + src.neg = 1; + } else if (lv == literal(1.0f)) + src.sel = ALU_SRC_1; + else if (lv == literal(-1.0f)) { src.sel = ALU_SRC_1; - else if (lv == literal(1)) + src.neg = 1; + } else if (lv == literal(1)) src.sel = ALU_SRC_1_INT; else if (lv == literal(-1)) src.sel = ALU_SRC_M_1_INT; @@ -481,9 +498,11 @@ void bc_finalizer::finalize_fetch(fetch_node* f) { value *v = f->src[chan]; - if (!v || v->is_undef()) { + if (!v) sel = SEL_MASK; - } else if (v->is_const()) { + else if (v->is_undef()) + sel = SEL_0; + else if (v->is_const()) { literal l = v->literal_value; if (l == literal(0)) sel = SEL_0; diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp index 0b1d7cb919..320a0811a0 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp @@ -382,6 +382,14 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) { bc_alu_src &src = n->bc.src[s]; if (src.sel == ALU_SRC_LITERAL) { + if (src.abs) { + src.value.f = fabs(src.value.f); + src.abs = 0; + } + if (src.neg) { + src.value.f = -src.value.f; + src.neg = 0; + } n->src[s] = sh->get_const_value(src.value); } else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) { unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ? diff --git a/src/gallium/drivers/r600/sb/sb_core.cpp b/src/gallium/drivers/r600/sb/sb_core.cpp index 696e10b68a..08dc032f07 100644 --- a/src/gallium/drivers/r600/sb/sb_core.cpp +++ b/src/gallium/drivers/r600/sb/sb_core.cpp @@ -206,7 +206,7 @@ int r600_sb_compile_tgsi(struct r600_context *rctx, time_start = os_time_get_nano(); } - unsigned shader_id = r600_next_shader_id(); + unsigned shader_id = bc->debug_id; SB_DUMP_STAT( sblog << "\nsb: shader " << shader_id << "\n"; ); // translate from tgsi diff --git a/src/gallium/drivers/r600/sb/sb_ir.h b/src/gallium/drivers/r600/sb/sb_ir.h index f5ecad6b3f..716af597c5 100644 --- a/src/gallium/drivers/r600/sb/sb_ir.h +++ b/src/gallium/drivers/r600/sb/sb_ir.h @@ -493,7 +493,7 @@ protected: value(unsigned sh_id, value_kind k, sel_chan select, unsigned ver = 0) : kind(k), flags(), rel(), array(), - version(ver), select(select), pin_gpr(select), gpr(), + version(ver), select(select), pin_gpr(), gpr(), gvn_source(), ghash(), def(), adef(), uses(), constraint(), chunk(), literal_value(), uid(sh_id) {} @@ -560,7 +560,8 @@ public: } bool is_any_gpr() { - return (kind == VLK_REG || kind == VLK_TEMP || is_tgsi_value()); + return (!rel && + (kind == VLK_REG || kind == VLK_TEMP || is_tgsi_value())); } bool is_agpr() { @@ -604,8 +605,10 @@ public: && literal_value != literal(0) && literal_value != literal(1) && literal_value != literal(-1) - && literal_value != literal(0.5) - && literal_value != literal(1.0); + && literal_value != literal(0.5f) + && literal_value != literal(-0.5f) + && literal_value != literal(1.0f) + && literal_value != literal(-1.0f); } void add_use(node *n, use_kind kind, int arg); diff --git a/src/gallium/drivers/r600/sb/sb_ra_init.cpp b/src/gallium/drivers/r600/sb/sb_ra_init.cpp index 4da556032f..856a2d9a46 100644 --- a/src/gallium/drivers/r600/sb/sb_ra_init.cpp +++ b/src/gallium/drivers/r600/sb/sb_ra_init.cpp @@ -349,9 +349,7 @@ void ra_init::process_op(node* n) { break; } } - } - - if (n->is_fetch_inst() || n->is_cf_inst()) { + } else if (n->is_fetch_inst() || n->is_cf_inst()) { for (vvec::iterator I = n->src.begin(), E = n->src.end(); I != E; ++I) { value *v = *I; if (v && v->is_sgpr()) diff --git a/src/gallium/drivers/r600/sb/sb_sched.cpp b/src/gallium/drivers/r600/sb/sb_sched.cpp index f0e41f5863..fd0f761549 100644 --- a/src/gallium/drivers/r600/sb/sb_sched.cpp +++ b/src/gallium/drivers/r600/sb/sb_sched.cpp @@ -939,9 +939,10 @@ void post_scheduler::update_live(node *n, val_set *born) { void post_scheduler::process_group() { alu_group_tracker &rt = alu.grp(); - val_set vals_born; + prev_array_read.clear(); + recolor_locals(); PSC_DUMP( @@ -956,6 +957,7 @@ void post_scheduler::process_group() { continue; update_live(n, &vals_born); + update_prev_array_read(n); } PSC_DUMP( @@ -1014,7 +1016,10 @@ void post_scheduler::schedule_alu(container_node *c) { prev_regmap = regmap; if (!prepare_alu_group()) { - if (alu.current_ar) { + if (latency_check_failed) { + emit_nop_group(); + continue; + } else if (alu.current_ar) { emit_load_ar(); continue; } else @@ -1263,6 +1268,11 @@ bool post_scheduler::map_src_val(value *v) { return true; sel_chan gpr = v->get_final_gpr(); + + PSC_DUMP( + sblog << "map src " << *v << " to " << gpr << "\n"; + ); + rv_map::iterator F = regmap.find(gpr); value *c = NULL; if (F != regmap.end()) { @@ -1436,6 +1446,11 @@ unsigned post_scheduler::try_add_instruction(node *n) { alu_group_tracker &rt = alu.grp(); +#if 0 // this seems not a problem so far at least on evergreen + if (!check_latency(n)) + return 0; +#endif + unsigned avail_slots = rt.avail_slots(); if (n->is_alu_packed()) { @@ -1606,6 +1621,8 @@ bool post_scheduler::prepare_alu_group() { alu_group_tracker &rt = alu.grp(); + latency_check_failed = false; + unsigned i1 = 0; PSC_DUMP( @@ -1634,7 +1651,6 @@ bool post_scheduler::prepare_alu_group() { sblog << "\n"; ); - unsigned cnt = try_add_instruction(n); if (!cnt) @@ -1970,4 +1986,60 @@ void rp_gpr_tracker::dump() { } } +void post_scheduler::update_prev_array_read(alu_node* n) { + for (vvec::iterator I = n->src.begin(), E = n->src.end(); I != E; ++I) { + value *v = *I; + + if (!v || !v->array) + continue; + + prev_array_read.push_back(v); + } +} + +bool post_scheduler::check_latency(node* n) { + for (vvec::iterator I = n->dst.begin(), E = n->dst.end(); I != E; ++I) { + value *d = *I; + + if (!d || !d->array) + continue; + + if (!check_value_latency(d)) + return false; + } + return true; +} + +bool post_scheduler::check_value_latency(value* v) { + for (vvec::iterator I = prev_array_read.begin(), E = prev_array_read.end(); + I != E; ++I) { + value *r = *I; + + if (r->array == v->array) { + bool rel_write = v->is_rel(); + bool rel_read = r->is_rel(); + + if (rel_write ^ rel_read) { + latency_check_failed = true; + return false; + } + } + } + return true; +} + +void post_scheduler::emit_nop_group() { + alu_node * a = sh.create_alu(); + a->bc.set_op(ALU_OP0_NOP); + + alu_group_tracker &rt = alu.grp(); + if (!rt.try_reserve(a)) { + sblog << "can't emit NOP group : "; + dump::dump_op(a); + sblog << "\n"; + } + + alu.emit_group(); +} + } // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_sched.h b/src/gallium/drivers/r600/sb/sb_sched.h index a74484f50b..40e8b15c9d 100644 --- a/src/gallium/drivers/r600/sb/sb_sched.h +++ b/src/gallium/drivers/r600/sb/sb_sched.h @@ -254,11 +254,15 @@ class post_scheduler : public pass { val_set cleared_interf; + vvec prev_array_read; + bool latency_check_failed; + public: post_scheduler(shader &sh) : pass(sh), ready(), ready_copies(), pending(), cur_bb(), - live(), ucm(), alu(sh), regmap(), cleared_interf() {} + live(), ucm(), alu(sh), regmap(), cleared_interf(), + prev_array_read(), latency_check_failed() {} virtual int run(); void run_on(container_node *n); @@ -317,6 +321,11 @@ public: void emit_clause(); void process_ready_copies(); + + void update_prev_array_read(alu_node *n); + bool check_latency(node *n); + bool check_value_latency(value *v); + void emit_nop_group(); }; } // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_shader.cpp b/src/gallium/drivers/r600/sb/sb_shader.cpp index 2be117df2e..24433649e7 100644 --- a/src/gallium/drivers/r600/sb/sb_shader.cpp +++ b/src/gallium/drivers/r600/sb/sb_shader.cpp @@ -31,23 +31,22 @@ namespace r600_sb { shader::shader(sb_context &sctx, shader_target t, unsigned id, bool direct_tgsi) -: ctx(sctx), next_temp_value_index(temp_regid_offset), pred_sels(), - regions(), inputs(), undef(), val_pool(sizeof(value)), - pool(), all_nodes(), bc(sctx.hw_class_bit()), src_stats(), opt_stats(), - errors(), optimized(), id(id), - coal(*this), bbs(), - target(t), vt(ex), ex(*this), root(), - compute_interferences(), - has_alu_predication(), uses_gradients(), safe_math(), ngpr(), nstack(), - direct_tgsi(direct_tgsi) {} + : ctx(sctx), next_temp_value_index(temp_regid_offset), pred_sels(), + regions(), inputs(), undef(), val_pool(sizeof(value)), pool(), + all_nodes(), bc(sctx.hw_class_bit()), src_stats(), opt_stats(), + errors(), optimized(), id(id), coal(*this), bbs(), target(t), + vt(ex), ex(*this), root(), compute_interferences(), + has_alu_predication(), uses_gradients(), safe_math(), ngpr(), + nstack(), direct_tgsi(direct_tgsi) { +} bool shader::assign_slot(alu_node* n, alu_node *slots[5]) { unsigned slot_flags = ctx.alu_slots(n->bc.op); unsigned slot = n->bc.dst_chan; - if (!ctx.is_cayman() && (!(slot_flags & AF_V) || slots[slot]) && - (slot_flags & AF_S)) + if (!ctx.is_cayman() && (!(slot_flags & AF_V) || slots[slot]) + && (slot_flags & AF_S)) slot = SLOT_TRANS; if (slots[slot]) @@ -59,7 +58,7 @@ bool shader::assign_slot(alu_node* n, alu_node *slots[5]) { } void shader::add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask, - bool src) { + bool src) { unsigned chan = 0; while (comp_mask) { if (comp_mask & 1) { @@ -72,7 +71,7 @@ void shader::add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask, if (v->array && !v->array->gpr) { // if pinned value can be accessed with indirect addressing // pin the entire array to its original location - v->array->gpr = v->array->pin_gpr; + v->array->gpr = v->array->base_sel; } vec.push_back(v); } @@ -81,16 +80,49 @@ void shader::add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask, } } +void shader::add_pinned_inputs(vvec& vec, value_kind kind, unsigned sel, + unsigned comp_mask, bool src, + unsigned pin_gpr_sel) { + unsigned chan = 0; + while (comp_mask) { + if (comp_mask & 1) { + value *v = get_reg_value(kind, src, sel, chan, false); + if (!v->array) { + v->flags |= (VLF_PIN_REG | VLF_PIN_CHAN); + v->gpr = v->pin_gpr = sel_chan(pin_gpr_sel, chan); + v->fix(); + } +/* if (v->array && !v->array->gpr) { + // if pinned value can be accessed with indirect addressing + // pin the entire array to its original location + v->array->gpr = sel_chan( + pin_gpr_sel - (sel - v->array->base_sel), chan); + } +*/ vec.push_back(v); + } + comp_mask >>= 1; + ++chan; + } +} + cf_node* shader::create_clause(node_subtype nst) { cf_node *n = create_cf(); n->subtype = nst; switch (nst) { - case NST_ALU_CLAUSE: n->bc.set_op(CF_OP_ALU); break; - case NST_TEX_CLAUSE: n->bc.set_op(CF_OP_TEX); break; - case NST_VTX_CLAUSE: n->bc.set_op(CF_OP_VTX); break; - default: assert(!"invalid clause type"); break; + case NST_ALU_CLAUSE: + n->bc.set_op(CF_OP_ALU); + break; + case NST_TEX_CLAUSE: + n->bc.set_op(CF_OP_TEX); + break; + case NST_VTX_CLAUSE: + n->bc.set_op(CF_OP_VTX); + break; + default: + assert(!"invalid clause type"); + break; } n->bc.barrier = 1; @@ -127,9 +159,11 @@ alu_node* shader::create_copy_mov(value* dst, value* src, unsigned affcost) { return n; } -value* shader::get_value(value_kind kind, sel_chan id, - unsigned version) { - unsigned key = (kind << 28) | (version << 16) | id; +value* shader::get_value(value_kind kind, sel_chan id, unsigned version) { + unsigned key = (kind << 28) | (version << 14) | id; + assert((id & ((1 << 14) - 1)) == id); + assert((version & ((1 << 14) - 1)) == version); + value_map::iterator i = reg_values.find(key); if (i != reg_values.end()) { return i->second; @@ -149,7 +183,7 @@ void shader::fill_array_values(rel_array *a, vvec &vv) { vv.resize(sz); for (unsigned i = 0; i < a->array_size; ++i) { vv[i] = get_reg_value(a->kind, true, a->base_sel.sel() + i, - a->base_sel.chan(), false); + a->base_sel.chan(), false); } } @@ -176,19 +210,19 @@ value* shader::get_reg_value(value_kind kind, bool src, unsigned sel, return v; } -value* shader::create_temp_value() { - sel_chan id(++next_temp_value_index, 0); +value* shader::create_temp_value(int chan) { + sel_chan id(++next_temp_value_index, chan); return get_value(VLK_TEMP, id, 0); } value* shader::get_kcache_value(unsigned bank, unsigned index, unsigned chan) { return get_ro_value(kcache_values, VLK_KCACHE, - sel_chan((bank << 12) | index, chan)); + sel_chan((bank << 12) | index, chan)); } void shader::add_input(unsigned gpr, bool preloaded, unsigned comp_mask) { if (inputs.size() <= gpr) - inputs.resize(gpr+1); + inputs.resize(gpr + 1); shader_input &i = inputs[gpr]; i.preloaded = preloaded; @@ -209,8 +243,8 @@ void shader::init_call_fs(cf_node* cf) { assert(target == TARGET_VS); - for(inputs_vec::const_iterator I = inputs.begin(), - E = inputs.end(); I != E; ++I, ++gpr) { + for (inputs_vec::const_iterator I = inputs.begin(), E = inputs.end(); + I != E; ++I, ++gpr) { if (!I->preloaded) add_pinned_gpr_values(cf->dst, gpr, I->comp_mask, false); else @@ -225,7 +259,8 @@ void shader::set_undef(val_set& s) { val_set &vs = s; - for (val_set::iterator I = vs.begin(*this), E = vs.end(*this); I != E; ++I) { + for (val_set::iterator I = vs.begin(*this), E = vs.end(*this); I != E; + ++I) { value *v = *I; assert(!v->is_readonly() && !v->is_rel()); @@ -260,14 +295,14 @@ alu_node* shader::create_alu() { alu_group_node* shader::create_alu_group() { alu_group_node* n = - new (pool.allocate(sizeof(alu_group_node))) alu_group_node(); + new (pool.allocate(sizeof(alu_group_node))) alu_group_node(); all_nodes.push_back(n); return n; } alu_packed_node* shader::create_alu_packed() { alu_packed_node* n = - new (pool.allocate(sizeof(alu_packed_node))) alu_packed_node(); + new (pool.allocate(sizeof(alu_packed_node))) alu_packed_node(); all_nodes.push_back(n); return n; } @@ -288,33 +323,34 @@ fetch_node* shader::create_fetch() { } region_node* shader::create_region() { - region_node *n = new (pool.allocate(sizeof(region_node))) - region_node(regions.size()); + region_node *n = new (pool.allocate(sizeof(region_node))) region_node( + regions.size()); regions.push_back(n); all_nodes.push_back(n); return n; } depart_node* shader::create_depart(region_node* target) { - depart_node* n = new (pool.allocate(sizeof(depart_node))) - depart_node(target, target->departs.size()); + depart_node* n = new (pool.allocate(sizeof(depart_node))) depart_node( + target, target->departs.size()); target->departs.push_back(n); all_nodes.push_back(n); return n; } repeat_node* shader::create_repeat(region_node* target) { - repeat_node* n = new (pool.allocate(sizeof(repeat_node))) - repeat_node(target, target->repeats.size() + 1); + repeat_node* n = new (pool.allocate(sizeof(repeat_node))) repeat_node( + target, target->repeats.size() + 1); target->repeats.push_back(n); all_nodes.push_back(n); return n; } container_node* shader::create_container(node_type nt, node_subtype nst, - node_flags flags) { - container_node *n = new (pool.allocate(sizeof(container_node))) - container_node(nt, nst, flags); + node_flags flags) { + container_node *n = + new (pool.allocate(sizeof(container_node))) container_node(nt, nst, + flags); all_nodes.push_back(n); return n; } @@ -342,12 +378,12 @@ value* shader::get_const_value(const literal &v) { } shader::~shader() { - for (node_vec::iterator I = all_nodes.begin(), E = all_nodes.end(); - I != E; ++I) + for (node_vec::iterator I = all_nodes.begin(), E = all_nodes.end(); I != E; + ++I) (*I)->~node(); for (rel_array_vec::iterator I = rel_arrays.begin(), E = rel_arrays.end(); - I != E; ++I) { + I != E; ++I) { delete *I; } } @@ -371,29 +407,32 @@ value* shader::get_value_version(value* v, unsigned ver) { rel_array* shader::get_rel_array(value_kind kind, unsigned sel, unsigned chan) { - for (regarray_vec::iterator I = rel_arrays.begin(), - E = rel_arrays.end(); I != E; ++I) { + for (regarray_vec::iterator I = rel_arrays.begin(), E = rel_arrays.end(); + I != E; ++I) { rel_array* a = *I; if (kind != a->kind) continue; unsigned achan = a->base_sel.chan(); unsigned areg = a->base_sel.sel(); - if (achan == chan && (sel >= areg && sel < areg+a->array_size)) + if (achan == chan && (sel >= areg && sel < areg + a->array_size)) return a; } return NULL; } -void shader::add_rel_array(value_kind kind, unsigned sel_start, unsigned sel_count, - unsigned comp_mask, unsigned array_id) { +void shader::add_rel_array(value_kind kind, unsigned sel_start, + unsigned sel_count, unsigned comp_mask, + unsigned array_id) { unsigned chan = 0; while (comp_mask) { if (comp_mask & 1) { - rel_array *a = new rel_array(kind, - sel_chan(sel_start, chan), sel_count, array_id); + rel_array *a = new rel_array(kind, sel_chan(sel_start, chan), + sel_count, array_id); - SB_DUMP_PASS( sblog << "add_gpr_array: @" << a->base_sel - << " [" << a->array_size << "]\n"; + SB_DUMP_PASS( + sblog << "add_gpr_array: @" << a->base_sel << " [" + << a->array_size << "]\n" + ; ); rel_arrays.push_back(a); @@ -429,13 +468,18 @@ std::string shader::get_full_target_name() { const char* shader::get_shader_target_name() { switch (target) { - case TARGET_VS: return "VS"; - case TARGET_PS: return "PS"; - case TARGET_GS: return "GS"; - case TARGET_COMPUTE: return "COMPUTE"; - case TARGET_FETCH: return "FETCH"; - default: - return "INVALID_TARGET"; + case TARGET_VS: + return "VS"; + case TARGET_PS: + return "PS"; + case TARGET_GS: + return "GS"; + case TARGET_COMPUTE: + return "COMPUTE"; + case TARGET_FETCH: + return "FETCH"; + default: + return "INVALID_TARGET"; } } @@ -452,7 +496,6 @@ void shader::simplify_dep_rep(node* dr) { dr->parent->cut(dr->next, NULL); } - // FIXME this is used in some places as the max non-temp gpr, // (MAX_GPR - 2 * ctx.alu_temp_gprs) should be used for that instead. unsigned shader::first_temp_gpr() { @@ -524,10 +567,8 @@ void shader::create_bbs(container_node* n, bbs_vec &bbs, int loop_level) { if (inside_bb && !last_inside_bb) bb_start = I; else if (!inside_bb) { - if (last_inside_bb - && I->type != NT_REPEAT - && I->type != NT_DEPART - && I->type != NT_IF) { + if (last_inside_bb && I->type != NT_REPEAT && I->type != NT_DEPART + && I->type != NT_IF) { bb_node *bb = create_bb(bbs.size(), loop_level); bbs.push_back(bb); n->insert_node_before(*bb_start, bb); @@ -543,7 +584,7 @@ void shader::create_bbs(container_node* n, bbs_vec &bbs, int loop_level) { } create_bbs(static_cast<container_node*>(k), bbs, - loop_level + loop); + loop_level + loop); } } @@ -557,7 +598,7 @@ void shader::create_bbs(container_node* n, bbs_vec &bbs, int loop_level) { bb_node *bb = create_bb(bbs.size(), loop_level); bbs.push_back(bb); if (n->empty()) - n->push_back(bb); + n->push_back(bb); else { n->insert_node_before(*bb_start, bb); if (bb_start != n->end()) @@ -582,22 +623,22 @@ void shader::expand_bbs(bbs_vec &bbs) { sched_queue_id shader::get_queue_id(node* n) { switch (n->subtype) { - case NST_ALU_INST: - case NST_ALU_PACKED_INST: - case NST_COPY: - case NST_PSI: - return SQ_ALU; - case NST_FETCH_INST: { - fetch_node *f = static_cast<fetch_node*>(n); - if (ctx.is_r600() && (f->bc.op_ptr->flags & FF_VTX)) - return SQ_VTX; - return SQ_TEX; - } - case NST_CF_INST: - return SQ_CF; - default: - assert(0); - return SQ_NUM; + case NST_ALU_INST: + case NST_ALU_PACKED_INST: + case NST_COPY: + case NST_PSI: + return SQ_ALU; + case NST_FETCH_INST: { + fetch_node *f = static_cast<fetch_node*>(n); + if (ctx.is_r600() && (f->bc.op_ptr->flags & FF_VTX)) + return SQ_VTX; + return SQ_TEX; + } + case NST_CF_INST: + return SQ_CF; + default: + assert(0); + return SQ_NUM; } } @@ -642,10 +683,9 @@ void shader_stats::accumulate(shader_stats& s) { void shader_stats::dump() { sblog << "dw:" << ndw << ", gpr:" << ngpr << ", stk:" << nstack - << ", alu groups:" << alu_groups << ", alu clauses: " << alu_clauses - << ", alu:" << alu << ", fetch:" << fetch - << ", fetch clauses:" << fetch_clauses - << ", cf:" << cf; + << ", alu groups:" << alu_groups << ", alu clauses: " << alu_clauses + << ", alu:" << alu << ", fetch:" << fetch << ", fetch clauses:" + << fetch_clauses << ", cf:" << cf; if (shaders > 1) sblog << ", shaders:" << shaders; @@ -655,7 +695,7 @@ void shader_stats::dump() { static void print_diff(unsigned d1, unsigned d2) { if (d1) - sblog << ((int)d2 - (int)d1) * 100 / (int)d1 << "%"; + sblog << ((int) d2 - (int) d1) * 100 / (int) d1 << "%"; else if (d2) sblog << "N/A"; else @@ -663,15 +703,24 @@ static void print_diff(unsigned d1, unsigned d2) { } void shader_stats::dump_diff(shader_stats& s) { - sblog << "dw:"; print_diff(ndw, s.ndw); - sblog << ", gpr:" ; print_diff(ngpr, s.ngpr); - sblog << ", stk:" ; print_diff(nstack, s.nstack); - sblog << ", alu groups:" ; print_diff(alu_groups, s.alu_groups); - sblog << ", alu clauses: " ; print_diff(alu_clauses, s.alu_clauses); - sblog << ", alu:" ; print_diff(alu, s.alu); - sblog << ", fetch:" ; print_diff(fetch, s.fetch); - sblog << ", fetch clauses:" ; print_diff(fetch_clauses, s.fetch_clauses); - sblog << ", cf:" ; print_diff(cf, s.cf); + sblog << "dw:"; + print_diff(ndw, s.ndw); + sblog << ", gpr:"; + print_diff(ngpr, s.ngpr); + sblog << ", stk:"; + print_diff(nstack, s.nstack); + sblog << ", alu groups:"; + print_diff(alu_groups, s.alu_groups); + sblog << ", alu clauses: "; + print_diff(alu_clauses, s.alu_clauses); + sblog << ", alu:"; + print_diff(alu, s.alu); + sblog << ", fetch:"; + print_diff(fetch, s.fetch); + sblog << ", fetch clauses:"; + print_diff(fetch_clauses, s.fetch_clauses); + sblog << ", cf:"; + print_diff(cf, s.cf); sblog << "\n"; } diff --git a/src/gallium/drivers/r600/sb/sb_shader.h b/src/gallium/drivers/r600/sb/sb_shader.h index 94e1470192..abc2d6bb34 100644 --- a/src/gallium/drivers/r600/sb/sb_shader.h +++ b/src/gallium/drivers/r600/sb/sb_shader.h @@ -277,7 +277,7 @@ public: coalescer coal; - static const unsigned temp_regid_offset = 512; + static const unsigned temp_regid_offset = 0; bbs_vec bbs; @@ -308,7 +308,7 @@ public: value* get_const_value(const literal & v); value* get_special_value(unsigned sv_id, unsigned version = 0); - value* create_temp_value(); + value* create_temp_value(int chan = 0); value* get_reg_value(value_kind kind, bool src, unsigned reg, unsigned chan, bool rel, value *r = NULL, unsigned arr_id = 0); @@ -323,7 +323,12 @@ public: value* get_value_version(value* v, unsigned ver); void init(); - void add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask, bool src); + void add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask, + bool src); + + void add_pinned_inputs(vvec& vec, value_kind kind, unsigned sel, + unsigned comp_mask, bool src, + unsigned pin_gpr_sel); void dump_ir(); diff --git a/src/gallium/drivers/r600/sb/sb_ssa_builder.cpp b/src/gallium/drivers/r600/sb/sb_ssa_builder.cpp index 3ad628bb68..6df2979452 100644 --- a/src/gallium/drivers/r600/sb/sb_ssa_builder.cpp +++ b/src/gallium/drivers/r600/sb/sb_ssa_builder.cpp @@ -201,8 +201,11 @@ bool ssa_rename::visit(alu_node& n, bool enter) { if (!n.dst.empty() && n.dst[0]) { // FIXME probably use separate pass for such things - if ((n.bc.op_ptr->flags & AF_INTERP) || n.bc.op == ALU_OP2_CUBE) + if ((n.bc.op_ptr->flags & AF_INTERP) || n.bc.op == ALU_OP2_CUBE) { n.dst[0]->flags |= VLF_PIN_CHAN; + n.dst[0]->pin_gpr = sel_chan(n.dst[0]->pin_gpr.sel(), + n.bc.slot); + } } } return true; diff --git a/src/gallium/drivers/r600/sb/sb_tgsi.cpp b/src/gallium/drivers/r600/sb/sb_tgsi.cpp index 2e4f778a81..0f0d6df745 100644 --- a/src/gallium/drivers/r600/sb/sb_tgsi.cpp +++ b/src/gallium/drivers/r600/sb/sb_tgsi.cpp @@ -79,7 +79,7 @@ const tgsi_translator::tgsi_inst_info tgsi_translator::tgsi_info_table[TGSI_OPCO /* 33 */ TI_DESC(ABS, ALU_OP1_MOV, ti_alu, 0), /* 34 */ TI_DESC(RCC, 0, ti_unsupported, 0), /* 35 */ TI_DESC(DPH, 0, ti_dot, 0), - /* 36 */ TI_DESC(COS, 0, ti_unsupported, 0), + /* 36 */ TI_DESC(COS, ALU_OP1_COS, ti_trig, 0), /* 37 */ TI_DESC(DDX, FETCH_OP_GET_GRADIENTS_H, ti_tex, 0), /* 38 */ TI_DESC(DDY, FETCH_OP_GET_GRADIENTS_V, ti_tex, 0), /* 39 */ TI_DESC(KILL, 0, ti_kill, 0), @@ -91,7 +91,7 @@ const tgsi_translator::tgsi_inst_info tgsi_translator::tgsi_info_table[TGSI_OPCO /* 45 */ TI_DESC(SEQ, ALU_OP2_SETE, ti_alu, 0), /* 46 */ TI_DESC(SFL, 0, ti_unsupported, 0), /* 47 */ TI_DESC(SGT, ALU_OP2_SETGT, ti_alu, 0), - /* 48 */ TI_DESC(SIN, 0, ti_unsupported, 0), + /* 48 */ TI_DESC(SIN, ALU_OP1_SIN, ti_trig, 0), /* 49 */ TI_DESC(SLE, ALU_OP2_SETGE, ti_alu, TIF_ALU_SWAPSRC01), /* 50 */ TI_DESC(SNE, ALU_OP2_SETNE, ti_alu, 0), /* 51 */ TI_DESC(STR, 0, ti_unsupported, 0), @@ -236,6 +236,7 @@ const tgsi_translator::tgsi_inst_info tgsi_translator::tgsi_info_table[TGSI_OPCO shader* tgsi_translator::translate() { shader_target target; + int r; tokens = ps->selector->tokens; tgsi_parse_init(&parse, tokens); @@ -264,10 +265,16 @@ shader* tgsi_translator::translate() { sh->init(); current = sh->root; - int r = parse_tokens(); - tgsi_parse_free(&parse); + if ((r = parse_declarations())) + return NULL; emit_inputs(); + + if ((r = parse_instructions())) + return NULL; + + tgsi_parse_free(&parse); + emit_exports(); update_pipe_shader(); @@ -279,7 +286,7 @@ shader* tgsi_translator::translate() { return sh; } -int tgsi_translator::parse_tokens() { +int tgsi_translator::parse_declarations() { int r; while (!tgsi_parse_end_of_tokens(&parse)) { @@ -295,7 +302,7 @@ int tgsi_translator::parse_tokens() { r = parse_immediate(); break; case TGSI_TOKEN_TYPE_INSTRUCTION: - r = parse_instruction(); + return 0; break; default: assert(!"unexpected tgsi token type"); @@ -307,6 +314,28 @@ int tgsi_translator::parse_tokens() { return 0; } +int tgsi_translator::parse_instructions() { + int r; + + while (true) { + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + r = parse_instruction(); + break; + default: + assert(!"unexpected tgsi token type"); + return -1; + } + if (r) + return r; + + if (tgsi_parse_end_of_tokens(&parse)) + break; + tgsi_parse_token(&parse); + }; + return 0; +} + int tgsi_translator::parse_property() { tgsi_full_property *property = &parse.FullToken.FullProperty; @@ -349,6 +378,7 @@ int tgsi_translator::parse_declaration() { interp_mask |= (1 << 2); input[i].d.spi_sid = spi_sid(input[i].d.name, input[i].d.sid); + switch (input[i].d.name) { case TGSI_SEMANTIC_FACE: face_input = i; @@ -395,9 +425,9 @@ int tgsi_translator::parse_declaration() { break; case TGSI_FILE_TEMPORARY: - if (d->Array.ArrayID) { + if (d->Array.ArrayID && d->Range.Last > d->Range.First) { sh->add_rel_array(VLK_TGSI_TEMP, d->Range.First, - d->Range.Last - d->Range.First + 1, 0xF, d->Array.ArrayID); + d->Range.Last - d->Range.First + 1, 0xF, d->Array.ArrayID); } break; @@ -408,9 +438,11 @@ int tgsi_translator::parse_declaration() { case TGSI_FILE_SYSTEM_VALUE: if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { + instanceid_index = d->Range.First; break; } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID) - break; + vertexid_index = d->Range.First; + break; default: assert(!"unexpected tgsi declaration"); return -1; @@ -479,6 +511,7 @@ int tgsi_translator::parse_instruction() { args.dst.rel_addr_index = inst->Dst[0].Indirect.Index; args.dst.rel_addr_chan = inst->Dst[0].Indirect.Swizzle; args.dst.rel_array_id = inst->Dst[0].Indirect.ArrayID; + indirect_vlk |= (1 << args.dst.kind); } } @@ -486,13 +519,29 @@ int tgsi_translator::parse_instruction() { clamp = inst->Instruction.Saturate; args.nsrc = inst->Instruction.NumSrcRegs; + unsigned nconst = 0; + unsigned nliteral = 0; + for (i = 0; i < args.nsrc; ++i) { tgsi_arg &a = args.src[i]; a.file = inst->Src[i].Register.File; a.sel = inst->Src[i].Register.Index; - if (a.file != TGSI_FILE_SAMPLER) { - a.kind = file_to_value_kind(args.src[i].file); + if (a.file == TGSI_FILE_SYSTEM_VALUE) { + if (a.sel == instanceid_index) { + a.kind = VLK_REG; + a.sel = 0; + FILLV4(a.swz, SEL_W); + } else if (a.sel == vertexid_index) { + a.kind = VLK_REG; + a.sel = 0; + FILLV4(a.swz, SEL_X); + } else { + assert(!"unexpected system value"); + } + } else if (a.file != TGSI_FILE_SAMPLER) { + + a.kind = file_to_value_kind(a.file); a.rel = inst->Src[i].Register.Indirect; a.neg = inst->Src[i].Register.Negate; a.abs = inst->Src[i].Register.Absolute; @@ -501,17 +550,29 @@ int tgsi_translator::parse_instruction() { a.swz[2] = inst->Src[i].Register.SwizzleZ; a.swz[3] = inst->Src[i].Register.SwizzleW; + if (a.kind == VLK_KCACHE && inst->Src[i].Register.Dimension) + a.kc_bank = inst->Src[i].Dimension.Index; + if (a.rel) { assert(inst->Src[i].Indirect.File == TGSI_FILE_ADDRESS); a.rel_addr_index = inst->Src[i].Indirect.Index; a.rel_addr_chan = inst->Src[i].Indirect.Swizzle; a.rel_array_id = inst->Src[i].Indirect.ArrayID; + indirect_vlk |= (1 << a.kind); if (a.file == TGSI_FILE_CONSTANT) { - a.values = fetch_rel_const(a); - a.rel = 0; - a.kind = VLK_TEMP; + fetch_rel_const(a); } + } else if (a.file == TGSI_FILE_CONSTANT) { + if (nconst == 1) { + split_src_arg(a); + } else + ++nconst; + } else if (a.file == TGSI_FILE_IMMEDIATE) { + if (nliteral == 1) { + split_src_arg(a); + } else + ++nliteral; } } } @@ -549,9 +610,65 @@ int tgsi_translator::emit_fake_export(unsigned type) { } int tgsi_translator::emit_exports() { - int i, k, n; + int i, j, k, n; int next_pos = 60, next_pixel = 0, next_param = 0; + if (clip_vertex_write) { + int cd = noutput; + + noutput += 2; + output[cd].d.name = TGSI_SEMANTIC_CLIPDIST; + output[cd].tgsi_index = cd; + output[cd + 1].d.name = TGSI_SEMANTIC_CLIPDIST; + output[cd + 1].tgsi_index = cd + 1; + + output[cv_output].d.spi_sid = 0; + clip_dist_write = 0xFF; + + for (i = 0; i < 8; i++) { + int oreg = i >> 2, ochan = i & 3; + value *o = get_tgsi_value(VLK_TGSI_OUTPUT, cd + oreg, ochan); + alu_packed_node *p = sh->create_alu_packed(); + + for (j = 0; j < 4; j++) { + value *cvo = get_tgsi_value(VLK_TGSI_OUTPUT, cv_output, j); + value *cp = sh->get_kcache_value(R600_UCP_CONST_BUFFER, i, j); + alu_node *a = build_alu(ALU_OP2_DOT4, j == ochan ? o : NULL, 0, + asrc(cvo), asrc(cp)); + a->bc.slot = j; + p->push_back(a); + } + emit_node(p); + } + } + + pipe_stream_output_info &so = ps->selector->so; + for (i = 0; i < (int)so.num_outputs; i++) { + int nc = so.output[i].num_components; + unsigned start_comp = so.output[i].start_component, real_start; + unsigned index = so.output[i].register_index; + unsigned dst_offset = so.output[i].dst_offset; + unsigned buf = so.output[i].output_buffer; + unsigned op = so.output[i].output_buffer; + + assert(buf < 4); + op += ctx.is_egcm() ? CF_OP_MEM_STREAM0_BUF0 : CF_OP_MEM_STREAM0; + real_start = (dst_offset < start_comp) ? 0 : start_comp; + + cf_node *ms = sh->create_cf(op); + ms->bc.elem_size = nc; + ms->bc.array_base = dst_offset - real_start; + ms->bc.type = MEM_WRITE; + ms->bc.array_size = 0xFFF; + ms->src.resize(4); + + for (j = 0; j < nc; ++j) { + value *v = get_tgsi_value(VLK_TGSI_OUTPUT, index, start_comp + j); + ms->src[real_start + j] = v; + } + emit_node(ms); + } + for (i = 0; i < noutput; ++i) { shader_io &o = output[i]; unsigned ti = o.tgsi_index; @@ -561,7 +678,8 @@ int tgsi_translator::emit_exports() { case TARGET_VS: switch (o.d.name) { case TGSI_SEMANTIC_CLIPDIST: - emit_export(o, EXP_PARAM, next_param++, swz, ti); + if (o.d.spi_sid) + emit_export(o, EXP_PARAM, next_param++, swz, ti); /* fall through */ case TGSI_SEMANTIC_POSITION: case TGSI_SEMANTIC_PSIZE: @@ -573,6 +691,8 @@ int tgsi_translator::emit_exports() { swz[3] = 5; /* x001 */ emit_export(o, EXP_PARAM, next_param++, swz, ti); break; + case TGSI_SEMANTIC_CLIPVERTEX: + break; default: emit_export(o, EXP_PARAM, next_param++, swz, ti); } @@ -583,13 +703,13 @@ int tgsi_translator::emit_exports() { switch (o.d.name) { case TGSI_SEMANTIC_COLOR: - if (next_pixel && next_pixel > key.nr_cbufs) + if (next_pixel && next_pixel >= key.nr_cbufs) continue; swz[3] = key.alpha_to_one ? 5 : 3; n = (fs_write_all && ctx.is_egcm() && key.nr_cbufs) ? key.nr_cbufs : 1; for (k = 0; k < n; k++) { - emit_export(o, EXP_PIXEL, next_pixel++, swz, ti++); + emit_export(o, EXP_PIXEL, next_pixel++, swz, ti); } nr_ps_color_exports += n; break; @@ -671,27 +791,35 @@ vvec tgsi_translator::get_vector_values(value_kind kind, unsigned tgsi_index, } int tgsi_translator::emit_inputs() { - int i, k, nparam = 0; + int i, nparam = 0, gpr_reserved = 0; + + // XXX temporary workaround for lack of proper array support for inputs + if (ninput) + sh->add_rel_array(VLK_TGSI_INPUT, 0, ninput, 0xF, 0); + switch (sh->target) { case TARGET_VS: { cf_node *c = sh->create_cf(CF_OP_CALL_FS); - sh->root->push_front(c); - sh->add_pinned_gpr_values(c->src, 0, 0xF, true); + c->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE; + sh->add_pinned_gpr_values(c->src, 0, 0xF, true); sh->add_input(0, true, 0xF); + // pin input arrays + for (i = 0; i < 4; ++i) { + rel_array *a = sh->get_rel_array(VLK_TGSI_INPUT, 0, i); + if (a) + a->gpr = sel_chan(1, i); + } + for (i = 0; i < ninput; ++i) { shader_io &in = input[i]; vvec dv = get_vector_values(VLK_TGSI_INPUT, in.tgsi_index); - - for (k = 0; k < 4; ++k) { - dv[k]->flags |= VLF_PIN_REG | VLF_PIN_CHAN; - dv[k]->gpr = dv[k]->pin_gpr = sel_chan(i + 1, k); - } c->dst.insert(c->dst.end(), dv.begin(), dv.end()); } - } + emit_node(c); break; + } case TARGET_PS: if (ctx.is_egcm()) { unsigned ij_pairs = ((interp_mask & 1) + (interp_mask >> 1)) @@ -705,18 +833,78 @@ int tgsi_translator::emit_inputs() { ++gpr; mask >>= 4; } + + gpr_reserved = gpr; + } + + // pin input arrays + for (i = 0; i < 4; ++i) { + rel_array *a = sh->get_rel_array(VLK_TGSI_INPUT, 0, i); + if (a) + a->gpr = sel_chan(gpr_reserved, i); + } + + if (key.color_two_side && colors_used) { + two_side = 1; + + if (face_input == -1) { + i = ninput++; + input[i].d.name = TGSI_SEMANTIC_FACE; + input[i].d.spi_sid = 0; + input[i].tgsi_index = i; + face_input = i; + } } for (i = 0; i < ninput; ++i) { shader_io &in = input[i]; + in.d.gpr = gpr_reserved++; + if (ctx.is_egcm() && in.d.spi_sid) { + in.d.lds_pos = nparam++; if (in.d.interpolate != TGSI_INTERPOLATE_CONSTANT) { - in.d.lds_pos = nparam++; in.d.ij_index = get_ij(in); - sh->root->push_front(build_interp(in, 0)); - sh->root->push_front(build_interp(in, 1)); + + emit_node(build_interp(in, 1)); + emit_node(build_interp(in, 0)); } else { - sh->root->push_front(build_interp_flat(in)); + emit_node(build_interp_flat(in)); + } + } else { + sh->add_pinned_inputs(sh->root->dst, VLK_TGSI_INPUT, + in.tgsi_index, 0xF, false, in.d.gpr); + + if (fragcoord_input == i) { + value* w = get_tgsi_value(VLK_TGSI_INPUT, i, SEL_W); + emit_alu(ALU_OP1_RECIP_IEEE, w, 0, asrc(w)); + } + } + + if (two_side) { + if (in.d.name == TGSI_SEMANTIC_COLOR) { + int ni = ninput++; + shader_io &nin = input[ni]; + nin = in; + nin.d.name = TGSI_SEMANTIC_BCOLOR; + nin.d.spi_sid = spi_sid(nin.d.name, nin.d.sid); + // back_color_input actually means front_color_input here + nin.d.back_color_input = i; + nin.tgsi_index = ni; + } else if (in.d.name == TGSI_SEMANTIC_BCOLOR) { + // both inputs are interpolated now, so select the color + int k; + shader_io &fin = input[in.d.back_color_input]; + + for (k = 0; k < 4; ++k) { + value *face = sh->get_value(VLK_TGSI_INPUT, + sel_chan(input[face_input].tgsi_index, 0)); + value *fv = sh->get_value(VLK_TGSI_INPUT, + sel_chan(fin.tgsi_index, k)); + value *bv = sh->get_value(VLK_TGSI_INPUT, + sel_chan(in.tgsi_index, k)); + emit_alu(ALU_OP3_CNDGT, fv, 0, asrc(face), asrc(fv), + asrc(bv)); + } } } } @@ -789,9 +977,9 @@ value* tgsi_translator::get_arg_value(tgsi_arg &ta, unsigned chan) { unsigned schan = ta.dst ? chan : ta.swz[chan]; if (ta.rel) { value *r = get_tgsi_value(VLK_TGSI_ADDR, ta.rel_addr_index, - ta.rel_addr_chan); + ta.rel_addr_chan); ta.values[chan] = sh->get_reg_value(ta.kind, !ta.dst, ta.sel, schan, - ta.rel, r, ta.rel_array_id); + ta.rel, r, ta.rel_array_id); } else ta.values[chan] = get_tgsi_value(ta.kind, ta.sel, schan); } @@ -806,17 +994,25 @@ value* tgsi_translator::get_arg_value(unsigned index, unsigned chan) { int tgsi_translator::ti_alu() { begin_group(); - if (info->tgsi_op == TGSI_OPCODE_SUB) + switch (info->tgsi_op) { + case TGSI_OPCODE_SUB: args.src[1].neg = !args.src[1].neg; + break; + case TGSI_OPCODE_ABS: + args.src[0].neg = 0; + args.src[0].abs = 1; + break; + } if (unlikely(info->flags & TIF_ALU_SWAPSRC01)) { - FOREACH_CHAN { - emit_alu(info->isa_op, tgsi_dst(ch), clamp, - asrc(args.src[1], ch), + FOREACH_CHAN + { + emit_alu(info->isa_op, tgsi_dst(ch), clamp, asrc(args.src[1], ch), asrc(args.src[0], ch)); } } else { - FOREACH_CHAN { + FOREACH_CHAN + { emit_alu(info->isa_op, ch); } } @@ -842,33 +1038,40 @@ int tgsi_translator::ti_dot() { s1 = 1; break; default: + nc = 0; assert(!"ti_dot: unexpected tgsi opcode"); } - // XXX maybe use MUL/DOT instead of DOT4 for nc < 4 - FOREACH_CHAN { - alu_packed_node *p = sh->create_alu_packed(); - alu_node *a; - for (i = 0; i < nc - s1; ++i) { - a = build_alu(ALU_OP2_DOT4, (i == ch) ? tgsi_dst(ch) : NULL, clamp, - asrc(args.src[0], i), asrc(args.src[1], i)); - a->bc.slot = i; - p->push_back(a); - } - if (s1) { - a = build_alu(ALU_OP2_DOT4, (i == ch) ? tgsi_dst(ch) : NULL, clamp, - asrc(literal(1.0f)), asrc(args.src[1], i)); - a->bc.slot = i++; - p->push_back(a); - } - for (; i < 4; ++i) { - a = build_alu(ALU_OP2_DOT4, (i == ch) ? tgsi_dst(ch) : NULL, clamp, - asrc(literal(0)), asrc(literal(0))); - a->bc.slot = i; - p->push_back(a); - } - emit_node(p); + unsigned ch = __builtin_ctz(write_mask); + unsigned nwc = __builtin_popcount(write_mask); + + value *t = nwc > 1 ? create_temp() : tgsi_dst(ch); + + alu_packed_node *p = sh->create_alu_packed(); + alu_node *a; + for (i = 0; i < nc - s1; ++i) { + a = build_alu(ALU_OP2_DOT4, (i == ch) ? t : NULL, clamp, + asrc(args.src[0], i), asrc(args.src[1], i)); + a->bc.slot = i; + p->push_back(a); + } + if (s1) { + a = build_alu(ALU_OP2_DOT4, (i == ch) ? t : NULL, clamp, + asrc(literal(1.0f)), asrc(args.src[1], i)); + a->bc.slot = i++; + p->push_back(a); } + for (; i < 4; ++i) { + a = build_alu(ALU_OP2_DOT4, (i == ch) ? t : NULL, clamp, + asrc(literal(0)), asrc(literal(0))); + a->bc.slot = i; + p->push_back(a); + } + emit_node(p); + + if (nwc > 1) + ti_replicate(t); + return 0; } @@ -880,7 +1083,8 @@ int tgsi_translator::ti_repl() { args.src[0].neg = 0; break; } - FOREACH_CHAN { + FOREACH_CHAN + { emit_alu(info->isa_op, 0, ch); } return 0; @@ -934,8 +1138,8 @@ alu_node* tgsi_translator::create_alu(unsigned op) { a->bc.set_op(op); a->bc.slot_flags = (alu_op_flags) ctx.alu_slots(a->bc.op_ptr); if (a->bc.op_ptr->flags & AF_KILL) { - a->flags |= NF_DONT_HOIST | NF_DONT_MOVE | - NF_DONT_KILL | NF_SCHEDULE_EARLY; + a->flags |= NF_DONT_HOIST | NF_DONT_MOVE | NF_DONT_KILL + | NF_SCHEDULE_EARLY; } else if (a->bc.op_ptr->flags & (AF_PRED | AF_MOVA)) { a->flags |= NF_DONT_HOIST; } @@ -944,7 +1148,8 @@ alu_node* tgsi_translator::create_alu(unsigned op) { int tgsi_translator::ti_trig() { value *t = prepare_trig(asrc(args.src[0], 0)); - FOREACH_CHAN { + FOREACH_CHAN + { emit_alu(info->isa_op, tgsi_dst(ch), clamp, t); } return 0; @@ -967,7 +1172,7 @@ int tgsi_translator::ti_scs() { } value* tgsi_translator::prepare_trig(alu_src s) { - static float half_inv_pi = 1.0 /(3.1415926535 * 2); + static float half_inv_pi = 1.0 / (3.1415926535 * 2); static float double_pi = 3.1415926535 * 2; static float neg_pi = -3.1415926535; @@ -979,7 +1184,8 @@ value* tgsi_translator::prepare_trig(alu_src s) { if (ctx.is_r600()) emit_alu(ALU_OP3_MULADD, t, 0, asrc(t), asrc(double_pi), asrc(neg_pi)); else - emit_alu(ALU_OP2_ADD, t, 0, asrc(t), asrc(-0.5f)); +// emit_alu(ALU_OP2_ADD, t, 0, asrc(t), asrc(-0.5f)); + emit_alu(ALU_OP3_MULADD, t, 0, asrc(t), asrc(1.0f), asrc(0.5f, 0, 1)); return t; } @@ -996,7 +1202,8 @@ int tgsi_translator::ti_exp() { if (write_mask & (1 << SEL_Y)) emit_alu(ALU_OP1_FRACT, tgsi_dst(SEL_Y), clamp, asrc(args.src[0], 0)); if (write_mask & (1 << SEL_Z)) - emit_alu(ALU_OP1_EXP_IEEE, tgsi_dst(SEL_Z), clamp, asrc(args.src[0], 0)); + emit_alu(ALU_OP1_EXP_IEEE, tgsi_dst(SEL_Z), clamp, + asrc(args.src[0], 0)); if (write_mask & (1 << SEL_W)) emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_W), 0, asrc(1.0f)); end_group(); @@ -1008,9 +1215,7 @@ int tgsi_translator::ti_log() { value *t2 = create_temp(); value *t3 = create_temp(); - alu_src s = asrc(args.src[0], 0); - s.abs = 1; - s.neg = 0; + alu_src s = asrc(args.src[0], 0, 1, 0); if (write_mask & 0x7) emit_alu(ALU_OP1_LOG_IEEE, t, 0, s); @@ -1025,11 +1230,11 @@ int tgsi_translator::ti_log() { if (write_mask & (1 << SEL_X)) emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_X), clamp, t2); if (write_mask & (1 << SEL_Y)) - emit_alu(ALU_OP2_MUL, tgsi_dst(SEL_Y), clamp, s, asrc(t2)); + emit_alu(ALU_OP2_MUL, tgsi_dst(SEL_Y), clamp, s, asrc(t3)); if (write_mask & (1 << SEL_Z)) emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_Z), clamp, t); if (write_mask & (1 << SEL_W)) - emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_Z), 0, asrc(1.0f)); + emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_W), 0, asrc(1.0f)); end_group(); return 0; } @@ -1040,11 +1245,11 @@ int tgsi_translator::ti_dst() { emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_X), 0, asrc(1.0f)); if (write_mask & (1 << SEL_Y)) emit_alu(ALU_OP2_MUL, tgsi_dst(SEL_Y), clamp, asrc(args.src[0], SEL_Y), - asrc(args.src[1], SEL_Y)); + asrc(args.src[1], SEL_Y)); if (write_mask & (1 << SEL_Z)) emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_Z), clamp, asrc(args.src[0], SEL_Z)); if (write_mask & (1 << SEL_W)) - emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_Z), clamp, asrc(args.src[1], SEL_W)); + emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_W), clamp, asrc(args.src[1], SEL_W)); end_group(); return 0; } @@ -1053,12 +1258,19 @@ int tgsi_translator::ti_lrp() { vvec t; create_temps(t, 4); - FOREACH_CHAN { - emit_alu(ALU_OP2_ADD, t[ch], 0, asrc(1.0f), asrc(args.src[0], ch, 0, 1)); + FOREACH_CHAN + { + emit_alu(ALU_OP2_ADD, t[ch], 0, asrc(1.0f), + asrc(args.src[0], ch, 0, 1)); emit_alu(ALU_OP2_MUL, t[ch], 0, asrc(t[ch]), asrc(args.src[2], ch)); + } + begin_group(); + FOREACH_CHAN + { emit_alu(ALU_OP3_MULADD, tgsi_dst(ch), clamp, asrc(args.src[0], ch), - asrc(args.src[1], ch), asrc(t[ch])); + asrc(args.src[1], ch), asrc(t[ch])); } + end_group(); return 0; } @@ -1074,7 +1286,8 @@ int tgsi_translator::ti_pow() { int tgsi_translator::ti_replicate(value* t) { begin_group(); - FOREACH_CHAN { + FOREACH_CHAN + { emit_alu(ALU_OP1_MOV, tgsi_dst(ch), 0, asrc(t)); } end_group(); @@ -1082,23 +1295,24 @@ int tgsi_translator::ti_replicate(value* t) { } int tgsi_translator::ti_xpd() { - static const unsigned int src0_swizzle[] = {2, 0, 1}; - static const unsigned int src1_swizzle[] = {1, 2, 0}; + static const unsigned int src0_swizzle[] = { 2, 0, 1 }; + static const unsigned int src1_swizzle[] = { 1, 2, 0 }; vvec t; create_temps(t, 3); - FOREACH_CHAN { + FOREACH_CHAN + { if (ch < SEL_W) - emit_alu(ALU_OP2_MUL, t[ch], 0, - asrc(args.src[0], src0_swizzle[ch]), - asrc(args.src[1], src1_swizzle[ch])); + emit_alu(ALU_OP2_MUL, t[ch], 0, asrc(args.src[0], src0_swizzle[ch]), + asrc(args.src[1], src1_swizzle[ch])); } begin_group(); - FOREACH_CHAN { + FOREACH_CHAN + { if (ch < SEL_W) emit_alu(ALU_OP3_MULADD, tgsi_dst(ch), clamp, - asrc(args.src[0], src1_swizzle[ch]), - asrc(args.src[1], src0_swizzle[ch]), asrc(t[ch], 0, 1)); + asrc(args.src[0], src1_swizzle[ch]), + asrc(args.src[1], src0_swizzle[ch]), asrc(t[ch], 0, 1)); else emit_alu(ALU_OP1_MOV, tgsi_dst(ch), 0, asrc(1.0f)); } @@ -1109,6 +1323,9 @@ int tgsi_translator::ti_xpd() { int tgsi_translator::ti_kill() { int i; + // XXX if this affects performance, we might want to do it after DCE + uses_kill = true; + for (i = 0; i < 4; ++i) { if (info->tgsi_op == TGSI_OPCODE_KILL_IF) emit_alu(ALU_OP2_KILLGT, NULL, 0, asrc(0.0f), asrc(args.src[0], i)); @@ -1120,23 +1337,24 @@ int tgsi_translator::ti_kill() { int tgsi_translator::ti_arl() { switch (info->tgsi_op) { - case TGSI_OPCODE_ARR: + case TGSI_OPCODE_ARL: if (ctx.is_egcm()) { emit_alu(ALU_OP1_FLT_TO_INT_FLOOR, tgsi_dst(SEL_X), 0, - asrc(args.src[0], 0)); + asrc(args.src[0], 0)); } else { value *t = create_temp(); emit_alu(ALU_OP1_FLOOR, t, 0, asrc(args.src[0], 0)); emit_alu(ALU_OP1_FLT_TO_INT, tgsi_dst(SEL_X), 0, asrc(t)); } break; - case TGSI_OPCODE_ARL: + case TGSI_OPCODE_ARR: emit_alu(ALU_OP1_FLT_TO_INT, tgsi_dst(SEL_X), 0, asrc(args.src[0], 0)); break; case TGSI_OPCODE_UARL: emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_X), 0, asrc(args.src[0], 0)); break; - default: assert(!"ti_arl: unexpected opcode"); + default: + assert(!"ti_arl: unexpected opcode"); } return 0; } @@ -1145,39 +1363,45 @@ int tgsi_translator::ti_ssg() { vvec t; create_temps(t, 4); if (info->tgsi_op == TGSI_OPCODE_SSG) { - FOREACH_CHAN { - emit_alu(ALU_OP3_CNDGE, t[ch], 0, asrc(args.src[0], ch), - asrc(0.0f), asrc(-1.0f)); + FOREACH_CHAN + { + emit_alu(ALU_OP3_CNDGE, t[ch], 0, asrc(args.src[0], ch), asrc(0.0f), + asrc(-1.0f)); } begin_group(); - FOREACH_CHAN { + FOREACH_CHAN + { emit_alu(ALU_OP3_CNDGT, tgsi_dst(ch), 0, asrc(args.src[0], ch), - asrc(1.0f), asrc(t[ch])); + asrc(1.0f), asrc(t[ch])); } end_group(); } else { // ISSG - FOREACH_CHAN { - emit_alu(ALU_OP3_CNDGE_INT, t[ch], 0, asrc(args.src[0], ch, 0, 1), - asrc(0u), asrc(-1u)); + FOREACH_CHAN + { + emit_alu(ALU_OP3_CNDGE_INT, t[ch], 0, asrc(args.src[0], ch), + asrc(0u), asrc(-1u)); } begin_group(); - FOREACH_CHAN { + FOREACH_CHAN + { emit_alu(ALU_OP3_CNDGT_INT, tgsi_dst(ch), 0, asrc(args.src[0], ch), - asrc(1u), asrc(t[ch])); + asrc(1u), asrc(t[ch])); } + end_group(); } return 0; } int tgsi_translator::ti_cmp() { begin_group(); - FOREACH_CHAN { + FOREACH_CHAN + { if (info->tgsi_op == TGSI_OPCODE_CMP) emit_alu(ALU_OP3_CNDGE, tgsi_dst(ch), clamp, asrc(args.src[0], ch), - asrc(args.src[2], ch), asrc(args.src[1], ch)); + asrc(args.src[2], ch), asrc(args.src[1], ch)); else emit_alu(ALU_OP3_CNDE_INT, tgsi_dst(ch), 0, asrc(args.src[0], ch), - asrc(args.src[2], ch), asrc(args.src[1], ch)); + asrc(args.src[2], ch), asrc(args.src[1], ch)); } end_group(); return 0; @@ -1187,14 +1411,16 @@ int tgsi_translator::ti_umad() { vvec t; create_temps(t, 4); - FOREACH_CHAN { + FOREACH_CHAN + { emit_alu(ALU_OP2_MULLO_INT, t[ch], 0, asrc(args.src[0], ch), - asrc(args.src[1], ch)); + asrc(args.src[1], ch)); } begin_group(); - FOREACH_CHAN { + FOREACH_CHAN + { emit_alu(ALU_OP2_ADD_INT, tgsi_dst(ch), 0, asrc(t[ch]), - asrc(args.src[2], ch)); + asrc(args.src[2], ch)); } end_group(); return 0; @@ -1203,10 +1429,12 @@ int tgsi_translator::ti_umad() { int tgsi_translator::ti_f2iu() { vvec t; create_temps(t, 4); - FOREACH_CHAN { + FOREACH_CHAN + { emit_alu(ALU_OP1_TRUNC, t[ch], 0, asrc(args.src[0], ch)); } - FOREACH_CHAN { + FOREACH_CHAN + { emit_alu(info->isa_op, tgsi_dst(ch), 0, t[ch]); } return 0; @@ -1214,9 +1442,10 @@ int tgsi_translator::ti_f2iu() { int tgsi_translator::ti_ineg() { begin_group(); - FOREACH_CHAN { + FOREACH_CHAN + { emit_alu(ALU_OP2_SUB_INT, tgsi_dst(ch), 0, asrc(0u), - asrc(args.src[0], ch)); + asrc(args.src[0], ch)); } end_group(); return 0; @@ -1225,14 +1454,15 @@ int tgsi_translator::ti_ineg() { int tgsi_translator::ti_iabs() { vvec t; create_temps(t, 4); - FOREACH_CHAN { - emit_alu(ALU_OP2_SUB_INT, t[ch], 0, asrc(0u), - asrc(args.src[0], ch)); + FOREACH_CHAN + { + emit_alu(ALU_OP2_SUB_INT, t[ch], 0, asrc(0u), asrc(args.src[0], ch)); } begin_group(); - FOREACH_CHAN { + FOREACH_CHAN + { emit_alu(ALU_OP3_CNDGE_INT, tgsi_dst(ch), 0, asrc(args.src[0], ch), - asrc(args.src[0], ch), asrc(t[ch])); + asrc(args.src[0], ch), asrc(t[ch])); } end_group(); return 0; @@ -1251,7 +1481,8 @@ int tgsi_translator::ti_divmod() { mod = true; case TGSI_OPCODE_UDIV: break; - default: assert(!"ti_divmod: unexpected tgsi opcode"); + default: + assert(!"ti_divmod: unexpected tgsi opcode"); } // TODO optimize for constant src1 (omit RECIP error correction) @@ -1269,26 +1500,25 @@ int tgsi_translator::ti_divmod() { value *t2z = create_temp(); value *t3x = create_temp(); - FOREACH_CHAN { + FOREACH_CHAN + { if (signed_op) { /* tmp2.x = -src0 */ - emit_alu(ALU_OP2_SUB_INT, t2x, 0, asrc(0u), - asrc(args.src[0], ch)); + emit_alu(ALU_OP2_SUB_INT, t2x, 0, asrc(0u), asrc(args.src[0], ch)); /* tmp2.y = -src1 */ - emit_alu(ALU_OP2_SUB_INT, t2y, 0, asrc(0u), - asrc(args.src[0], ch)); + emit_alu(ALU_OP2_SUB_INT, t2y, 0, asrc(0u), asrc(args.src[1], ch)); /* tmp2.z sign bit is set if src0 and src2 signs are different */ /* it will be a sign of the quotient */ if (!mod) { - emit_alu(ALU_OP2_XOR_INT, t2x, 0, asrc(args.src[0], ch), - asrc(args.src[1], ch)); + emit_alu(ALU_OP2_XOR_INT, t2z, 0, asrc(args.src[0], ch), + asrc(args.src[1], ch)); } /* tmp2.x = |src0| */ emit_alu(ALU_OP3_CNDGE_INT, t2x, 0, asrc(args.src[0], ch), - asrc(args.src[0], ch), asrc(t2x)); + asrc(args.src[0], ch), asrc(t2x)); /* tmp2.y = |src1| */ emit_alu(ALU_OP3_CNDGE_INT, t2y, 0, asrc(args.src[1], ch), - asrc(args.src[1], ch), asrc(t2y)); + asrc(args.src[1], ch), asrc(t2y)); } else { // unsigned // copy sources to the same temps as in signed variant just // to simplify generation of further operations. @@ -1372,18 +1602,18 @@ int tgsi_translator::ti_divmod() { if (mod) { /* sign of the remainder is the same as the sign of src0 */ /* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */ - emit_alu(ALU_OP3_CNDGE_INT, t0x, 0, asrc(t2x), asrc(t0z), - asrc(t0x)); + emit_alu(ALU_OP3_CNDGE_INT, tgsi_dst(ch), 0, asrc(t2x), + asrc(t0z), asrc(t0x)); } else { /* fix the quotient sign (same as the sign of src0*src1) */ /* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */ - emit_alu(ALU_OP3_CNDGE_INT, t0x, 0, asrc(t2z), asrc(t0z), - asrc(t0x)); + emit_alu(ALU_OP3_CNDGE_INT, tgsi_dst(ch), 0, asrc(t2z), + asrc(t0z), asrc(t0x)); } } else { // unsigned /* 19. dst = tmp1.y==0 ? tmp1.w : tmp0.z */ emit_alu(ALU_OP3_CNDE_INT, tgsi_dst(ch), 0, asrc(t1y), asrc(t1w), - asrc(t0z)); + asrc(t0z)); } } return 0; @@ -1392,7 +1622,7 @@ int tgsi_translator::ti_divmod() { fetch_node* tgsi_translator::create_fetch(unsigned op) { fetch_node *f = sh->create_fetch(); f->bc.set_op(op); - f->src.resize((f->bc.op_ptr->flags & FF_VTX) ? 1 : 4); + f->src.resize(4); f->dst.resize(4); VSWZ_XYZW(f->bc.dst_sel); return f; @@ -1455,8 +1685,7 @@ inline alu_src tgsi_translator::asrc(literal l, int abs, int neg) { } inline alu_src tgsi_translator::asrc(tgsi_arg& ta, int chan) { - return alu_src(get_arg_value(ta, chan), ta.abs, - ta.neg); + return alu_src(get_arg_value(ta, chan), ta.abs, ta.neg); } inline alu_src tgsi_translator::asrc(tgsi_arg& ta, int chan, int abs, int neg) { @@ -1484,16 +1713,22 @@ int tgsi_translator::ti_lit() { value *tx = create_temp(); value *tz = create_temp(); - emit_alu(ALU_OP2_MAX, tx, 0, asrc(args.src[0], SEL_Y), asrc(0.0f)); - emit_alu(ALU_OP1_LOG_CLAMPED, tz, 0, asrc(tx)); - emit_alu(ALU_OP3_MUL_LIT, tx, 0, asrc(tz), asrc(args.src[0], SEL_W), - asrc(args.src[0], SEL_X)); - emit_alu(ALU_OP1_EXP_IEEE, tgsi_dst(SEL_Z), clamp, asrc(tx)); + if (write_mask & (1 << SEL_Z)) { + emit_alu(ALU_OP2_MAX, tx, 0, asrc(args.src[0], SEL_Y), asrc(0.0f)); + emit_alu(ALU_OP1_LOG_CLAMPED, tz, 0, asrc(tx)); + emit_alu(ALU_OP3_MUL_LIT, tx, 0, asrc(tz), asrc(args.src[0], SEL_W), + asrc(args.src[0], SEL_X)); + } begin_group(); - emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_X), 0, asrc(1.0f)); - emit_alu(ALU_OP2_MAX, tgsi_dst(SEL_Y), clamp, asrc(args.src[0], SEL_X), - asrc(0.0f)); - emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_W), 0, asrc(1.0f)); + if (write_mask & (1 << SEL_X)) + emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_X), 0, asrc(1.0f)); + if (write_mask & (1 << SEL_Y)) + emit_alu(ALU_OP2_MAX, tgsi_dst(SEL_Y), clamp, asrc(args.src[0], SEL_X), + asrc(0.0f)); + if (write_mask & (1 << SEL_Z)) + emit_alu(ALU_OP1_EXP_IEEE, tgsi_dst(SEL_Z), clamp, asrc(tx)); + if (write_mask & (1 << SEL_W)) + emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_W), 0, asrc(1.0f)); end_group(); return 0; } @@ -1529,6 +1764,8 @@ int tgsi_translator::emit_alu(unsigned op, value* dst, int clamp, value* s0, value* tgsi_translator::get_tgsi_value(value_kind kind, unsigned index, unsigned chan) { switch (kind) { + case VLK_REG: + return sh->get_gpr_value(true, index, chan, false); case VLK_CONST: return sh->get_const_value(literals[(index << 2) + chan]); case VLK_KCACHE: @@ -1575,6 +1812,7 @@ int tgsi_translator::update_pipe_shader() { ps->shader.vs_out_point_size = vs_out_point_size; ps->shader.uses_tex_buffers = uses_tex_buffers; ps->shader.has_txq_cube_array_z_comp = has_txq_cube_array_z_comp; + ps->shader.two_side = two_side; return 0; } @@ -1588,12 +1826,14 @@ int tgsi_translator::ti_buffer_txq() { if (ctx.is_egcm()) emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_X), 0, - asrc(sh->get_kcache_value(R600_BUFFER_INFO_CONST_BUFFER, id >> 2, - id & 3))); + asrc( + sh->get_kcache_value(R600_BUFFER_INFO_CONST_BUFFER, id >> 2, + id & 3))); else emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_X), 0, - asrc(sh->get_kcache_value(R600_BUFFER_INFO_CONST_BUFFER, id << 1, - 1))); + asrc( + sh->get_kcache_value(R600_BUFFER_INFO_CONST_BUFFER, id << 1, + 1))); return 0; } @@ -1623,16 +1863,19 @@ int tgsi_translator::ti_vtx_fetch() { if (ctx.is_egcm()) return 0; - FOREACH_CHAN { + FOREACH_CHAN + { emit_alu(ALU_OP2_AND_INT, f->dst[ch], 0, asrc(f->dst[ch]), - asrc(sh->get_kcache_value(R600_BUFFER_INFO_CONST_BUFFER, id << 1, - ch))); + asrc( + sh->get_kcache_value(R600_BUFFER_INFO_CONST_BUFFER, id << 1, + ch))); } if (write_mask & (1 << SEL_W)) { emit_alu(ALU_OP2_AND_INT, f->dst[SEL_W], 0, asrc(f->dst[SEL_W]), - asrc(sh->get_kcache_value(R600_BUFFER_INFO_CONST_BUFFER, - 1 + (id << 1), 0))); + asrc( + sh->get_kcache_value(R600_BUFFER_INFO_CONST_BUFFER, + 1 + (id << 1), 0))); } return 0; } @@ -1706,14 +1949,16 @@ int tgsi_translator::ti_tex() { alu_packed_node *p = sh->create_alu_packed(); create_temps(tv, 4); - for (i = 0; i < 4; ++i) - p->push_back( - build_alu(ALU_OP2_CUBE, tv[i], 0, - asrc(args.src[0], cube_swizzle[i]), - asrc(args.src[0], cube_swizzle[3 - i]))); + for (i = 0; i < 4; ++i) { + alu_node *a = build_alu(ALU_OP2_CUBE, tv[i], 0, + asrc(args.src[0], cube_swizzle[i]), + asrc(args.src[0], cube_swizzle[3 - i])); + a->bc.slot = i; + p->push_back(a); + } emit_node(p); - emit_alu(ALU_OP1_RECIP_IEEE, tv[SEL_Z], 0, asrc(tv[SEL_Z])); + emit_alu(ALU_OP1_RECIP_IEEE, tv[SEL_Z], 0, asrc(tv[SEL_Z], 1)); emit_alu(ALU_OP3_MULADD, tv[SEL_X], 0, asrc(tv[SEL_X]), asrc(tv[SEL_Z]), asrc(1.5f)); emit_alu(ALU_OP3_MULADD, tv[SEL_Y], 0, asrc(tv[SEL_Y]), asrc(tv[SEL_Z]), @@ -1790,7 +2035,7 @@ int tgsi_translator::ti_tex() { emit_node(f); value *tx = create_temp(); - emit_alu(ALU_OP2_MULLO_INT, tw, 0, asrc(src[3]), asrc(4u)); + emit_alu(ALU_OP2_MULLO_INT, tx, 0, asrc(src[3]), asrc(4u)); emit_alu(ALU_OP2_LSHR_INT, src[3], 0, asrc(tw), asrc(tx)); emit_alu(ALU_OP2_AND_INT, src[3], 0, asrc(src[3]), asrc(0xFu)); } @@ -2020,11 +2265,25 @@ int tgsi_translator::ti_end_loop() { return 0; } -vvec tgsi_translator::fetch_rel_const(tgsi_arg& ta) { +int tgsi_translator::split_src_arg(tgsi_arg &ta) { + int k; + vvec t; + create_temps(t, 4); + + for (k = 0; k < 4; ++k) { + emit_alu(ALU_OP1_MOV, t[k], 0, asrc(get_arg_value(ta, k))); + } + ta.rel = 0; + ta.values = t; + ta.kind = VLK_TEMP; + return 0; +} + +int tgsi_translator::fetch_rel_const(tgsi_arg& ta) { int i; value* t = create_temp(); value* addr = get_tgsi_value(VLK_TGSI_ADDR, ta.rel_addr_index, 0); - emit_alu(ALU_OP2_ADD_INT, t, 0, asrc(addr), asrc((unsigned)ta.sel)); + emit_alu(ALU_OP2_ADD_INT, t, 0, asrc(addr), asrc((unsigned) ta.sel)); vvec r; create_temps(r, 4); @@ -2046,8 +2305,10 @@ vvec tgsi_translator::fetch_rel_const(tgsi_arg& ta) { } emit_node(f); - return r; + ta.values = r; + ta.rel = 0; + ta.kind = VLK_TEMP; + return 0; } - } // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_tgsi.h b/src/gallium/drivers/r600/sb/sb_tgsi.h index f70e368a81..bbfb115315 100644 --- a/src/gallium/drivers/r600/sb/sb_tgsi.h +++ b/src/gallium/drivers/r600/sb/sb_tgsi.h @@ -131,6 +131,14 @@ class tgsi_translator { int uses_tex_buffers; int has_txq_cube_array_z_comp; + // XXX probably unused now + unsigned indirect_vlk; + + int instanceid_index; + int vertexid_index; + + boolean two_side; + unsigned clip_dist_write; unsigned fs_write_all; unsigned uses_kill; @@ -160,7 +168,8 @@ public: clip_vertex_write(), cv_output(), nr_ps_max_color_exports(), nr_ps_color_exports(), vs_out_misc_write(), vs_out_point_size(), uses_tex_buffers(), - has_txq_cube_array_z_comp(), + has_txq_cube_array_z_comp(), indirect_vlk(), + instanceid_index(-1), vertexid_index(-1), two_side(), clip_dist_write(), fs_write_all(), uses_kill(), tgsi_proc(), interp_mask(), file_offset(), current(), @@ -172,13 +181,16 @@ private: int spi_sid(int name, int sid); - int parse_tokens(); + int parse_declarations(); + int parse_instructions(); int parse_property(); int parse_declaration(); int parse_immediate(); int parse_instruction(); + int split_src_arg(tgsi_arg &ta); + int emit_inputs(); int get_ij(shader_io &in); alu_packed_node* build_interp(shader_io& in, unsigned type); @@ -300,7 +312,7 @@ private: alu_src asrc(tgsi_arg& ta, int chan); alu_src asrc(tgsi_arg& ta, int chan, int abs, int neg); - value* create_temp() { return sh->create_temp_value(); } + value* create_temp(int chan = 0) { return sh->create_temp_value(chan); } void create_temps(vvec &temps, int n) { temps.resize(n); for (int i = 0; i < n; ++i) @@ -311,7 +323,8 @@ private: fetch_node* create_fetch(unsigned op); - vvec fetch_rel_const(tgsi_arg& ta);}; + int fetch_rel_const(tgsi_arg& ta); +}; } // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_valtable.cpp b/src/gallium/drivers/r600/sb/sb_valtable.cpp index b87d957a89..ad2e78b611 100644 --- a/src/gallium/drivers/r600/sb/sb_valtable.cpp +++ b/src/gallium/drivers/r600/sb/sb_valtable.cpp @@ -145,7 +145,7 @@ sb_ostream& operator << (sb_ostream &o, value &v) { sel_chan g; - if (v.is_rel()) { + if (v.array) { g = v.array->gpr; } else { g = v.gpr; |