summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVadim Girlin <vadimgirlin@gmail.com>2013-07-23 22:45:35 +0400
committerVadim Girlin <vadimgirlin@gmail.com>2013-07-23 22:45:35 +0400
commit439e0cf1975e0b8bb8c0942da5b9ec6e24e61a24 (patch)
tree8ad28b27435682cd8cb5af50a99f635ac3278383
parent759731de4331eb92ea47e005b79bd72fd44b3ab4 (diff)
wip
-rw-r--r--src/gallium/drivers/r600/sb/sb_bc_finalize.cpp27
-rw-r--r--src/gallium/drivers/r600/sb/sb_bc_parser.cpp8
-rw-r--r--src/gallium/drivers/r600/sb/sb_core.cpp2
-rw-r--r--src/gallium/drivers/r600/sb/sb_ir.h11
-rw-r--r--src/gallium/drivers/r600/sb/sb_ra_init.cpp4
-rw-r--r--src/gallium/drivers/r600/sb/sb_sched.cpp78
-rw-r--r--src/gallium/drivers/r600/sb/sb_sched.h11
-rw-r--r--src/gallium/drivers/r600/sb/sb_shader.cpp239
-rw-r--r--src/gallium/drivers/r600/sb/sb_shader.h11
-rw-r--r--src/gallium/drivers/r600/sb/sb_ssa_builder.cpp5
-rw-r--r--src/gallium/drivers/r600/sb/sb_tgsi.cpp591
-rw-r--r--src/gallium/drivers/r600/sb/sb_tgsi.h21
-rw-r--r--src/gallium/drivers/r600/sb/sb_valtable.cpp2
13 files changed, 725 insertions, 285 deletions
diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
index 21432912e4..ffeb08414b 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -32,6 +32,8 @@
#define FBC_DUMP(q)
#endif
+#include "cmath"
+
#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"
@@ -337,13 +339,28 @@ void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
literal lv = v->literal_value;
src.chan = 0;
+ if (src.abs) {
+ lv.f = fabs(lv.f);
+ src.abs = 0;
+ }
+ if (src.neg) {
+ lv.f = -lv.f;
+ src.neg = 0;
+ }
+
if (lv == literal(0))
src.sel = ALU_SRC_0;
else if (lv == literal(0.5f))
src.sel = ALU_SRC_0_5;
- else if (lv == literal(1.0f))
+ else if (lv == literal(-0.5f)) {
+ src.sel = ALU_SRC_0_5;
+ src.neg = 1;
+ } else if (lv == literal(1.0f))
+ src.sel = ALU_SRC_1;
+ else if (lv == literal(-1.0f)) {
src.sel = ALU_SRC_1;
- else if (lv == literal(1))
+ src.neg = 1;
+ } else if (lv == literal(1))
src.sel = ALU_SRC_1_INT;
else if (lv == literal(-1))
src.sel = ALU_SRC_M_1_INT;
@@ -481,9 +498,11 @@ void bc_finalizer::finalize_fetch(fetch_node* f) {
value *v = f->src[chan];
- if (!v || v->is_undef()) {
+ if (!v)
sel = SEL_MASK;
- } else if (v->is_const()) {
+ else if (v->is_undef())
+ sel = SEL_0;
+ else if (v->is_const()) {
literal l = v->literal_value;
if (l == literal(0))
sel = SEL_0;
diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
index 0b1d7cb919..320a0811a0 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
@@ -382,6 +382,14 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {
bc_alu_src &src = n->bc.src[s];
if (src.sel == ALU_SRC_LITERAL) {
+ if (src.abs) {
+ src.value.f = fabs(src.value.f);
+ src.abs = 0;
+ }
+ if (src.neg) {
+ src.value.f = -src.value.f;
+ src.neg = 0;
+ }
n->src[s] = sh->get_const_value(src.value);
} else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) {
unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ?
diff --git a/src/gallium/drivers/r600/sb/sb_core.cpp b/src/gallium/drivers/r600/sb/sb_core.cpp
index 696e10b68a..08dc032f07 100644
--- a/src/gallium/drivers/r600/sb/sb_core.cpp
+++ b/src/gallium/drivers/r600/sb/sb_core.cpp
@@ -206,7 +206,7 @@ int r600_sb_compile_tgsi(struct r600_context *rctx,
time_start = os_time_get_nano();
}
- unsigned shader_id = r600_next_shader_id();
+ unsigned shader_id = bc->debug_id;
SB_DUMP_STAT( sblog << "\nsb: shader " << shader_id << "\n"; );
// translate from tgsi
diff --git a/src/gallium/drivers/r600/sb/sb_ir.h b/src/gallium/drivers/r600/sb/sb_ir.h
index f5ecad6b3f..716af597c5 100644
--- a/src/gallium/drivers/r600/sb/sb_ir.h
+++ b/src/gallium/drivers/r600/sb/sb_ir.h
@@ -493,7 +493,7 @@ protected:
value(unsigned sh_id, value_kind k, sel_chan select, unsigned ver = 0)
: kind(k), flags(),
rel(), array(),
- version(ver), select(select), pin_gpr(select), gpr(),
+ version(ver), select(select), pin_gpr(), gpr(),
gvn_source(), ghash(),
def(), adef(), uses(), constraint(), chunk(),
literal_value(), uid(sh_id) {}
@@ -560,7 +560,8 @@ public:
}
bool is_any_gpr() {
- return (kind == VLK_REG || kind == VLK_TEMP || is_tgsi_value());
+ return (!rel &&
+ (kind == VLK_REG || kind == VLK_TEMP || is_tgsi_value()));
}
bool is_agpr() {
@@ -604,8 +605,10 @@ public:
&& literal_value != literal(0)
&& literal_value != literal(1)
&& literal_value != literal(-1)
- && literal_value != literal(0.5)
- && literal_value != literal(1.0);
+ && literal_value != literal(0.5f)
+ && literal_value != literal(-0.5f)
+ && literal_value != literal(1.0f)
+ && literal_value != literal(-1.0f);
}
void add_use(node *n, use_kind kind, int arg);
diff --git a/src/gallium/drivers/r600/sb/sb_ra_init.cpp b/src/gallium/drivers/r600/sb/sb_ra_init.cpp
index 4da556032f..856a2d9a46 100644
--- a/src/gallium/drivers/r600/sb/sb_ra_init.cpp
+++ b/src/gallium/drivers/r600/sb/sb_ra_init.cpp
@@ -349,9 +349,7 @@ void ra_init::process_op(node* n) {
break;
}
}
- }
-
- if (n->is_fetch_inst() || n->is_cf_inst()) {
+ } else if (n->is_fetch_inst() || n->is_cf_inst()) {
for (vvec::iterator I = n->src.begin(), E = n->src.end(); I != E; ++I) {
value *v = *I;
if (v && v->is_sgpr())
diff --git a/src/gallium/drivers/r600/sb/sb_sched.cpp b/src/gallium/drivers/r600/sb/sb_sched.cpp
index f0e41f5863..fd0f761549 100644
--- a/src/gallium/drivers/r600/sb/sb_sched.cpp
+++ b/src/gallium/drivers/r600/sb/sb_sched.cpp
@@ -939,9 +939,10 @@ void post_scheduler::update_live(node *n, val_set *born) {
void post_scheduler::process_group() {
alu_group_tracker &rt = alu.grp();
-
val_set vals_born;
+ prev_array_read.clear();
+
recolor_locals();
PSC_DUMP(
@@ -956,6 +957,7 @@ void post_scheduler::process_group() {
continue;
update_live(n, &vals_born);
+ update_prev_array_read(n);
}
PSC_DUMP(
@@ -1014,7 +1016,10 @@ void post_scheduler::schedule_alu(container_node *c) {
prev_regmap = regmap;
if (!prepare_alu_group()) {
- if (alu.current_ar) {
+ if (latency_check_failed) {
+ emit_nop_group();
+ continue;
+ } else if (alu.current_ar) {
emit_load_ar();
continue;
} else
@@ -1263,6 +1268,11 @@ bool post_scheduler::map_src_val(value *v) {
return true;
sel_chan gpr = v->get_final_gpr();
+
+ PSC_DUMP(
+ sblog << "map src " << *v << " to " << gpr << "\n";
+ );
+
rv_map::iterator F = regmap.find(gpr);
value *c = NULL;
if (F != regmap.end()) {
@@ -1436,6 +1446,11 @@ unsigned post_scheduler::try_add_instruction(node *n) {
alu_group_tracker &rt = alu.grp();
+#if 0 // this seems not a problem so far at least on evergreen
+ if (!check_latency(n))
+ return 0;
+#endif
+
unsigned avail_slots = rt.avail_slots();
if (n->is_alu_packed()) {
@@ -1606,6 +1621,8 @@ bool post_scheduler::prepare_alu_group() {
alu_group_tracker &rt = alu.grp();
+ latency_check_failed = false;
+
unsigned i1 = 0;
PSC_DUMP(
@@ -1634,7 +1651,6 @@ bool post_scheduler::prepare_alu_group() {
sblog << "\n";
);
-
unsigned cnt = try_add_instruction(n);
if (!cnt)
@@ -1970,4 +1986,60 @@ void rp_gpr_tracker::dump() {
}
}
+void post_scheduler::update_prev_array_read(alu_node* n) {
+ for (vvec::iterator I = n->src.begin(), E = n->src.end(); I != E; ++I) {
+ value *v = *I;
+
+ if (!v || !v->array)
+ continue;
+
+ prev_array_read.push_back(v);
+ }
+}
+
+bool post_scheduler::check_latency(node* n) {
+ for (vvec::iterator I = n->dst.begin(), E = n->dst.end(); I != E; ++I) {
+ value *d = *I;
+
+ if (!d || !d->array)
+ continue;
+
+ if (!check_value_latency(d))
+ return false;
+ }
+ return true;
+}
+
+bool post_scheduler::check_value_latency(value* v) {
+ for (vvec::iterator I = prev_array_read.begin(), E = prev_array_read.end();
+ I != E; ++I) {
+ value *r = *I;
+
+ if (r->array == v->array) {
+ bool rel_write = v->is_rel();
+ bool rel_read = r->is_rel();
+
+ if (rel_write ^ rel_read) {
+ latency_check_failed = true;
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+void post_scheduler::emit_nop_group() {
+ alu_node * a = sh.create_alu();
+ a->bc.set_op(ALU_OP0_NOP);
+
+ alu_group_tracker &rt = alu.grp();
+ if (!rt.try_reserve(a)) {
+ sblog << "can't emit NOP group : ";
+ dump::dump_op(a);
+ sblog << "\n";
+ }
+
+ alu.emit_group();
+}
+
} // namespace r600_sb
diff --git a/src/gallium/drivers/r600/sb/sb_sched.h b/src/gallium/drivers/r600/sb/sb_sched.h
index a74484f50b..40e8b15c9d 100644
--- a/src/gallium/drivers/r600/sb/sb_sched.h
+++ b/src/gallium/drivers/r600/sb/sb_sched.h
@@ -254,11 +254,15 @@ class post_scheduler : public pass {
val_set cleared_interf;
+ vvec prev_array_read;
+ bool latency_check_failed;
+
public:
post_scheduler(shader &sh) : pass(sh),
ready(), ready_copies(), pending(), cur_bb(),
- live(), ucm(), alu(sh), regmap(), cleared_interf() {}
+ live(), ucm(), alu(sh), regmap(), cleared_interf(),
+ prev_array_read(), latency_check_failed() {}
virtual int run();
void run_on(container_node *n);
@@ -317,6 +321,11 @@ public:
void emit_clause();
void process_ready_copies();
+
+ void update_prev_array_read(alu_node *n);
+ bool check_latency(node *n);
+ bool check_value_latency(value *v);
+ void emit_nop_group();
};
} // namespace r600_sb
diff --git a/src/gallium/drivers/r600/sb/sb_shader.cpp b/src/gallium/drivers/r600/sb/sb_shader.cpp
index 2be117df2e..24433649e7 100644
--- a/src/gallium/drivers/r600/sb/sb_shader.cpp
+++ b/src/gallium/drivers/r600/sb/sb_shader.cpp
@@ -31,23 +31,22 @@
namespace r600_sb {
shader::shader(sb_context &sctx, shader_target t, unsigned id, bool direct_tgsi)
-: ctx(sctx), next_temp_value_index(temp_regid_offset), pred_sels(),
- regions(), inputs(), undef(), val_pool(sizeof(value)),
- pool(), all_nodes(), bc(sctx.hw_class_bit()), src_stats(), opt_stats(),
- errors(), optimized(), id(id),
- coal(*this), bbs(),
- target(t), vt(ex), ex(*this), root(),
- compute_interferences(),
- has_alu_predication(), uses_gradients(), safe_math(), ngpr(), nstack(),
- direct_tgsi(direct_tgsi) {}
+ : ctx(sctx), next_temp_value_index(temp_regid_offset), pred_sels(),
+ regions(), inputs(), undef(), val_pool(sizeof(value)), pool(),
+ all_nodes(), bc(sctx.hw_class_bit()), src_stats(), opt_stats(),
+ errors(), optimized(), id(id), coal(*this), bbs(), target(t),
+ vt(ex), ex(*this), root(), compute_interferences(),
+ has_alu_predication(), uses_gradients(), safe_math(), ngpr(),
+ nstack(), direct_tgsi(direct_tgsi) {
+}
bool shader::assign_slot(alu_node* n, alu_node *slots[5]) {
unsigned slot_flags = ctx.alu_slots(n->bc.op);
unsigned slot = n->bc.dst_chan;
- if (!ctx.is_cayman() && (!(slot_flags & AF_V) || slots[slot]) &&
- (slot_flags & AF_S))
+ if (!ctx.is_cayman() && (!(slot_flags & AF_V) || slots[slot])
+ && (slot_flags & AF_S))
slot = SLOT_TRANS;
if (slots[slot])
@@ -59,7 +58,7 @@ bool shader::assign_slot(alu_node* n, alu_node *slots[5]) {
}
void shader::add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask,
- bool src) {
+ bool src) {
unsigned chan = 0;
while (comp_mask) {
if (comp_mask & 1) {
@@ -72,7 +71,7 @@ void shader::add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask,
if (v->array && !v->array->gpr) {
// if pinned value can be accessed with indirect addressing
// pin the entire array to its original location
- v->array->gpr = v->array->pin_gpr;
+ v->array->gpr = v->array->base_sel;
}
vec.push_back(v);
}
@@ -81,16 +80,49 @@ void shader::add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask,
}
}
+void shader::add_pinned_inputs(vvec& vec, value_kind kind, unsigned sel,
+ unsigned comp_mask, bool src,
+ unsigned pin_gpr_sel) {
+ unsigned chan = 0;
+ while (comp_mask) {
+ if (comp_mask & 1) {
+ value *v = get_reg_value(kind, src, sel, chan, false);
+ if (!v->array) {
+ v->flags |= (VLF_PIN_REG | VLF_PIN_CHAN);
+ v->gpr = v->pin_gpr = sel_chan(pin_gpr_sel, chan);
+ v->fix();
+ }
+/* if (v->array && !v->array->gpr) {
+ // if pinned value can be accessed with indirect addressing
+ // pin the entire array to its original location
+ v->array->gpr = sel_chan(
+ pin_gpr_sel - (sel - v->array->base_sel), chan);
+ }
+*/ vec.push_back(v);
+ }
+ comp_mask >>= 1;
+ ++chan;
+ }
+}
+
cf_node* shader::create_clause(node_subtype nst) {
cf_node *n = create_cf();
n->subtype = nst;
switch (nst) {
- case NST_ALU_CLAUSE: n->bc.set_op(CF_OP_ALU); break;
- case NST_TEX_CLAUSE: n->bc.set_op(CF_OP_TEX); break;
- case NST_VTX_CLAUSE: n->bc.set_op(CF_OP_VTX); break;
- default: assert(!"invalid clause type"); break;
+ case NST_ALU_CLAUSE:
+ n->bc.set_op(CF_OP_ALU);
+ break;
+ case NST_TEX_CLAUSE:
+ n->bc.set_op(CF_OP_TEX);
+ break;
+ case NST_VTX_CLAUSE:
+ n->bc.set_op(CF_OP_VTX);
+ break;
+ default:
+ assert(!"invalid clause type");
+ break;
}
n->bc.barrier = 1;
@@ -127,9 +159,11 @@ alu_node* shader::create_copy_mov(value* dst, value* src, unsigned affcost) {
return n;
}
-value* shader::get_value(value_kind kind, sel_chan id,
- unsigned version) {
- unsigned key = (kind << 28) | (version << 16) | id;
+value* shader::get_value(value_kind kind, sel_chan id, unsigned version) {
+ unsigned key = (kind << 28) | (version << 14) | id;
+ assert((id & ((1 << 14) - 1)) == id);
+ assert((version & ((1 << 14) - 1)) == version);
+
value_map::iterator i = reg_values.find(key);
if (i != reg_values.end()) {
return i->second;
@@ -149,7 +183,7 @@ void shader::fill_array_values(rel_array *a, vvec &vv) {
vv.resize(sz);
for (unsigned i = 0; i < a->array_size; ++i) {
vv[i] = get_reg_value(a->kind, true, a->base_sel.sel() + i,
- a->base_sel.chan(), false);
+ a->base_sel.chan(), false);
}
}
@@ -176,19 +210,19 @@ value* shader::get_reg_value(value_kind kind, bool src, unsigned sel,
return v;
}
-value* shader::create_temp_value() {
- sel_chan id(++next_temp_value_index, 0);
+value* shader::create_temp_value(int chan) {
+ sel_chan id(++next_temp_value_index, chan);
return get_value(VLK_TEMP, id, 0);
}
value* shader::get_kcache_value(unsigned bank, unsigned index, unsigned chan) {
return get_ro_value(kcache_values, VLK_KCACHE,
- sel_chan((bank << 12) | index, chan));
+ sel_chan((bank << 12) | index, chan));
}
void shader::add_input(unsigned gpr, bool preloaded, unsigned comp_mask) {
if (inputs.size() <= gpr)
- inputs.resize(gpr+1);
+ inputs.resize(gpr + 1);
shader_input &i = inputs[gpr];
i.preloaded = preloaded;
@@ -209,8 +243,8 @@ void shader::init_call_fs(cf_node* cf) {
assert(target == TARGET_VS);
- for(inputs_vec::const_iterator I = inputs.begin(),
- E = inputs.end(); I != E; ++I, ++gpr) {
+ for (inputs_vec::const_iterator I = inputs.begin(), E = inputs.end();
+ I != E; ++I, ++gpr) {
if (!I->preloaded)
add_pinned_gpr_values(cf->dst, gpr, I->comp_mask, false);
else
@@ -225,7 +259,8 @@ void shader::set_undef(val_set& s) {
val_set &vs = s;
- for (val_set::iterator I = vs.begin(*this), E = vs.end(*this); I != E; ++I) {
+ for (val_set::iterator I = vs.begin(*this), E = vs.end(*this); I != E;
+ ++I) {
value *v = *I;
assert(!v->is_readonly() && !v->is_rel());
@@ -260,14 +295,14 @@ alu_node* shader::create_alu() {
alu_group_node* shader::create_alu_group() {
alu_group_node* n =
- new (pool.allocate(sizeof(alu_group_node))) alu_group_node();
+ new (pool.allocate(sizeof(alu_group_node))) alu_group_node();
all_nodes.push_back(n);
return n;
}
alu_packed_node* shader::create_alu_packed() {
alu_packed_node* n =
- new (pool.allocate(sizeof(alu_packed_node))) alu_packed_node();
+ new (pool.allocate(sizeof(alu_packed_node))) alu_packed_node();
all_nodes.push_back(n);
return n;
}
@@ -288,33 +323,34 @@ fetch_node* shader::create_fetch() {
}
region_node* shader::create_region() {
- region_node *n = new (pool.allocate(sizeof(region_node)))
- region_node(regions.size());
+ region_node *n = new (pool.allocate(sizeof(region_node))) region_node(
+ regions.size());
regions.push_back(n);
all_nodes.push_back(n);
return n;
}
depart_node* shader::create_depart(region_node* target) {
- depart_node* n = new (pool.allocate(sizeof(depart_node)))
- depart_node(target, target->departs.size());
+ depart_node* n = new (pool.allocate(sizeof(depart_node))) depart_node(
+ target, target->departs.size());
target->departs.push_back(n);
all_nodes.push_back(n);
return n;
}
repeat_node* shader::create_repeat(region_node* target) {
- repeat_node* n = new (pool.allocate(sizeof(repeat_node)))
- repeat_node(target, target->repeats.size() + 1);
+ repeat_node* n = new (pool.allocate(sizeof(repeat_node))) repeat_node(
+ target, target->repeats.size() + 1);
target->repeats.push_back(n);
all_nodes.push_back(n);
return n;
}
container_node* shader::create_container(node_type nt, node_subtype nst,
- node_flags flags) {
- container_node *n = new (pool.allocate(sizeof(container_node)))
- container_node(nt, nst, flags);
+ node_flags flags) {
+ container_node *n =
+ new (pool.allocate(sizeof(container_node))) container_node(nt, nst,
+ flags);
all_nodes.push_back(n);
return n;
}
@@ -342,12 +378,12 @@ value* shader::get_const_value(const literal &v) {
}
shader::~shader() {
- for (node_vec::iterator I = all_nodes.begin(), E = all_nodes.end();
- I != E; ++I)
+ for (node_vec::iterator I = all_nodes.begin(), E = all_nodes.end(); I != E;
+ ++I)
(*I)->~node();
for (rel_array_vec::iterator I = rel_arrays.begin(), E = rel_arrays.end();
- I != E; ++I) {
+ I != E; ++I) {
delete *I;
}
}
@@ -371,29 +407,32 @@ value* shader::get_value_version(value* v, unsigned ver) {
rel_array* shader::get_rel_array(value_kind kind, unsigned sel, unsigned chan) {
- for (regarray_vec::iterator I = rel_arrays.begin(),
- E = rel_arrays.end(); I != E; ++I) {
+ for (regarray_vec::iterator I = rel_arrays.begin(), E = rel_arrays.end();
+ I != E; ++I) {
rel_array* a = *I;
if (kind != a->kind)
continue;
unsigned achan = a->base_sel.chan();
unsigned areg = a->base_sel.sel();
- if (achan == chan && (sel >= areg && sel < areg+a->array_size))
+ if (achan == chan && (sel >= areg && sel < areg + a->array_size))
return a;
}
return NULL;
}
-void shader::add_rel_array(value_kind kind, unsigned sel_start, unsigned sel_count,
- unsigned comp_mask, unsigned array_id) {
+void shader::add_rel_array(value_kind kind, unsigned sel_start,
+ unsigned sel_count, unsigned comp_mask,
+ unsigned array_id) {
unsigned chan = 0;
while (comp_mask) {
if (comp_mask & 1) {
- rel_array *a = new rel_array(kind,
- sel_chan(sel_start, chan), sel_count, array_id);
+ rel_array *a = new rel_array(kind, sel_chan(sel_start, chan),
+ sel_count, array_id);
- SB_DUMP_PASS( sblog << "add_gpr_array: @" << a->base_sel
- << " [" << a->array_size << "]\n";
+ SB_DUMP_PASS(
+ sblog << "add_gpr_array: @" << a->base_sel << " ["
+ << a->array_size << "]\n"
+ ;
);
rel_arrays.push_back(a);
@@ -429,13 +468,18 @@ std::string shader::get_full_target_name() {
const char* shader::get_shader_target_name() {
switch (target) {
- case TARGET_VS: return "VS";
- case TARGET_PS: return "PS";
- case TARGET_GS: return "GS";
- case TARGET_COMPUTE: return "COMPUTE";
- case TARGET_FETCH: return "FETCH";
- default:
- return "INVALID_TARGET";
+ case TARGET_VS:
+ return "VS";
+ case TARGET_PS:
+ return "PS";
+ case TARGET_GS:
+ return "GS";
+ case TARGET_COMPUTE:
+ return "COMPUTE";
+ case TARGET_FETCH:
+ return "FETCH";
+ default:
+ return "INVALID_TARGET";
}
}
@@ -452,7 +496,6 @@ void shader::simplify_dep_rep(node* dr) {
dr->parent->cut(dr->next, NULL);
}
-
// FIXME this is used in some places as the max non-temp gpr,
// (MAX_GPR - 2 * ctx.alu_temp_gprs) should be used for that instead.
unsigned shader::first_temp_gpr() {
@@ -524,10 +567,8 @@ void shader::create_bbs(container_node* n, bbs_vec &bbs, int loop_level) {
if (inside_bb && !last_inside_bb)
bb_start = I;
else if (!inside_bb) {
- if (last_inside_bb
- && I->type != NT_REPEAT
- && I->type != NT_DEPART
- && I->type != NT_IF) {
+ if (last_inside_bb && I->type != NT_REPEAT && I->type != NT_DEPART
+ && I->type != NT_IF) {
bb_node *bb = create_bb(bbs.size(), loop_level);
bbs.push_back(bb);
n->insert_node_before(*bb_start, bb);
@@ -543,7 +584,7 @@ void shader::create_bbs(container_node* n, bbs_vec &bbs, int loop_level) {
}
create_bbs(static_cast<container_node*>(k), bbs,
- loop_level + loop);
+ loop_level + loop);
}
}
@@ -557,7 +598,7 @@ void shader::create_bbs(container_node* n, bbs_vec &bbs, int loop_level) {
bb_node *bb = create_bb(bbs.size(), loop_level);
bbs.push_back(bb);
if (n->empty())
- n->push_back(bb);
+ n->push_back(bb);
else {
n->insert_node_before(*bb_start, bb);
if (bb_start != n->end())
@@ -582,22 +623,22 @@ void shader::expand_bbs(bbs_vec &bbs) {
sched_queue_id shader::get_queue_id(node* n) {
switch (n->subtype) {
- case NST_ALU_INST:
- case NST_ALU_PACKED_INST:
- case NST_COPY:
- case NST_PSI:
- return SQ_ALU;
- case NST_FETCH_INST: {
- fetch_node *f = static_cast<fetch_node*>(n);
- if (ctx.is_r600() && (f->bc.op_ptr->flags & FF_VTX))
- return SQ_VTX;
- return SQ_TEX;
- }
- case NST_CF_INST:
- return SQ_CF;
- default:
- assert(0);
- return SQ_NUM;
+ case NST_ALU_INST:
+ case NST_ALU_PACKED_INST:
+ case NST_COPY:
+ case NST_PSI:
+ return SQ_ALU;
+ case NST_FETCH_INST: {
+ fetch_node *f = static_cast<fetch_node*>(n);
+ if (ctx.is_r600() && (f->bc.op_ptr->flags & FF_VTX))
+ return SQ_VTX;
+ return SQ_TEX;
+ }
+ case NST_CF_INST:
+ return SQ_CF;
+ default:
+ assert(0);
+ return SQ_NUM;
}
}
@@ -642,10 +683,9 @@ void shader_stats::accumulate(shader_stats& s) {
void shader_stats::dump() {
sblog << "dw:" << ndw << ", gpr:" << ngpr << ", stk:" << nstack
- << ", alu groups:" << alu_groups << ", alu clauses: " << alu_clauses
- << ", alu:" << alu << ", fetch:" << fetch
- << ", fetch clauses:" << fetch_clauses
- << ", cf:" << cf;
+ << ", alu groups:" << alu_groups << ", alu clauses: " << alu_clauses
+ << ", alu:" << alu << ", fetch:" << fetch << ", fetch clauses:"
+ << fetch_clauses << ", cf:" << cf;
if (shaders > 1)
sblog << ", shaders:" << shaders;
@@ -655,7 +695,7 @@ void shader_stats::dump() {
static void print_diff(unsigned d1, unsigned d2) {
if (d1)
- sblog << ((int)d2 - (int)d1) * 100 / (int)d1 << "%";
+ sblog << ((int) d2 - (int) d1) * 100 / (int) d1 << "%";
else if (d2)
sblog << "N/A";
else
@@ -663,15 +703,24 @@ static void print_diff(unsigned d1, unsigned d2) {
}
void shader_stats::dump_diff(shader_stats& s) {
- sblog << "dw:"; print_diff(ndw, s.ndw);
- sblog << ", gpr:" ; print_diff(ngpr, s.ngpr);
- sblog << ", stk:" ; print_diff(nstack, s.nstack);
- sblog << ", alu groups:" ; print_diff(alu_groups, s.alu_groups);
- sblog << ", alu clauses: " ; print_diff(alu_clauses, s.alu_clauses);
- sblog << ", alu:" ; print_diff(alu, s.alu);
- sblog << ", fetch:" ; print_diff(fetch, s.fetch);
- sblog << ", fetch clauses:" ; print_diff(fetch_clauses, s.fetch_clauses);
- sblog << ", cf:" ; print_diff(cf, s.cf);
+ sblog << "dw:";
+ print_diff(ndw, s.ndw);
+ sblog << ", gpr:";
+ print_diff(ngpr, s.ngpr);
+ sblog << ", stk:";
+ print_diff(nstack, s.nstack);
+ sblog << ", alu groups:";
+ print_diff(alu_groups, s.alu_groups);
+ sblog << ", alu clauses: ";
+ print_diff(alu_clauses, s.alu_clauses);
+ sblog << ", alu:";
+ print_diff(alu, s.alu);
+ sblog << ", fetch:";
+ print_diff(fetch, s.fetch);
+ sblog << ", fetch clauses:";
+ print_diff(fetch_clauses, s.fetch_clauses);
+ sblog << ", cf:";
+ print_diff(cf, s.cf);
sblog << "\n";
}
diff --git a/src/gallium/drivers/r600/sb/sb_shader.h b/src/gallium/drivers/r600/sb/sb_shader.h
index 94e1470192..abc2d6bb34 100644
--- a/src/gallium/drivers/r600/sb/sb_shader.h
+++ b/src/gallium/drivers/r600/sb/sb_shader.h
@@ -277,7 +277,7 @@ public:
coalescer coal;
- static const unsigned temp_regid_offset = 512;
+ static const unsigned temp_regid_offset = 0;
bbs_vec bbs;
@@ -308,7 +308,7 @@ public:
value* get_const_value(const literal & v);
value* get_special_value(unsigned sv_id, unsigned version = 0);
- value* create_temp_value();
+ value* create_temp_value(int chan = 0);
value* get_reg_value(value_kind kind, bool src, unsigned reg,
unsigned chan, bool rel, value *r = NULL,
unsigned arr_id = 0);
@@ -323,7 +323,12 @@ public:
value* get_value_version(value* v, unsigned ver);
void init();
- void add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask, bool src);
+ void add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask,
+ bool src);
+
+ void add_pinned_inputs(vvec& vec, value_kind kind, unsigned sel,
+ unsigned comp_mask, bool src,
+ unsigned pin_gpr_sel);
void dump_ir();
diff --git a/src/gallium/drivers/r600/sb/sb_ssa_builder.cpp b/src/gallium/drivers/r600/sb/sb_ssa_builder.cpp
index 3ad628bb68..6df2979452 100644
--- a/src/gallium/drivers/r600/sb/sb_ssa_builder.cpp
+++ b/src/gallium/drivers/r600/sb/sb_ssa_builder.cpp
@@ -201,8 +201,11 @@ bool ssa_rename::visit(alu_node& n, bool enter) {
if (!n.dst.empty() && n.dst[0]) {
// FIXME probably use separate pass for such things
- if ((n.bc.op_ptr->flags & AF_INTERP) || n.bc.op == ALU_OP2_CUBE)
+ if ((n.bc.op_ptr->flags & AF_INTERP) || n.bc.op == ALU_OP2_CUBE) {
n.dst[0]->flags |= VLF_PIN_CHAN;
+ n.dst[0]->pin_gpr = sel_chan(n.dst[0]->pin_gpr.sel(),
+ n.bc.slot);
+ }
}
}
return true;
diff --git a/src/gallium/drivers/r600/sb/sb_tgsi.cpp b/src/gallium/drivers/r600/sb/sb_tgsi.cpp
index 2e4f778a81..0f0d6df745 100644
--- a/src/gallium/drivers/r600/sb/sb_tgsi.cpp
+++ b/src/gallium/drivers/r600/sb/sb_tgsi.cpp
@@ -79,7 +79,7 @@ const tgsi_translator::tgsi_inst_info tgsi_translator::tgsi_info_table[TGSI_OPCO
/* 33 */ TI_DESC(ABS, ALU_OP1_MOV, ti_alu, 0),
/* 34 */ TI_DESC(RCC, 0, ti_unsupported, 0),
/* 35 */ TI_DESC(DPH, 0, ti_dot, 0),
- /* 36 */ TI_DESC(COS, 0, ti_unsupported, 0),
+ /* 36 */ TI_DESC(COS, ALU_OP1_COS, ti_trig, 0),
/* 37 */ TI_DESC(DDX, FETCH_OP_GET_GRADIENTS_H, ti_tex, 0),
/* 38 */ TI_DESC(DDY, FETCH_OP_GET_GRADIENTS_V, ti_tex, 0),
/* 39 */ TI_DESC(KILL, 0, ti_kill, 0),
@@ -91,7 +91,7 @@ const tgsi_translator::tgsi_inst_info tgsi_translator::tgsi_info_table[TGSI_OPCO
/* 45 */ TI_DESC(SEQ, ALU_OP2_SETE, ti_alu, 0),
/* 46 */ TI_DESC(SFL, 0, ti_unsupported, 0),
/* 47 */ TI_DESC(SGT, ALU_OP2_SETGT, ti_alu, 0),
- /* 48 */ TI_DESC(SIN, 0, ti_unsupported, 0),
+ /* 48 */ TI_DESC(SIN, ALU_OP1_SIN, ti_trig, 0),
/* 49 */ TI_DESC(SLE, ALU_OP2_SETGE, ti_alu, TIF_ALU_SWAPSRC01),
/* 50 */ TI_DESC(SNE, ALU_OP2_SETNE, ti_alu, 0),
/* 51 */ TI_DESC(STR, 0, ti_unsupported, 0),
@@ -236,6 +236,7 @@ const tgsi_translator::tgsi_inst_info tgsi_translator::tgsi_info_table[TGSI_OPCO
shader* tgsi_translator::translate() {
shader_target target;
+ int r;
tokens = ps->selector->tokens;
tgsi_parse_init(&parse, tokens);
@@ -264,10 +265,16 @@ shader* tgsi_translator::translate() {
sh->init();
current = sh->root;
- int r = parse_tokens();
- tgsi_parse_free(&parse);
+ if ((r = parse_declarations()))
+ return NULL;
emit_inputs();
+
+ if ((r = parse_instructions()))
+ return NULL;
+
+ tgsi_parse_free(&parse);
+
emit_exports();
update_pipe_shader();
@@ -279,7 +286,7 @@ shader* tgsi_translator::translate() {
return sh;
}
-int tgsi_translator::parse_tokens() {
+int tgsi_translator::parse_declarations() {
int r;
while (!tgsi_parse_end_of_tokens(&parse)) {
@@ -295,7 +302,7 @@ int tgsi_translator::parse_tokens() {
r = parse_immediate();
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
- r = parse_instruction();
+ return 0;
break;
default:
assert(!"unexpected tgsi token type");
@@ -307,6 +314,28 @@ int tgsi_translator::parse_tokens() {
return 0;
}
+int tgsi_translator::parse_instructions() {
+ int r;
+
+ while (true) {
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ r = parse_instruction();
+ break;
+ default:
+ assert(!"unexpected tgsi token type");
+ return -1;
+ }
+ if (r)
+ return r;
+
+ if (tgsi_parse_end_of_tokens(&parse))
+ break;
+ tgsi_parse_token(&parse);
+ };
+ return 0;
+}
+
int tgsi_translator::parse_property() {
tgsi_full_property *property = &parse.FullToken.FullProperty;
@@ -349,6 +378,7 @@ int tgsi_translator::parse_declaration() {
interp_mask |= (1 << 2);
input[i].d.spi_sid = spi_sid(input[i].d.name, input[i].d.sid);
+
switch (input[i].d.name) {
case TGSI_SEMANTIC_FACE:
face_input = i;
@@ -395,9 +425,9 @@ int tgsi_translator::parse_declaration() {
break;
case TGSI_FILE_TEMPORARY:
- if (d->Array.ArrayID) {
+ if (d->Array.ArrayID && d->Range.Last > d->Range.First) {
sh->add_rel_array(VLK_TGSI_TEMP, d->Range.First,
- d->Range.Last - d->Range.First + 1, 0xF, d->Array.ArrayID);
+ d->Range.Last - d->Range.First + 1, 0xF, d->Array.ArrayID);
}
break;
@@ -408,9 +438,11 @@ int tgsi_translator::parse_declaration() {
case TGSI_FILE_SYSTEM_VALUE:
if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
+ instanceid_index = d->Range.First;
break;
} else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
- break;
+ vertexid_index = d->Range.First;
+ break;
default:
assert(!"unexpected tgsi declaration");
return -1;
@@ -479,6 +511,7 @@ int tgsi_translator::parse_instruction() {
args.dst.rel_addr_index = inst->Dst[0].Indirect.Index;
args.dst.rel_addr_chan = inst->Dst[0].Indirect.Swizzle;
args.dst.rel_array_id = inst->Dst[0].Indirect.ArrayID;
+ indirect_vlk |= (1 << args.dst.kind);
}
}
@@ -486,13 +519,29 @@ int tgsi_translator::parse_instruction() {
clamp = inst->Instruction.Saturate;
args.nsrc = inst->Instruction.NumSrcRegs;
+ unsigned nconst = 0;
+ unsigned nliteral = 0;
+
for (i = 0; i < args.nsrc; ++i) {
tgsi_arg &a = args.src[i];
a.file = inst->Src[i].Register.File;
a.sel = inst->Src[i].Register.Index;
- if (a.file != TGSI_FILE_SAMPLER) {
- a.kind = file_to_value_kind(args.src[i].file);
+ if (a.file == TGSI_FILE_SYSTEM_VALUE) {
+ if (a.sel == instanceid_index) {
+ a.kind = VLK_REG;
+ a.sel = 0;
+ FILLV4(a.swz, SEL_W);
+ } else if (a.sel == vertexid_index) {
+ a.kind = VLK_REG;
+ a.sel = 0;
+ FILLV4(a.swz, SEL_X);
+ } else {
+ assert(!"unexpected system value");
+ }
+ } else if (a.file != TGSI_FILE_SAMPLER) {
+
+ a.kind = file_to_value_kind(a.file);
a.rel = inst->Src[i].Register.Indirect;
a.neg = inst->Src[i].Register.Negate;
a.abs = inst->Src[i].Register.Absolute;
@@ -501,17 +550,29 @@ int tgsi_translator::parse_instruction() {
a.swz[2] = inst->Src[i].Register.SwizzleZ;
a.swz[3] = inst->Src[i].Register.SwizzleW;
+ if (a.kind == VLK_KCACHE && inst->Src[i].Register.Dimension)
+ a.kc_bank = inst->Src[i].Dimension.Index;
+
if (a.rel) {
assert(inst->Src[i].Indirect.File == TGSI_FILE_ADDRESS);
a.rel_addr_index = inst->Src[i].Indirect.Index;
a.rel_addr_chan = inst->Src[i].Indirect.Swizzle;
a.rel_array_id = inst->Src[i].Indirect.ArrayID;
+ indirect_vlk |= (1 << a.kind);
if (a.file == TGSI_FILE_CONSTANT) {
- a.values = fetch_rel_const(a);
- a.rel = 0;
- a.kind = VLK_TEMP;
+ fetch_rel_const(a);
}
+ } else if (a.file == TGSI_FILE_CONSTANT) {
+ if (nconst == 1) {
+ split_src_arg(a);
+ } else
+ ++nconst;
+ } else if (a.file == TGSI_FILE_IMMEDIATE) {
+ if (nliteral == 1) {
+ split_src_arg(a);
+ } else
+ ++nliteral;
}
}
}
@@ -549,9 +610,65 @@ int tgsi_translator::emit_fake_export(unsigned type) {
}
int tgsi_translator::emit_exports() {
- int i, k, n;
+ int i, j, k, n;
int next_pos = 60, next_pixel = 0, next_param = 0;
+ if (clip_vertex_write) {
+ int cd = noutput;
+
+ noutput += 2;
+ output[cd].d.name = TGSI_SEMANTIC_CLIPDIST;
+ output[cd].tgsi_index = cd;
+ output[cd + 1].d.name = TGSI_SEMANTIC_CLIPDIST;
+ output[cd + 1].tgsi_index = cd + 1;
+
+ output[cv_output].d.spi_sid = 0;
+ clip_dist_write = 0xFF;
+
+ for (i = 0; i < 8; i++) {
+ int oreg = i >> 2, ochan = i & 3;
+ value *o = get_tgsi_value(VLK_TGSI_OUTPUT, cd + oreg, ochan);
+ alu_packed_node *p = sh->create_alu_packed();
+
+ for (j = 0; j < 4; j++) {
+ value *cvo = get_tgsi_value(VLK_TGSI_OUTPUT, cv_output, j);
+ value *cp = sh->get_kcache_value(R600_UCP_CONST_BUFFER, i, j);
+ alu_node *a = build_alu(ALU_OP2_DOT4, j == ochan ? o : NULL, 0,
+ asrc(cvo), asrc(cp));
+ a->bc.slot = j;
+ p->push_back(a);
+ }
+ emit_node(p);
+ }
+ }
+
+ pipe_stream_output_info &so = ps->selector->so;
+ for (i = 0; i < (int)so.num_outputs; i++) {
+ int nc = so.output[i].num_components;
+ unsigned start_comp = so.output[i].start_component, real_start;
+ unsigned index = so.output[i].register_index;
+ unsigned dst_offset = so.output[i].dst_offset;
+ unsigned buf = so.output[i].output_buffer;
+ unsigned op = so.output[i].output_buffer;
+
+ assert(buf < 4);
+ op += ctx.is_egcm() ? CF_OP_MEM_STREAM0_BUF0 : CF_OP_MEM_STREAM0;
+ real_start = (dst_offset < start_comp) ? 0 : start_comp;
+
+ cf_node *ms = sh->create_cf(op);
+ ms->bc.elem_size = nc;
+ ms->bc.array_base = dst_offset - real_start;
+ ms->bc.type = MEM_WRITE;
+ ms->bc.array_size = 0xFFF;
+ ms->src.resize(4);
+
+ for (j = 0; j < nc; ++j) {
+ value *v = get_tgsi_value(VLK_TGSI_OUTPUT, index, start_comp + j);
+ ms->src[real_start + j] = v;
+ }
+ emit_node(ms);
+ }
+
for (i = 0; i < noutput; ++i) {
shader_io &o = output[i];
unsigned ti = o.tgsi_index;
@@ -561,7 +678,8 @@ int tgsi_translator::emit_exports() {
case TARGET_VS:
switch (o.d.name) {
case TGSI_SEMANTIC_CLIPDIST:
- emit_export(o, EXP_PARAM, next_param++, swz, ti);
+ if (o.d.spi_sid)
+ emit_export(o, EXP_PARAM, next_param++, swz, ti);
/* fall through */
case TGSI_SEMANTIC_POSITION:
case TGSI_SEMANTIC_PSIZE:
@@ -573,6 +691,8 @@ int tgsi_translator::emit_exports() {
swz[3] = 5; /* x001 */
emit_export(o, EXP_PARAM, next_param++, swz, ti);
break;
+ case TGSI_SEMANTIC_CLIPVERTEX:
+ break;
default:
emit_export(o, EXP_PARAM, next_param++, swz, ti);
}
@@ -583,13 +703,13 @@ int tgsi_translator::emit_exports() {
switch (o.d.name) {
case TGSI_SEMANTIC_COLOR:
- if (next_pixel && next_pixel > key.nr_cbufs)
+ if (next_pixel && next_pixel >= key.nr_cbufs)
continue;
swz[3] = key.alpha_to_one ? 5 : 3;
n = (fs_write_all && ctx.is_egcm() && key.nr_cbufs) ?
key.nr_cbufs : 1;
for (k = 0; k < n; k++) {
- emit_export(o, EXP_PIXEL, next_pixel++, swz, ti++);
+ emit_export(o, EXP_PIXEL, next_pixel++, swz, ti);
}
nr_ps_color_exports += n;
break;
@@ -671,27 +791,35 @@ vvec tgsi_translator::get_vector_values(value_kind kind, unsigned tgsi_index,
}
int tgsi_translator::emit_inputs() {
- int i, k, nparam = 0;
+ int i, nparam = 0, gpr_reserved = 0;
+
+ // XXX temporary workaround for lack of proper array support for inputs
+ if (ninput)
+ sh->add_rel_array(VLK_TGSI_INPUT, 0, ninput, 0xF, 0);
+
switch (sh->target) {
case TARGET_VS: {
cf_node *c = sh->create_cf(CF_OP_CALL_FS);
- sh->root->push_front(c);
- sh->add_pinned_gpr_values(c->src, 0, 0xF, true);
+ c->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE;
+ sh->add_pinned_gpr_values(c->src, 0, 0xF, true);
sh->add_input(0, true, 0xF);
+ // pin input arrays
+ for (i = 0; i < 4; ++i) {
+ rel_array *a = sh->get_rel_array(VLK_TGSI_INPUT, 0, i);
+ if (a)
+ a->gpr = sel_chan(1, i);
+ }
+
for (i = 0; i < ninput; ++i) {
shader_io &in = input[i];
vvec dv = get_vector_values(VLK_TGSI_INPUT, in.tgsi_index);
-
- for (k = 0; k < 4; ++k) {
- dv[k]->flags |= VLF_PIN_REG | VLF_PIN_CHAN;
- dv[k]->gpr = dv[k]->pin_gpr = sel_chan(i + 1, k);
- }
c->dst.insert(c->dst.end(), dv.begin(), dv.end());
}
- }
+ emit_node(c);
break;
+ }
case TARGET_PS:
if (ctx.is_egcm()) {
unsigned ij_pairs = ((interp_mask & 1) + (interp_mask >> 1))
@@ -705,18 +833,78 @@ int tgsi_translator::emit_inputs() {
++gpr;
mask >>= 4;
}
+
+ gpr_reserved = gpr;
+ }
+
+ // pin input arrays
+ for (i = 0; i < 4; ++i) {
+ rel_array *a = sh->get_rel_array(VLK_TGSI_INPUT, 0, i);
+ if (a)
+ a->gpr = sel_chan(gpr_reserved, i);
+ }
+
+ if (key.color_two_side && colors_used) {
+ two_side = 1;
+
+ if (face_input == -1) {
+ i = ninput++;
+ input[i].d.name = TGSI_SEMANTIC_FACE;
+ input[i].d.spi_sid = 0;
+ input[i].tgsi_index = i;
+ face_input = i;
+ }
}
for (i = 0; i < ninput; ++i) {
shader_io &in = input[i];
+ in.d.gpr = gpr_reserved++;
+
if (ctx.is_egcm() && in.d.spi_sid) {
+ in.d.lds_pos = nparam++;
if (in.d.interpolate != TGSI_INTERPOLATE_CONSTANT) {
- in.d.lds_pos = nparam++;
in.d.ij_index = get_ij(in);
- sh->root->push_front(build_interp(in, 0));
- sh->root->push_front(build_interp(in, 1));
+
+ emit_node(build_interp(in, 1));
+ emit_node(build_interp(in, 0));
} else {
- sh->root->push_front(build_interp_flat(in));
+ emit_node(build_interp_flat(in));
+ }
+ } else {
+ sh->add_pinned_inputs(sh->root->dst, VLK_TGSI_INPUT,
+ in.tgsi_index, 0xF, false, in.d.gpr);
+
+ if (fragcoord_input == i) {
+ value* w = get_tgsi_value(VLK_TGSI_INPUT, i, SEL_W);
+ emit_alu(ALU_OP1_RECIP_IEEE, w, 0, asrc(w));
+ }
+ }
+
+ if (two_side) {
+ if (in.d.name == TGSI_SEMANTIC_COLOR) {
+ int ni = ninput++;
+ shader_io &nin = input[ni];
+ nin = in;
+ nin.d.name = TGSI_SEMANTIC_BCOLOR;
+ nin.d.spi_sid = spi_sid(nin.d.name, nin.d.sid);
+ // back_color_input actually means front_color_input here
+ nin.d.back_color_input = i;
+ nin.tgsi_index = ni;
+ } else if (in.d.name == TGSI_SEMANTIC_BCOLOR) {
+ // both inputs are interpolated now, so select the color
+ int k;
+ shader_io &fin = input[in.d.back_color_input];
+
+ for (k = 0; k < 4; ++k) {
+ value *face = sh->get_value(VLK_TGSI_INPUT,
+ sel_chan(input[face_input].tgsi_index, 0));
+ value *fv = sh->get_value(VLK_TGSI_INPUT,
+ sel_chan(fin.tgsi_index, k));
+ value *bv = sh->get_value(VLK_TGSI_INPUT,
+ sel_chan(in.tgsi_index, k));
+ emit_alu(ALU_OP3_CNDGT, fv, 0, asrc(face), asrc(fv),
+ asrc(bv));
+ }
}
}
}
@@ -789,9 +977,9 @@ value* tgsi_translator::get_arg_value(tgsi_arg &ta, unsigned chan) {
unsigned schan = ta.dst ? chan : ta.swz[chan];
if (ta.rel) {
value *r = get_tgsi_value(VLK_TGSI_ADDR, ta.rel_addr_index,
- ta.rel_addr_chan);
+ ta.rel_addr_chan);
ta.values[chan] = sh->get_reg_value(ta.kind, !ta.dst, ta.sel, schan,
- ta.rel, r, ta.rel_array_id);
+ ta.rel, r, ta.rel_array_id);
} else
ta.values[chan] = get_tgsi_value(ta.kind, ta.sel, schan);
}
@@ -806,17 +994,25 @@ value* tgsi_translator::get_arg_value(unsigned index, unsigned chan) {
int tgsi_translator::ti_alu() {
begin_group();
- if (info->tgsi_op == TGSI_OPCODE_SUB)
+ switch (info->tgsi_op) {
+ case TGSI_OPCODE_SUB:
args.src[1].neg = !args.src[1].neg;
+ break;
+ case TGSI_OPCODE_ABS:
+ args.src[0].neg = 0;
+ args.src[0].abs = 1;
+ break;
+ }
if (unlikely(info->flags & TIF_ALU_SWAPSRC01)) {
- FOREACH_CHAN {
- emit_alu(info->isa_op, tgsi_dst(ch), clamp,
- asrc(args.src[1], ch),
+ FOREACH_CHAN
+ {
+ emit_alu(info->isa_op, tgsi_dst(ch), clamp, asrc(args.src[1], ch),
asrc(args.src[0], ch));
}
} else {
- FOREACH_CHAN {
+ FOREACH_CHAN
+ {
emit_alu(info->isa_op, ch);
}
}
@@ -842,33 +1038,40 @@ int tgsi_translator::ti_dot() {
s1 = 1;
break;
default:
+ nc = 0;
assert(!"ti_dot: unexpected tgsi opcode");
}
- // XXX maybe use MUL/DOT instead of DOT4 for nc < 4
- FOREACH_CHAN {
- alu_packed_node *p = sh->create_alu_packed();
- alu_node *a;
- for (i = 0; i < nc - s1; ++i) {
- a = build_alu(ALU_OP2_DOT4, (i == ch) ? tgsi_dst(ch) : NULL, clamp,
- asrc(args.src[0], i), asrc(args.src[1], i));
- a->bc.slot = i;
- p->push_back(a);
- }
- if (s1) {
- a = build_alu(ALU_OP2_DOT4, (i == ch) ? tgsi_dst(ch) : NULL, clamp,
- asrc(literal(1.0f)), asrc(args.src[1], i));
- a->bc.slot = i++;
- p->push_back(a);
- }
- for (; i < 4; ++i) {
- a = build_alu(ALU_OP2_DOT4, (i == ch) ? tgsi_dst(ch) : NULL, clamp,
- asrc(literal(0)), asrc(literal(0)));
- a->bc.slot = i;
- p->push_back(a);
- }
- emit_node(p);
+ unsigned ch = __builtin_ctz(write_mask);
+ unsigned nwc = __builtin_popcount(write_mask);
+
+ value *t = nwc > 1 ? create_temp() : tgsi_dst(ch);
+
+ alu_packed_node *p = sh->create_alu_packed();
+ alu_node *a;
+ for (i = 0; i < nc - s1; ++i) {
+ a = build_alu(ALU_OP2_DOT4, (i == ch) ? t : NULL, clamp,
+ asrc(args.src[0], i), asrc(args.src[1], i));
+ a->bc.slot = i;
+ p->push_back(a);
+ }
+ if (s1) {
+ a = build_alu(ALU_OP2_DOT4, (i == ch) ? t : NULL, clamp,
+ asrc(literal(1.0f)), asrc(args.src[1], i));
+ a->bc.slot = i++;
+ p->push_back(a);
}
+ for (; i < 4; ++i) {
+ a = build_alu(ALU_OP2_DOT4, (i == ch) ? t : NULL, clamp,
+ asrc(literal(0)), asrc(literal(0)));
+ a->bc.slot = i;
+ p->push_back(a);
+ }
+ emit_node(p);
+
+ if (nwc > 1)
+ ti_replicate(t);
+
return 0;
}
@@ -880,7 +1083,8 @@ int tgsi_translator::ti_repl() {
args.src[0].neg = 0;
break;
}
- FOREACH_CHAN {
+ FOREACH_CHAN
+ {
emit_alu(info->isa_op, 0, ch);
}
return 0;
@@ -934,8 +1138,8 @@ alu_node* tgsi_translator::create_alu(unsigned op) {
a->bc.set_op(op);
a->bc.slot_flags = (alu_op_flags) ctx.alu_slots(a->bc.op_ptr);
if (a->bc.op_ptr->flags & AF_KILL) {
- a->flags |= NF_DONT_HOIST | NF_DONT_MOVE |
- NF_DONT_KILL | NF_SCHEDULE_EARLY;
+ a->flags |= NF_DONT_HOIST | NF_DONT_MOVE | NF_DONT_KILL
+ | NF_SCHEDULE_EARLY;
} else if (a->bc.op_ptr->flags & (AF_PRED | AF_MOVA)) {
a->flags |= NF_DONT_HOIST;
}
@@ -944,7 +1148,8 @@ alu_node* tgsi_translator::create_alu(unsigned op) {
int tgsi_translator::ti_trig() {
value *t = prepare_trig(asrc(args.src[0], 0));
- FOREACH_CHAN {
+ FOREACH_CHAN
+ {
emit_alu(info->isa_op, tgsi_dst(ch), clamp, t);
}
return 0;
@@ -967,7 +1172,7 @@ int tgsi_translator::ti_scs() {
}
value* tgsi_translator::prepare_trig(alu_src s) {
- static float half_inv_pi = 1.0 /(3.1415926535 * 2);
+ static float half_inv_pi = 1.0 / (3.1415926535 * 2);
static float double_pi = 3.1415926535 * 2;
static float neg_pi = -3.1415926535;
@@ -979,7 +1184,8 @@ value* tgsi_translator::prepare_trig(alu_src s) {
if (ctx.is_r600())
emit_alu(ALU_OP3_MULADD, t, 0, asrc(t), asrc(double_pi), asrc(neg_pi));
else
- emit_alu(ALU_OP2_ADD, t, 0, asrc(t), asrc(-0.5f));
+// emit_alu(ALU_OP2_ADD, t, 0, asrc(t), asrc(-0.5f));
+ emit_alu(ALU_OP3_MULADD, t, 0, asrc(t), asrc(1.0f), asrc(0.5f, 0, 1));
return t;
}
@@ -996,7 +1202,8 @@ int tgsi_translator::ti_exp() {
if (write_mask & (1 << SEL_Y))
emit_alu(ALU_OP1_FRACT, tgsi_dst(SEL_Y), clamp, asrc(args.src[0], 0));
if (write_mask & (1 << SEL_Z))
- emit_alu(ALU_OP1_EXP_IEEE, tgsi_dst(SEL_Z), clamp, asrc(args.src[0], 0));
+ emit_alu(ALU_OP1_EXP_IEEE, tgsi_dst(SEL_Z), clamp,
+ asrc(args.src[0], 0));
if (write_mask & (1 << SEL_W))
emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_W), 0, asrc(1.0f));
end_group();
@@ -1008,9 +1215,7 @@ int tgsi_translator::ti_log() {
value *t2 = create_temp();
value *t3 = create_temp();
- alu_src s = asrc(args.src[0], 0);
- s.abs = 1;
- s.neg = 0;
+ alu_src s = asrc(args.src[0], 0, 1, 0);
if (write_mask & 0x7)
emit_alu(ALU_OP1_LOG_IEEE, t, 0, s);
@@ -1025,11 +1230,11 @@ int tgsi_translator::ti_log() {
if (write_mask & (1 << SEL_X))
emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_X), clamp, t2);
if (write_mask & (1 << SEL_Y))
- emit_alu(ALU_OP2_MUL, tgsi_dst(SEL_Y), clamp, s, asrc(t2));
+ emit_alu(ALU_OP2_MUL, tgsi_dst(SEL_Y), clamp, s, asrc(t3));
if (write_mask & (1 << SEL_Z))
emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_Z), clamp, t);
if (write_mask & (1 << SEL_W))
- emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_Z), 0, asrc(1.0f));
+ emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_W), 0, asrc(1.0f));
end_group();
return 0;
}
@@ -1040,11 +1245,11 @@ int tgsi_translator::ti_dst() {
emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_X), 0, asrc(1.0f));
if (write_mask & (1 << SEL_Y))
emit_alu(ALU_OP2_MUL, tgsi_dst(SEL_Y), clamp, asrc(args.src[0], SEL_Y),
- asrc(args.src[1], SEL_Y));
+ asrc(args.src[1], SEL_Y));
if (write_mask & (1 << SEL_Z))
emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_Z), clamp, asrc(args.src[0], SEL_Z));
if (write_mask & (1 << SEL_W))
- emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_Z), clamp, asrc(args.src[1], SEL_W));
+ emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_W), clamp, asrc(args.src[1], SEL_W));
end_group();
return 0;
}
@@ -1053,12 +1258,19 @@ int tgsi_translator::ti_lrp() {
vvec t;
create_temps(t, 4);
- FOREACH_CHAN {
- emit_alu(ALU_OP2_ADD, t[ch], 0, asrc(1.0f), asrc(args.src[0], ch, 0, 1));
+ FOREACH_CHAN
+ {
+ emit_alu(ALU_OP2_ADD, t[ch], 0, asrc(1.0f),
+ asrc(args.src[0], ch, 0, 1));
emit_alu(ALU_OP2_MUL, t[ch], 0, asrc(t[ch]), asrc(args.src[2], ch));
+ }
+ begin_group();
+ FOREACH_CHAN
+ {
emit_alu(ALU_OP3_MULADD, tgsi_dst(ch), clamp, asrc(args.src[0], ch),
- asrc(args.src[1], ch), asrc(t[ch]));
+ asrc(args.src[1], ch), asrc(t[ch]));
}
+ end_group();
return 0;
}
@@ -1074,7 +1286,8 @@ int tgsi_translator::ti_pow() {
int tgsi_translator::ti_replicate(value* t) {
begin_group();
- FOREACH_CHAN {
+ FOREACH_CHAN
+ {
emit_alu(ALU_OP1_MOV, tgsi_dst(ch), 0, asrc(t));
}
end_group();
@@ -1082,23 +1295,24 @@ int tgsi_translator::ti_replicate(value* t) {
}
int tgsi_translator::ti_xpd() {
- static const unsigned int src0_swizzle[] = {2, 0, 1};
- static const unsigned int src1_swizzle[] = {1, 2, 0};
+ static const unsigned int src0_swizzle[] = { 2, 0, 1 };
+ static const unsigned int src1_swizzle[] = { 1, 2, 0 };
vvec t;
create_temps(t, 3);
- FOREACH_CHAN {
+ FOREACH_CHAN
+ {
if (ch < SEL_W)
- emit_alu(ALU_OP2_MUL, t[ch], 0,
- asrc(args.src[0], src0_swizzle[ch]),
- asrc(args.src[1], src1_swizzle[ch]));
+ emit_alu(ALU_OP2_MUL, t[ch], 0, asrc(args.src[0], src0_swizzle[ch]),
+ asrc(args.src[1], src1_swizzle[ch]));
}
begin_group();
- FOREACH_CHAN {
+ FOREACH_CHAN
+ {
if (ch < SEL_W)
emit_alu(ALU_OP3_MULADD, tgsi_dst(ch), clamp,
- asrc(args.src[0], src1_swizzle[ch]),
- asrc(args.src[1], src0_swizzle[ch]), asrc(t[ch], 0, 1));
+ asrc(args.src[0], src1_swizzle[ch]),
+ asrc(args.src[1], src0_swizzle[ch]), asrc(t[ch], 0, 1));
else
emit_alu(ALU_OP1_MOV, tgsi_dst(ch), 0, asrc(1.0f));
}
@@ -1109,6 +1323,9 @@ int tgsi_translator::ti_xpd() {
int tgsi_translator::ti_kill() {
int i;
+ // XXX if this affects performance, we might want to do it after DCE
+ uses_kill = true;
+
for (i = 0; i < 4; ++i) {
if (info->tgsi_op == TGSI_OPCODE_KILL_IF)
emit_alu(ALU_OP2_KILLGT, NULL, 0, asrc(0.0f), asrc(args.src[0], i));
@@ -1120,23 +1337,24 @@ int tgsi_translator::ti_kill() {
int tgsi_translator::ti_arl() {
switch (info->tgsi_op) {
- case TGSI_OPCODE_ARR:
+ case TGSI_OPCODE_ARL:
if (ctx.is_egcm()) {
emit_alu(ALU_OP1_FLT_TO_INT_FLOOR, tgsi_dst(SEL_X), 0,
- asrc(args.src[0], 0));
+ asrc(args.src[0], 0));
} else {
value *t = create_temp();
emit_alu(ALU_OP1_FLOOR, t, 0, asrc(args.src[0], 0));
emit_alu(ALU_OP1_FLT_TO_INT, tgsi_dst(SEL_X), 0, asrc(t));
}
break;
- case TGSI_OPCODE_ARL:
+ case TGSI_OPCODE_ARR:
emit_alu(ALU_OP1_FLT_TO_INT, tgsi_dst(SEL_X), 0, asrc(args.src[0], 0));
break;
case TGSI_OPCODE_UARL:
emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_X), 0, asrc(args.src[0], 0));
break;
- default: assert(!"ti_arl: unexpected opcode");
+ default:
+ assert(!"ti_arl: unexpected opcode");
}
return 0;
}
@@ -1145,39 +1363,45 @@ int tgsi_translator::ti_ssg() {
vvec t;
create_temps(t, 4);
if (info->tgsi_op == TGSI_OPCODE_SSG) {
- FOREACH_CHAN {
- emit_alu(ALU_OP3_CNDGE, t[ch], 0, asrc(args.src[0], ch),
- asrc(0.0f), asrc(-1.0f));
+ FOREACH_CHAN
+ {
+ emit_alu(ALU_OP3_CNDGE, t[ch], 0, asrc(args.src[0], ch), asrc(0.0f),
+ asrc(-1.0f));
}
begin_group();
- FOREACH_CHAN {
+ FOREACH_CHAN
+ {
emit_alu(ALU_OP3_CNDGT, tgsi_dst(ch), 0, asrc(args.src[0], ch),
- asrc(1.0f), asrc(t[ch]));
+ asrc(1.0f), asrc(t[ch]));
}
end_group();
} else { // ISSG
- FOREACH_CHAN {
- emit_alu(ALU_OP3_CNDGE_INT, t[ch], 0, asrc(args.src[0], ch, 0, 1),
- asrc(0u), asrc(-1u));
+ FOREACH_CHAN
+ {
+ emit_alu(ALU_OP3_CNDGE_INT, t[ch], 0, asrc(args.src[0], ch),
+ asrc(0u), asrc(-1u));
}
begin_group();
- FOREACH_CHAN {
+ FOREACH_CHAN
+ {
emit_alu(ALU_OP3_CNDGT_INT, tgsi_dst(ch), 0, asrc(args.src[0], ch),
- asrc(1u), asrc(t[ch]));
+ asrc(1u), asrc(t[ch]));
}
+ end_group();
}
return 0;
}
int tgsi_translator::ti_cmp() {
begin_group();
- FOREACH_CHAN {
+ FOREACH_CHAN
+ {
if (info->tgsi_op == TGSI_OPCODE_CMP)
emit_alu(ALU_OP3_CNDGE, tgsi_dst(ch), clamp, asrc(args.src[0], ch),
- asrc(args.src[2], ch), asrc(args.src[1], ch));
+ asrc(args.src[2], ch), asrc(args.src[1], ch));
else
emit_alu(ALU_OP3_CNDE_INT, tgsi_dst(ch), 0, asrc(args.src[0], ch),
- asrc(args.src[2], ch), asrc(args.src[1], ch));
+ asrc(args.src[2], ch), asrc(args.src[1], ch));
}
end_group();
return 0;
@@ -1187,14 +1411,16 @@ int tgsi_translator::ti_umad() {
vvec t;
create_temps(t, 4);
- FOREACH_CHAN {
+ FOREACH_CHAN
+ {
emit_alu(ALU_OP2_MULLO_INT, t[ch], 0, asrc(args.src[0], ch),
- asrc(args.src[1], ch));
+ asrc(args.src[1], ch));
}
begin_group();
- FOREACH_CHAN {
+ FOREACH_CHAN
+ {
emit_alu(ALU_OP2_ADD_INT, tgsi_dst(ch), 0, asrc(t[ch]),
- asrc(args.src[2], ch));
+ asrc(args.src[2], ch));
}
end_group();
return 0;
@@ -1203,10 +1429,12 @@ int tgsi_translator::ti_umad() {
int tgsi_translator::ti_f2iu() {
vvec t;
create_temps(t, 4);
- FOREACH_CHAN {
+ FOREACH_CHAN
+ {
emit_alu(ALU_OP1_TRUNC, t[ch], 0, asrc(args.src[0], ch));
}
- FOREACH_CHAN {
+ FOREACH_CHAN
+ {
emit_alu(info->isa_op, tgsi_dst(ch), 0, t[ch]);
}
return 0;
@@ -1214,9 +1442,10 @@ int tgsi_translator::ti_f2iu() {
int tgsi_translator::ti_ineg() {
begin_group();
- FOREACH_CHAN {
+ FOREACH_CHAN
+ {
emit_alu(ALU_OP2_SUB_INT, tgsi_dst(ch), 0, asrc(0u),
- asrc(args.src[0], ch));
+ asrc(args.src[0], ch));
}
end_group();
return 0;
@@ -1225,14 +1454,15 @@ int tgsi_translator::ti_ineg() {
int tgsi_translator::ti_iabs() {
vvec t;
create_temps(t, 4);
- FOREACH_CHAN {
- emit_alu(ALU_OP2_SUB_INT, t[ch], 0, asrc(0u),
- asrc(args.src[0], ch));
+ FOREACH_CHAN
+ {
+ emit_alu(ALU_OP2_SUB_INT, t[ch], 0, asrc(0u), asrc(args.src[0], ch));
}
begin_group();
- FOREACH_CHAN {
+ FOREACH_CHAN
+ {
emit_alu(ALU_OP3_CNDGE_INT, tgsi_dst(ch), 0, asrc(args.src[0], ch),
- asrc(args.src[0], ch), asrc(t[ch]));
+ asrc(args.src[0], ch), asrc(t[ch]));
}
end_group();
return 0;
@@ -1251,7 +1481,8 @@ int tgsi_translator::ti_divmod() {
mod = true;
case TGSI_OPCODE_UDIV:
break;
- default: assert(!"ti_divmod: unexpected tgsi opcode");
+ default:
+ assert(!"ti_divmod: unexpected tgsi opcode");
}
// TODO optimize for constant src1 (omit RECIP error correction)
@@ -1269,26 +1500,25 @@ int tgsi_translator::ti_divmod() {
value *t2z = create_temp();
value *t3x = create_temp();
- FOREACH_CHAN {
+ FOREACH_CHAN
+ {
if (signed_op) {
/* tmp2.x = -src0 */
- emit_alu(ALU_OP2_SUB_INT, t2x, 0, asrc(0u),
- asrc(args.src[0], ch));
+ emit_alu(ALU_OP2_SUB_INT, t2x, 0, asrc(0u), asrc(args.src[0], ch));
/* tmp2.y = -src1 */
- emit_alu(ALU_OP2_SUB_INT, t2y, 0, asrc(0u),
- asrc(args.src[0], ch));
+ emit_alu(ALU_OP2_SUB_INT, t2y, 0, asrc(0u), asrc(args.src[1], ch));
/* tmp2.z sign bit is set if src0 and src2 signs are different */
/* it will be a sign of the quotient */
if (!mod) {
- emit_alu(ALU_OP2_XOR_INT, t2x, 0, asrc(args.src[0], ch),
- asrc(args.src[1], ch));
+ emit_alu(ALU_OP2_XOR_INT, t2z, 0, asrc(args.src[0], ch),
+ asrc(args.src[1], ch));
}
/* tmp2.x = |src0| */
emit_alu(ALU_OP3_CNDGE_INT, t2x, 0, asrc(args.src[0], ch),
- asrc(args.src[0], ch), asrc(t2x));
+ asrc(args.src[0], ch), asrc(t2x));
/* tmp2.y = |src1| */
emit_alu(ALU_OP3_CNDGE_INT, t2y, 0, asrc(args.src[1], ch),
- asrc(args.src[1], ch), asrc(t2y));
+ asrc(args.src[1], ch), asrc(t2y));
} else { // unsigned
// copy sources to the same temps as in signed variant just
// to simplify generation of further operations.
@@ -1372,18 +1602,18 @@ int tgsi_translator::ti_divmod() {
if (mod) {
/* sign of the remainder is the same as the sign of src0 */
/* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */
- emit_alu(ALU_OP3_CNDGE_INT, t0x, 0, asrc(t2x), asrc(t0z),
- asrc(t0x));
+ emit_alu(ALU_OP3_CNDGE_INT, tgsi_dst(ch), 0, asrc(t2x),
+ asrc(t0z), asrc(t0x));
} else {
/* fix the quotient sign (same as the sign of src0*src1) */
/* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */
- emit_alu(ALU_OP3_CNDGE_INT, t0x, 0, asrc(t2z), asrc(t0z),
- asrc(t0x));
+ emit_alu(ALU_OP3_CNDGE_INT, tgsi_dst(ch), 0, asrc(t2z),
+ asrc(t0z), asrc(t0x));
}
} else { // unsigned
/* 19. dst = tmp1.y==0 ? tmp1.w : tmp0.z */
emit_alu(ALU_OP3_CNDE_INT, tgsi_dst(ch), 0, asrc(t1y), asrc(t1w),
- asrc(t0z));
+ asrc(t0z));
}
}
return 0;
@@ -1392,7 +1622,7 @@ int tgsi_translator::ti_divmod() {
fetch_node* tgsi_translator::create_fetch(unsigned op) {
fetch_node *f = sh->create_fetch();
f->bc.set_op(op);
- f->src.resize((f->bc.op_ptr->flags & FF_VTX) ? 1 : 4);
+ f->src.resize(4);
f->dst.resize(4);
VSWZ_XYZW(f->bc.dst_sel);
return f;
@@ -1455,8 +1685,7 @@ inline alu_src tgsi_translator::asrc(literal l, int abs, int neg) {
}
inline alu_src tgsi_translator::asrc(tgsi_arg& ta, int chan) {
- return alu_src(get_arg_value(ta, chan), ta.abs,
- ta.neg);
+ return alu_src(get_arg_value(ta, chan), ta.abs, ta.neg);
}
inline alu_src tgsi_translator::asrc(tgsi_arg& ta, int chan, int abs, int neg) {
@@ -1484,16 +1713,22 @@ int tgsi_translator::ti_lit() {
value *tx = create_temp();
value *tz = create_temp();
- emit_alu(ALU_OP2_MAX, tx, 0, asrc(args.src[0], SEL_Y), asrc(0.0f));
- emit_alu(ALU_OP1_LOG_CLAMPED, tz, 0, asrc(tx));
- emit_alu(ALU_OP3_MUL_LIT, tx, 0, asrc(tz), asrc(args.src[0], SEL_W),
- asrc(args.src[0], SEL_X));
- emit_alu(ALU_OP1_EXP_IEEE, tgsi_dst(SEL_Z), clamp, asrc(tx));
+ if (write_mask & (1 << SEL_Z)) {
+ emit_alu(ALU_OP2_MAX, tx, 0, asrc(args.src[0], SEL_Y), asrc(0.0f));
+ emit_alu(ALU_OP1_LOG_CLAMPED, tz, 0, asrc(tx));
+ emit_alu(ALU_OP3_MUL_LIT, tx, 0, asrc(tz), asrc(args.src[0], SEL_W),
+ asrc(args.src[0], SEL_X));
+ }
begin_group();
- emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_X), 0, asrc(1.0f));
- emit_alu(ALU_OP2_MAX, tgsi_dst(SEL_Y), clamp, asrc(args.src[0], SEL_X),
- asrc(0.0f));
- emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_W), 0, asrc(1.0f));
+ if (write_mask & (1 << SEL_X))
+ emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_X), 0, asrc(1.0f));
+ if (write_mask & (1 << SEL_Y))
+ emit_alu(ALU_OP2_MAX, tgsi_dst(SEL_Y), clamp, asrc(args.src[0], SEL_X),
+ asrc(0.0f));
+ if (write_mask & (1 << SEL_Z))
+ emit_alu(ALU_OP1_EXP_IEEE, tgsi_dst(SEL_Z), clamp, asrc(tx));
+ if (write_mask & (1 << SEL_W))
+ emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_W), 0, asrc(1.0f));
end_group();
return 0;
}
@@ -1529,6 +1764,8 @@ int tgsi_translator::emit_alu(unsigned op, value* dst, int clamp, value* s0,
value* tgsi_translator::get_tgsi_value(value_kind kind, unsigned index,
unsigned chan) {
switch (kind) {
+ case VLK_REG:
+ return sh->get_gpr_value(true, index, chan, false);
case VLK_CONST:
return sh->get_const_value(literals[(index << 2) + chan]);
case VLK_KCACHE:
@@ -1575,6 +1812,7 @@ int tgsi_translator::update_pipe_shader() {
ps->shader.vs_out_point_size = vs_out_point_size;
ps->shader.uses_tex_buffers = uses_tex_buffers;
ps->shader.has_txq_cube_array_z_comp = has_txq_cube_array_z_comp;
+ ps->shader.two_side = two_side;
return 0;
}
@@ -1588,12 +1826,14 @@ int tgsi_translator::ti_buffer_txq() {
if (ctx.is_egcm())
emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_X), 0,
- asrc(sh->get_kcache_value(R600_BUFFER_INFO_CONST_BUFFER, id >> 2,
- id & 3)));
+ asrc(
+ sh->get_kcache_value(R600_BUFFER_INFO_CONST_BUFFER, id >> 2,
+ id & 3)));
else
emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_X), 0,
- asrc(sh->get_kcache_value(R600_BUFFER_INFO_CONST_BUFFER, id << 1,
- 1)));
+ asrc(
+ sh->get_kcache_value(R600_BUFFER_INFO_CONST_BUFFER, id << 1,
+ 1)));
return 0;
}
@@ -1623,16 +1863,19 @@ int tgsi_translator::ti_vtx_fetch() {
if (ctx.is_egcm())
return 0;
- FOREACH_CHAN {
+ FOREACH_CHAN
+ {
emit_alu(ALU_OP2_AND_INT, f->dst[ch], 0, asrc(f->dst[ch]),
- asrc(sh->get_kcache_value(R600_BUFFER_INFO_CONST_BUFFER, id << 1,
- ch)));
+ asrc(
+ sh->get_kcache_value(R600_BUFFER_INFO_CONST_BUFFER, id << 1,
+ ch)));
}
if (write_mask & (1 << SEL_W)) {
emit_alu(ALU_OP2_AND_INT, f->dst[SEL_W], 0, asrc(f->dst[SEL_W]),
- asrc(sh->get_kcache_value(R600_BUFFER_INFO_CONST_BUFFER,
- 1 + (id << 1), 0)));
+ asrc(
+ sh->get_kcache_value(R600_BUFFER_INFO_CONST_BUFFER,
+ 1 + (id << 1), 0)));
}
return 0;
}
@@ -1706,14 +1949,16 @@ int tgsi_translator::ti_tex() {
alu_packed_node *p = sh->create_alu_packed();
create_temps(tv, 4);
- for (i = 0; i < 4; ++i)
- p->push_back(
- build_alu(ALU_OP2_CUBE, tv[i], 0,
- asrc(args.src[0], cube_swizzle[i]),
- asrc(args.src[0], cube_swizzle[3 - i])));
+ for (i = 0; i < 4; ++i) {
+ alu_node *a = build_alu(ALU_OP2_CUBE, tv[i], 0,
+ asrc(args.src[0], cube_swizzle[i]),
+ asrc(args.src[0], cube_swizzle[3 - i]));
+ a->bc.slot = i;
+ p->push_back(a);
+ }
emit_node(p);
- emit_alu(ALU_OP1_RECIP_IEEE, tv[SEL_Z], 0, asrc(tv[SEL_Z]));
+ emit_alu(ALU_OP1_RECIP_IEEE, tv[SEL_Z], 0, asrc(tv[SEL_Z], 1));
emit_alu(ALU_OP3_MULADD, tv[SEL_X], 0, asrc(tv[SEL_X]), asrc(tv[SEL_Z]),
asrc(1.5f));
emit_alu(ALU_OP3_MULADD, tv[SEL_Y], 0, asrc(tv[SEL_Y]), asrc(tv[SEL_Z]),
@@ -1790,7 +2035,7 @@ int tgsi_translator::ti_tex() {
emit_node(f);
value *tx = create_temp();
- emit_alu(ALU_OP2_MULLO_INT, tw, 0, asrc(src[3]), asrc(4u));
+ emit_alu(ALU_OP2_MULLO_INT, tx, 0, asrc(src[3]), asrc(4u));
emit_alu(ALU_OP2_LSHR_INT, src[3], 0, asrc(tw), asrc(tx));
emit_alu(ALU_OP2_AND_INT, src[3], 0, asrc(src[3]), asrc(0xFu));
}
@@ -2020,11 +2265,25 @@ int tgsi_translator::ti_end_loop() {
return 0;
}
-vvec tgsi_translator::fetch_rel_const(tgsi_arg& ta) {
+int tgsi_translator::split_src_arg(tgsi_arg &ta) {
+ int k;
+ vvec t;
+ create_temps(t, 4);
+
+ for (k = 0; k < 4; ++k) {
+ emit_alu(ALU_OP1_MOV, t[k], 0, asrc(get_arg_value(ta, k)));
+ }
+ ta.rel = 0;
+ ta.values = t;
+ ta.kind = VLK_TEMP;
+ return 0;
+}
+
+int tgsi_translator::fetch_rel_const(tgsi_arg& ta) {
int i;
value* t = create_temp();
value* addr = get_tgsi_value(VLK_TGSI_ADDR, ta.rel_addr_index, 0);
- emit_alu(ALU_OP2_ADD_INT, t, 0, asrc(addr), asrc((unsigned)ta.sel));
+ emit_alu(ALU_OP2_ADD_INT, t, 0, asrc(addr), asrc((unsigned) ta.sel));
vvec r;
create_temps(r, 4);
@@ -2046,8 +2305,10 @@ vvec tgsi_translator::fetch_rel_const(tgsi_arg& ta) {
}
emit_node(f);
- return r;
+ ta.values = r;
+ ta.rel = 0;
+ ta.kind = VLK_TEMP;
+ return 0;
}
-
} // namespace r600_sb
diff --git a/src/gallium/drivers/r600/sb/sb_tgsi.h b/src/gallium/drivers/r600/sb/sb_tgsi.h
index f70e368a81..bbfb115315 100644
--- a/src/gallium/drivers/r600/sb/sb_tgsi.h
+++ b/src/gallium/drivers/r600/sb/sb_tgsi.h
@@ -131,6 +131,14 @@ class tgsi_translator {
int uses_tex_buffers;
int has_txq_cube_array_z_comp;
+ // XXX probably unused now
+ unsigned indirect_vlk;
+
+ int instanceid_index;
+ int vertexid_index;
+
+ boolean two_side;
+
unsigned clip_dist_write;
unsigned fs_write_all;
unsigned uses_kill;
@@ -160,7 +168,8 @@ public:
clip_vertex_write(), cv_output(), nr_ps_max_color_exports(),
nr_ps_color_exports(),
vs_out_misc_write(), vs_out_point_size(), uses_tex_buffers(),
- has_txq_cube_array_z_comp(),
+ has_txq_cube_array_z_comp(), indirect_vlk(),
+ instanceid_index(-1), vertexid_index(-1), two_side(),
clip_dist_write(), fs_write_all(), uses_kill(),
tgsi_proc(), interp_mask(),
file_offset(), current(),
@@ -172,13 +181,16 @@ private:
int spi_sid(int name, int sid);
- int parse_tokens();
+ int parse_declarations();
+ int parse_instructions();
int parse_property();
int parse_declaration();
int parse_immediate();
int parse_instruction();
+ int split_src_arg(tgsi_arg &ta);
+
int emit_inputs();
int get_ij(shader_io &in);
alu_packed_node* build_interp(shader_io& in, unsigned type);
@@ -300,7 +312,7 @@ private:
alu_src asrc(tgsi_arg& ta, int chan);
alu_src asrc(tgsi_arg& ta, int chan, int abs, int neg);
- value* create_temp() { return sh->create_temp_value(); }
+ value* create_temp(int chan = 0) { return sh->create_temp_value(chan); }
void create_temps(vvec &temps, int n) {
temps.resize(n);
for (int i = 0; i < n; ++i)
@@ -311,7 +323,8 @@ private:
fetch_node* create_fetch(unsigned op);
- vvec fetch_rel_const(tgsi_arg& ta);};
+ int fetch_rel_const(tgsi_arg& ta);
+};
} // namespace r600_sb
diff --git a/src/gallium/drivers/r600/sb/sb_valtable.cpp b/src/gallium/drivers/r600/sb/sb_valtable.cpp
index b87d957a89..ad2e78b611 100644
--- a/src/gallium/drivers/r600/sb/sb_valtable.cpp
+++ b/src/gallium/drivers/r600/sb/sb_valtable.cpp
@@ -145,7 +145,7 @@ sb_ostream& operator << (sb_ostream &o, value &v) {
sel_chan g;
- if (v.is_rel()) {
+ if (v.array) {
g = v.array->gpr;
} else {
g = v.gpr;