From a0d443e9c72713c2a247d0e167932130e100ed51 Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Wed, 24 Jul 2013 08:30:31 +0400 Subject: wip (no piglit regressions on evergreen) --- src/gallium/drivers/r600/sb/sb_bc_finalize.cpp | 6 +- src/gallium/drivers/r600/sb/sb_tgsi.cpp | 101 +++++++++++++++---------- 2 files changed, 66 insertions(+), 41 deletions(-) diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp index ffeb08414b..4218990a19 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp @@ -350,7 +350,11 @@ void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) { if (lv == literal(0)) src.sel = ALU_SRC_0; - else if (lv == literal(0.5f)) + else if (lv == literal(0x80000000)) { + // XXX probably we shouldn't have -0 here in the first place? + src.sel = ALU_SRC_0; + src.neg = 1; + } else if (lv == literal(0.5f)) src.sel = ALU_SRC_0_5; else if (lv == literal(-0.5f)) { src.sel = ALU_SRC_0_5; diff --git a/src/gallium/drivers/r600/sb/sb_tgsi.cpp b/src/gallium/drivers/r600/sb/sb_tgsi.cpp index 0f0d6df745..361323dd31 100644 --- a/src/gallium/drivers/r600/sb/sb_tgsi.cpp +++ b/src/gallium/drivers/r600/sb/sb_tgsi.cpp @@ -46,7 +46,7 @@ const tgsi_translator::tgsi_inst_info tgsi_translator::tgsi_info_table[TGSI_OPCO /* 0 */ TI_DESC(ARL, 0, ti_arl, 0), /* 1 */ TI_DESC(MOV, ALU_OP1_MOV, ti_alu, 0), /* 2 */ TI_DESC(LIT, 0, ti_lit, 0), - /* 3 */ TI_DESC(RCP, ALU_OP1_RECIP_CLAMPED, ti_repl, 0), + /* 3 */ TI_DESC(RCP, ALU_OP1_RECIP_IEEE, ti_repl, 0), /* 4 */ TI_DESC(RSQ, ALU_OP1_RECIPSQRT_CLAMPED, ti_repl, 0), /* 5 */ TI_DESC(EXP, 0, ti_exp, 0), /* 6 */ TI_DESC(LOG, 0, ti_log, 0), @@ -369,13 +369,15 @@ int tgsi_translator::parse_declaration() { input[i].d.centroid = d->Interp.Centroid; input[i].tgsi_index = d->Range.First; if (tgsi_proc == TGSI_PROCESSOR_FRAGMENT) { - if (input[i].d.interpolate == TGSI_INTERPOLATE_LINEAR - || input[i].d.interpolate == TGSI_INTERPOLATE_COLOR) - interp_mask |= (1 << 1); - else if (input[i].d.interpolate == TGSI_INTERPOLATE_PERSPECTIVE) - interp_mask |= (1 << 0); - if (input[i].d.centroid) - interp_mask |= (1 << 2); + if (input[i].d.name != TGSI_SEMANTIC_POSITION && + input[i].d.name != TGSI_SEMANTIC_FACE) { + if (input[i].d.interpolate == TGSI_INTERPOLATE_LINEAR) + interp_mask |= (1 << 1); + else if (input[i].d.interpolate == TGSI_INTERPOLATE_PERSPECTIVE) + interp_mask |= (1 << 0); + if (input[i].d.centroid) + interp_mask |= (1 << 2); + } input[i].d.spi_sid = spi_sid(input[i].d.name, input[i].d.sid); @@ -822,7 +824,10 @@ int tgsi_translator::emit_inputs() { } case TARGET_PS: if (ctx.is_egcm()) { - unsigned ij_pairs = ((interp_mask & 1) + (interp_mask >> 1)) + if (!interp_mask) + interp_mask = 1; + + unsigned ij_pairs = ((interp_mask & 1) + ((interp_mask >> 1) & 1)) * ((interp_mask & 4) ? 2 : 1); unsigned mask = (1u << 2 * ij_pairs) - 1; @@ -833,7 +838,6 @@ int tgsi_translator::emit_inputs() { ++gpr; mask >>= 4; } - gpr_reserved = gpr; } @@ -873,11 +877,6 @@ int tgsi_translator::emit_inputs() { } else { sh->add_pinned_inputs(sh->root->dst, VLK_TGSI_INPUT, in.tgsi_index, 0xF, false, in.d.gpr); - - if (fragcoord_input == i) { - value* w = get_tgsi_value(VLK_TGSI_INPUT, i, SEL_W); - emit_alu(ALU_OP1_RECIP_IEEE, w, 0, asrc(w)); - } } if (two_side) { @@ -908,6 +907,13 @@ int tgsi_translator::emit_inputs() { } } } + + if (fragcoord_input != -1) { + value* w = get_tgsi_value(VLK_TGSI_INPUT, fragcoord_input, SEL_W); + emit_alu(ALU_OP1_RECIP_IEEE, w, 0, asrc(w)); + } + + break; default: assert(!"unexpected target"); @@ -959,10 +965,12 @@ alu_group_node* tgsi_translator::build_interp_flat(shader_io& in) { } inline int tgsi_translator::get_ij(shader_io& in) { - int ij; - ij = in.d.centroid ? 1 : 0; - if (in.d.interpolate != TGSI_INTERPOLATE_PERSPECTIVE) - ij += (interp_mask & 1) + ((interp_mask >> 2) & 1); + int ij = 0; + if (in.d.interpolate == TGSI_INTERPOLATE_PERSPECTIVE) + return in.d.centroid ? 1 : 0; + else if (in.d.interpolate == TGSI_INTERPOLATE_LINEAR) + return (interp_mask & 1) + ((interp_mask >> 2) & 1) + + (in.d.centroid ? 1 : 0); return ij; } @@ -992,8 +1000,6 @@ value* tgsi_translator::get_arg_value(unsigned index, unsigned chan) { } int tgsi_translator::ti_alu() { - begin_group(); - switch (info->tgsi_op) { case TGSI_OPCODE_SUB: args.src[1].neg = !args.src[1].neg; @@ -1004,6 +1010,7 @@ int tgsi_translator::ti_alu() { break; } + begin_group(); if (unlikely(info->flags & TIF_ALU_SWAPSRC01)) { FOREACH_CHAN { @@ -1076,17 +1083,16 @@ int tgsi_translator::ti_dot() { } int tgsi_translator::ti_repl() { - switch (info->tgsi_op) { case TGSI_OPCODE_RSQ: args.src[0].abs = 1; args.src[0].neg = 0; break; } - FOREACH_CHAN - { - emit_alu(info->isa_op, 0, ch); - } + + value *t = create_temp(); + emit_alu(info->isa_op, t, clamp, asrc(args.src[0], SEL_X)); + ti_replicate(t); return 0; } @@ -1148,10 +1154,8 @@ alu_node* tgsi_translator::create_alu(unsigned op) { int tgsi_translator::ti_trig() { value *t = prepare_trig(asrc(args.src[0], 0)); - FOREACH_CHAN - { - emit_alu(info->isa_op, tgsi_dst(ch), clamp, t); - } + emit_alu(info->isa_op, t, clamp, asrc(t)); + ti_replicate(t); return 0; } @@ -1160,9 +1164,9 @@ int tgsi_translator::ti_scs() { begin_group(); if (write_mask & (1 << SEL_X)) - emit_alu(ALU_OP1_COS, tgsi_dst(SEL_X), clamp, t); + emit_alu(ALU_OP1_COS, tgsi_dst(SEL_X), clamp, asrc(t)); if (write_mask & (1 << SEL_Y)) - emit_alu(ALU_OP1_SIN, tgsi_dst(SEL_Y), clamp, t); + emit_alu(ALU_OP1_SIN, tgsi_dst(SEL_Y), clamp, asrc(t)); if (write_mask & (1 << SEL_Z)) emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_Z), 0, asrc(0.0f)); if (write_mask & (1 << SEL_W)) @@ -1184,8 +1188,13 @@ value* tgsi_translator::prepare_trig(alu_src s) { if (ctx.is_r600()) emit_alu(ALU_OP3_MULADD, t, 0, asrc(t), asrc(double_pi), asrc(neg_pi)); else -// emit_alu(ALU_OP2_ADD, t, 0, asrc(t), asrc(-0.5f)); +#if 0 + emit_alu(ALU_OP2_ADD, t, 0, asrc(t), asrc(-0.5f)); +#else + // using muladd just to reduce differences from default backend for + // debugging emit_alu(ALU_OP3_MULADD, t, 0, asrc(t), asrc(1.0f), asrc(0.5f, 0, 1)); +#endif return t; } @@ -1228,11 +1237,11 @@ int tgsi_translator::ti_log() { begin_group(); if (write_mask & (1 << SEL_X)) - emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_X), clamp, t2); + emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_X), clamp, asrc(t2)); if (write_mask & (1 << SEL_Y)) emit_alu(ALU_OP2_MUL, tgsi_dst(SEL_Y), clamp, s, asrc(t3)); if (write_mask & (1 << SEL_Z)) - emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_Z), clamp, t); + emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_Z), clamp, asrc(t)); if (write_mask & (1 << SEL_W)) emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_W), 0, asrc(1.0f)); end_group(); @@ -1278,7 +1287,7 @@ int tgsi_translator::ti_pow() { value* t = create_temp(); emit_alu(ALU_OP1_LOG_IEEE, t, 0, asrc(args.src[0], 0)); - emit_alu(ALU_OP2_MUL, t, 0, asrc(args.src[1], 0), t); + emit_alu(ALU_OP2_MUL, t, 0, asrc(args.src[1], 0), asrc(t)); emit_alu(ALU_OP1_EXP_IEEE, t, clamp, asrc(t)); ti_replicate(t); return 0; @@ -1433,10 +1442,12 @@ int tgsi_translator::ti_f2iu() { { emit_alu(ALU_OP1_TRUNC, t[ch], 0, asrc(args.src[0], ch)); } + begin_group(); FOREACH_CHAN { emit_alu(info->isa_op, tgsi_dst(ch), 0, t[ch]); } + end_group(); return 0; } @@ -1500,6 +1511,9 @@ int tgsi_translator::ti_divmod() { value *t2z = create_temp(); value *t3x = create_temp(); + vvec dst; + create_temps(dst, 4); + FOREACH_CHAN { if (signed_op) { @@ -1602,20 +1616,26 @@ int tgsi_translator::ti_divmod() { if (mod) { /* sign of the remainder is the same as the sign of src0 */ /* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */ - emit_alu(ALU_OP3_CNDGE_INT, tgsi_dst(ch), 0, asrc(t2x), + emit_alu(ALU_OP3_CNDGE_INT, dst[ch], 0, asrc(t2x), asrc(t0z), asrc(t0x)); } else { /* fix the quotient sign (same as the sign of src0*src1) */ /* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */ - emit_alu(ALU_OP3_CNDGE_INT, tgsi_dst(ch), 0, asrc(t2z), + emit_alu(ALU_OP3_CNDGE_INT, dst[ch], 0, asrc(t2z), asrc(t0z), asrc(t0x)); } } else { // unsigned /* 19. dst = tmp1.y==0 ? tmp1.w : tmp0.z */ - emit_alu(ALU_OP3_CNDE_INT, tgsi_dst(ch), 0, asrc(t1y), asrc(t1w), + emit_alu(ALU_OP3_CNDE_INT, dst[ch], 0, asrc(t1y), asrc(t1w), asrc(t0z)); } } + begin_group(); + FOREACH_CHAN + { + emit_alu(ALU_OP1_MOV, tgsi_dst(ch), 0, asrc(dst[ch])); + } + end_group(); return 0; } @@ -1990,6 +2010,7 @@ int tgsi_translator::ti_tex() { } /* for cube forms of lod and bias we need to route things */ + // XXX just copy target value pointer to src? if (tgsi_op == TGSI_OPCODE_TXB || tgsi_op == TGSI_OPCODE_TXL) emit_alu(ALU_OP1_MOV, tv[SEL_Z], 0, asrc(args.src[0], SEL_W)); else if (tgsi_op == TGSI_OPCODE_TXB2 || tgsi_op == TGSI_OPCODE_TXL2) @@ -2178,7 +2199,7 @@ int tgsi_translator::ti_if() { alu_node *a = build_alu(info->isa_op, sh->get_special_value(SV_EXEC_MASK), 0, asrc(args.src[0], 0), asrc(0.0f)); - a->dst.insert(a->dst.begin(), 2, NULL); + a->dst.insert(a->dst.begin(), 2, (value*)NULL); emit_node(a); region_node *r = sh->create_region(); -- cgit v1.2.3