From c46e7a05e501e02b10dbc06772c0ef01308f60d5 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 15 Sep 2010 13:59:09 +0200 Subject: nv50: improve and fix modifier folding optimization Execute before folding loads, because we don't check if it's legal in lower_mods. Ensure that a value's insn pointer is updated when transferring it to a different instruction. --- src/gallium/drivers/nv50/nv50_pc.c | 1 + src/gallium/drivers/nv50/nv50_pc.h | 1 - src/gallium/drivers/nv50/nv50_pc_emit.c | 5 +++ src/gallium/drivers/nv50/nv50_pc_optimize.c | 65 ++++++++++++++++++----------- 4 files changed, 46 insertions(+), 26 deletions(-) diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c index 78aca8fd56..2706d88779 100644 --- a/src/gallium/drivers/nv50/nv50_pc.c +++ b/src/gallium/drivers/nv50/nv50_pc.c @@ -104,6 +104,7 @@ nv50_nvi_can_load(struct nv_instruction *nvi, int s, struct nv_value *value) case NV_OP_FLOOR: case NV_OP_TRUNC: case NV_OP_CVT: + case NV_OP_NEG: case NV_OP_MAD: case NV_OP_MUL: case NV_OP_SAT: diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h index 8f15a82026..92c6be5f6e 100644 --- a/src/gallium/drivers/nv50/nv50_pc.h +++ b/src/gallium/drivers/nv50/nv50_pc.h @@ -220,7 +220,6 @@ struct nv_value { struct nv_ref { struct nv_value *value; - struct nv_instruction *insn; ubyte mod; ubyte typecast; ubyte flags; /* not used yet */ diff --git a/src/gallium/drivers/nv50/nv50_pc_emit.c b/src/gallium/drivers/nv50/nv50_pc_emit.c index 1eb44741f1..137a531dd6 100644 --- a/src/gallium/drivers/nv50/nv50_pc_emit.c +++ b/src/gallium/drivers/nv50/nv50_pc_emit.c @@ -654,6 +654,8 @@ emit_add_f32(struct nv_pc *pc, struct nv_instruction *i) { pc->emit[0] = 0xb0000000; + assert(!((i->src[0]->mod | i->src[1]->mod) & NV_MOD_ABS)); + if (SFILE(i, 1) == NV_FILE_IMM) { emit_form_IMM(pc, i, 0); @@ -665,6 +667,9 @@ emit_add_f32(struct nv_pc *pc, struct nv_instruction *i) if (i->src[0]->mod & NV_MOD_NEG) pc->emit[1] |= 1 << 26; if (i->src[1]->mod & NV_MOD_NEG) pc->emit[1] |= 1 << 27; + + if (i->saturate) + pc->emit[1] |= 0x20000000; } else { emit_form_MUL(pc, i); diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c index 3ff6db7dd2..921ed15691 100644 --- a/src/gallium/drivers/nv50/nv50_pc_optimize.c +++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c @@ -336,6 +336,7 @@ nv_pass_fold_stores(struct nv_pass *ctx, struct nv_basic_block *b) continue; nvi->def[0] = sti->def[0]; + nvi->def[0]->insn = nvi; nvi->fixed = sti->fixed; nv_nvi_delete(sti); @@ -374,7 +375,7 @@ nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b) if (j == 0 && ld->src[4]) /* can't load shared mem */ continue; - /* fold it ! */ /* XXX: ref->insn */ + /* fold it ! */ nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value); if (ld->src[4]) nv_reference(ctx->pc, &nvi->src[4], ld->src[4]->value); @@ -388,6 +389,7 @@ nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b) return 0; } +/* NOTE: Assumes loads have not yet been folded. */ static int nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) { @@ -402,14 +404,7 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) nvi->src[1]->mod ^= NV_MOD_NEG; } - /* should not put any modifiers on NEG and ABS */ - assert(nvi->opcode != NV_MOD_NEG || !nvi->src[0]->mod); - assert(nvi->opcode != NV_MOD_ABS || !nvi->src[0]->mod); - - for (j = 0; j < 4; ++j) { - if (!nvi->src[j]) - break; - + for (j = 0; j < 4 && nvi->src[j]; ++j) { mi = nvi->src[j]->value->insn; if (!mi) continue; @@ -421,16 +416,32 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) if (mi->opcode == NV_OP_ABS) mod = NV_MOD_ABS; else continue; + assert(!(mod & mi->src[0]->mod & NV_MOD_NEG)); + + mod |= mi->src[0]->mod; + + if (mi->flags_def || mi->flags_src) + continue; - if (nvi->opcode == NV_OP_ABS) + if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) { + /* abs neg [abs] = abs */ mod &= ~(NV_MOD_NEG | NV_MOD_ABS); - else - if (nvi->opcode == NV_OP_NEG && mod == NV_MOD_NEG) { - nvi->opcode = NV_OP_MOV; + } else + if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) { + /* neg as opcode and modifier on same insn cannot occur */ + /* neg neg abs = abs, neg neg = identity */ + assert(j == 0); + if (mod & NV_MOD_ABS) + nvi->opcode = NV_OP_ABS; + else + if (nvi->flags_def) + nvi->opcode = NV_OP_CVT; + else + nvi->opcode = NV_OP_MOV; mod = 0; } - if (!(nv50_supported_src_mods(nvi->opcode, j) & mod)) + if ((nv50_supported_src_mods(nvi->opcode, j) & mod) != mod) continue; nv_reference(ctx->pc, &nvi->src[j], mi->src[0]->value); @@ -441,11 +452,15 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) if (nvi->opcode == NV_OP_SAT) { mi = nvi->src[0]->value->insn; - if ((mi->opcode == NV_OP_MAD) && !mi->flags_def) { - mi->saturate = 1; - mi->def[0] = nvi->def[0]; - nv_nvi_delete(nvi); - } + if (mi->opcode != NV_OP_ADD || mi->opcode != NV_OP_MAD) + continue; + if (mi->flags_def || mi->def[0]->refc > 1) + continue; + + mi->saturate = 1; + mi->def[0] = nvi->def[0]; + mi->def[0]->insn = mi; + nv_nvi_delete(nvi); } } DESCEND_ARBITRARY(j, nv_pass_lower_mods); @@ -956,7 +971,7 @@ nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b) for (n1 = 0, nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1) if (!nv50_nvi_can_predicate(nvi)) break; -#ifdef NV50_PC_DEBUG +#ifdef NV50PC_DEBUG if (nvi) { debug_printf("cannot predicate: "); nv_print_instruction(nvi); } @@ -1081,6 +1096,11 @@ nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root) if (ret) return ret; + pc->pass_seq++; + ret = nv_pass_lower_mods(&pass, root); + if (ret) + return ret; + pc->pass_seq++; ret = nv_pass_fold_loads(&pass, root); if (ret) @@ -1106,11 +1126,6 @@ nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root) if (ret) return ret; - pc->pass_seq++; - ret = nv_pass_lower_mods(&pass, root); - if (ret) - return ret; - dce.pc = pc; do { dce.removed = 0; -- cgit v1.2.3