diff options
-rw-r--r-- | src/compiler/nir/nir.h | 2 | ||||
-rw-r--r-- | src/compiler/nir/nir_opt_peephole_select.c | 82 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_program.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_nir.c | 2 |
4 files changed, 57 insertions, 31 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index aac247c79c..8d1afb98cb 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2600,7 +2600,7 @@ bool nir_opt_dead_cf(nir_shader *shader); bool nir_opt_gcm(nir_shader *shader, bool value_number); -bool nir_opt_peephole_select(nir_shader *shader); +bool nir_opt_peephole_select(nir_shader *shader, unsigned limit); bool nir_opt_remove_phis(nir_shader *shader); diff --git a/src/compiler/nir/nir_opt_peephole_select.c b/src/compiler/nir/nir_opt_peephole_select.c index 633e9f486c..6a73d73707 100644 --- a/src/compiler/nir/nir_opt_peephole_select.c +++ b/src/compiler/nir/nir_opt_peephole_select.c @@ -32,23 +32,33 @@ * Implements a small peephole optimization that looks for * * if (cond) { - * <empty> + * <then SSA defs> * } else { - * <empty> + * <else SSA defs> * } * phi * ... * phi * - * and replaces it with a series of selects. It can also handle the case - * where, instead of being empty, the if may contain some move operations - * whose only use is one of the following phi nodes. This happens all the - * time when the SSA form comes from a conditional assignment with a - * swizzle. + * and replaces it with: + * + * <then SSA defs> + * <else SSA defs> + * bcsel + * ... + * bcsel + * + * where the SSA defs are ALU operations or other cheap instructions (not + * texturing, for example). + * + * If the number of ALU operations in the branches is greater than the limit + * parameter, then the optimization is skipped. In limit=0 mode, the SSA defs + * must only be MOVs which we expect to get copy-propagated away once they're + * out of the inner blocks. */ static bool -block_check_for_allowed_instrs(nir_block *block) +block_check_for_allowed_instrs(nir_block *block, unsigned *count, bool alu_ok) { nir_foreach_instr(instr, block) { switch (instr->type) { @@ -67,6 +77,11 @@ block_check_for_allowed_instrs(nir_block *block) } break; + case nir_intrinsic_load_uniform: + if (!alu_ok) + return false; + break; + default: return false; } @@ -89,29 +104,36 @@ block_check_for_allowed_instrs(nir_block *block) case nir_op_vec2: case nir_op_vec3: case nir_op_vec4: - /* It must be a move-like operation. */ break; default: - return false; + if (!alu_ok) { + /* It must be a move-like operation. */ + return false; + } + break; } - /* Can't handle saturate */ - if (mov->dest.saturate) - return false; - /* It must be SSA */ if (!mov->dest.dest.is_ssa) return false; - /* It cannot have any if-uses */ - if (!list_empty(&mov->dest.dest.ssa.if_uses)) - return false; + if (alu_ok) { + (*count)++; + } else { + /* Can't handle saturate */ + if (mov->dest.saturate) + return false; - /* The only uses of this definition must be phi's in the successor */ - nir_foreach_use(use, &mov->dest.dest.ssa) { - if (use->parent_instr->type != nir_instr_type_phi || - use->parent_instr->block != block->successors[0]) + /* It cannot have any if-uses */ + if (!list_empty(&mov->dest.dest.ssa.if_uses)) return false; + + /* The only uses of this definition must be phi's in the successor */ + nir_foreach_use(use, &mov->dest.dest.ssa) { + if (use->parent_instr->type != nir_instr_type_phi || + use->parent_instr->block != block->successors[0]) + return false; + } } break; } @@ -125,7 +147,7 @@ block_check_for_allowed_instrs(nir_block *block) } static bool -nir_opt_peephole_select_block(nir_block *block, void *mem_ctx) +nir_opt_peephole_select_block(nir_block *block, void *mem_ctx, unsigned limit) { if (nir_cf_node_is_first(&block->cf_node)) return false; @@ -147,8 +169,12 @@ nir_opt_peephole_select_block(nir_block *block, void *mem_ctx) nir_block *else_block = nir_cf_node_as_block(else_node); /* ... and those blocks must only contain "allowed" instructions. */ - if (!block_check_for_allowed_instrs(then_block) || - !block_check_for_allowed_instrs(else_block)) + unsigned count = 0; + if (!block_check_for_allowed_instrs(then_block, &count, limit != 0) || + !block_check_for_allowed_instrs(else_block, &count, limit != 0)) + return false; + + if (count > limit) return false; /* At this point, we know that the previous CFG node is an if-then @@ -212,13 +238,13 @@ nir_opt_peephole_select_block(nir_block *block, void *mem_ctx) } static bool -nir_opt_peephole_select_impl(nir_function_impl *impl) +nir_opt_peephole_select_impl(nir_function_impl *impl, unsigned limit) { void *mem_ctx = ralloc_parent(impl); bool progress = false; nir_foreach_block_safe(block, impl) { - progress |= nir_opt_peephole_select_block(block, mem_ctx); + progress |= nir_opt_peephole_select_block(block, mem_ctx, limit); } if (progress) @@ -228,13 +254,13 @@ nir_opt_peephole_select_impl(nir_function_impl *impl) } bool -nir_opt_peephole_select(nir_shader *shader) +nir_opt_peephole_select(nir_shader *shader, unsigned limit) { bool progress = false; nir_foreach_function(function, shader) { if (function->impl) - progress |= nir_opt_peephole_select_impl(function->impl); + progress |= nir_opt_peephole_select_impl(function->impl, limit); } return progress; diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 986a1ffc64..81c67168c1 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1430,7 +1430,7 @@ vc4_optimize_nir(struct nir_shader *s) NIR_PASS(progress, s, nir_opt_dce); NIR_PASS(progress, s, nir_opt_dead_cf); NIR_PASS(progress, s, nir_opt_cse); - NIR_PASS(progress, s, nir_opt_peephole_select); + NIR_PASS(progress, s, nir_opt_peephole_select, 8); NIR_PASS(progress, s, nir_opt_algebraic); NIR_PASS(progress, s, nir_opt_constant_folding); NIR_PASS(progress, s, nir_opt_undef); diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index fbc84c474f..744865bd4c 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -416,7 +416,7 @@ nir_optimize(nir_shader *nir, bool is_scalar) OPT(nir_copy_prop); OPT(nir_opt_dce); OPT(nir_opt_cse); - OPT(nir_opt_peephole_select); + OPT(nir_opt_peephole_select, 0); OPT(nir_opt_algebraic); OPT(nir_opt_constant_folding); OPT(nir_opt_dead_cf); |