From c3906264a5caef8e12913bed6d6779b153ad996b Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 13 Apr 2012 11:39:00 -0700 Subject: glsl: Skip type conversions in loop-size estimates on float-only arches. On float-only architectures (such as i915), type conversions are all no-ops, so we shouldn't include them when estimating how many instructions a loop contains. Overestimating can result in the unroller deciding not to unroll the loop, which can be detrimental on these architectures. Signed-off-by: Kenneth Graunke --- src/glsl/glsl_parser_extras.cpp | 5 +++-- src/glsl/ir.cpp | 6 ++++++ src/glsl/ir.h | 3 +++ src/glsl/ir_optimization.h | 3 ++- src/glsl/linker.cpp | 2 +- src/glsl/loop_analysis.cpp | 4 ++++ src/glsl/loop_analysis.h | 3 ++- src/glsl/loop_unroll.cpp | 20 +++++++++++++++----- src/glsl/main.cpp | 2 +- src/glsl/test_optpass.cpp | 2 +- src/mesa/drivers/dri/i965/brw_shader.cpp | 2 +- src/mesa/drivers/dri/intel/intel_tex_layout.c | 7 ++++++- src/mesa/main/ff_fragment_shader.cpp | 2 +- src/mesa/program/ir_to_mesa.cpp | 5 +++-- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++- 15 files changed, 51 insertions(+), 18 deletions(-) diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index ae7a365f4b2..a67e8f5e378 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -1011,7 +1011,8 @@ ast_struct_specifier::ast_struct_specifier(const char *identifier, bool do_common_optimization(exec_list *ir, bool linked, bool uniform_locations_assigned, - unsigned max_unroll_iterations) + unsigned max_unroll_iterations, + bool native_integers) { GLboolean progress = GL_FALSE; @@ -1050,7 +1051,7 @@ do_common_optimization(exec_list *ir, bool linked, loop_state *ls = analyze_loop_variables(ir); if (ls->loop_found) { progress = set_loop_controls(ir, ls) || progress; - progress = unroll_loops(ir, ls, max_unroll_iterations) || progress; + progress = unroll_loops(ir, ls, max_unroll_iterations, native_integers) || 
progress; } delete ls; diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp index 1ba87515ea7..b601c7b11fd 100644 --- a/src/glsl/ir.cpp +++ b/src/glsl/ir.cpp @@ -507,6 +507,12 @@ ir_expression::get_operator(const char *str) return (ir_expression_operation) -1; } +bool +ir_expression::is_type_conversion() +{ + return (operation >= ir_unop_f2i && operation <= ir_unop_u2i); +} + ir_constant::ir_constant() { this->ir_type = ir_type_constant; diff --git a/src/glsl/ir.h b/src/glsl/ir.h index d6c6a607ae8..1ffa5f4ad39 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -1021,6 +1021,9 @@ public: */ static ir_expression_operation get_operator(const char *); + /** Whether or not this expression is a type conversion (e.g. i2f) */ + bool is_type_conversion(); + virtual void accept(ir_visitor *v) { v->visit(this); diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 35678358307..b5c5ee17f4e 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -39,7 +39,8 @@ bool do_common_optimization(exec_list *ir, bool linked, bool uniform_locations_assigned, - unsigned max_unroll_iterations); + unsigned max_unroll_iterations, + bool native_integers); bool do_algebraic(exec_list *instructions); bool do_constant_folding(exec_list *instructions); diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 6ba297237c7..e082d3e4bde 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -2313,7 +2313,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) unsigned max_unroll = ctx->ShaderCompilerOptions[i].MaxUnrollIterations; - while (do_common_optimization(prog->_LinkedShaders[i]->ir, true, false, max_unroll)) + while (do_common_optimization(prog->_LinkedShaders[i]->ir, true, false, max_unroll, ctx->Const.NativeIntegers)) ; } diff --git a/src/glsl/loop_analysis.cpp b/src/glsl/loop_analysis.cpp index 6a0e4da5100..8bbc0caa20f 100644 --- a/src/glsl/loop_analysis.cpp +++ b/src/glsl/loop_analysis.cpp @@ -238,6 +238,10 @@ 
loop_analysis::visit_leave(ir_loop *ir) if (ls->contains_calls) return visit_continue; + printf("KAYDEN: about to analyze:\n"); + ir->print(); + printf("KAYDEN: -----------------\n"); + foreach_list(node, &ir->body_instructions) { /* Skip over declarations at the start of a loop. */ diff --git a/src/glsl/loop_analysis.h b/src/glsl/loop_analysis.h index 8bed1db0210..9c88867b84a 100644 --- a/src/glsl/loop_analysis.h +++ b/src/glsl/loop_analysis.h @@ -57,7 +57,8 @@ set_loop_controls(exec_list *instructions, loop_state *ls); extern bool -unroll_loops(exec_list *instructions, loop_state *ls, unsigned max_iterations); +unroll_loops(exec_list *instructions, loop_state *ls, unsigned max_iterations, + bool native_integers); /** diff --git a/src/glsl/loop_unroll.cpp b/src/glsl/loop_unroll.cpp index 3434fde6292..3d5cbcf8046 100644 --- a/src/glsl/loop_unroll.cpp +++ b/src/glsl/loop_unroll.cpp @@ -27,11 +27,13 @@ class loop_unroll_visitor : public ir_hierarchical_visitor { public: - loop_unroll_visitor(loop_state *state, unsigned max_iterations) + loop_unroll_visitor(loop_state *state, unsigned max_iterations, + bool native_integers) { this->state = state; this->progress = false; this->max_iterations = max_iterations; + this->native_integers = native_integers; } virtual ir_visitor_status visit_leave(ir_loop *ir); @@ -40,6 +42,7 @@ public: bool progress; unsigned max_iterations; + bool native_integers; }; @@ -54,11 +57,13 @@ class loop_unroll_count : public ir_hierarchical_visitor { public: int nodes; bool fail; + bool native_integers; - loop_unroll_count(exec_list *list) + loop_unroll_count(exec_list *list, bool native_ints) { nodes = 0; fail = false; + native_integers = native_ints; run(list); } @@ -71,6 +76,10 @@ public: virtual ir_visitor_status visit_enter(ir_expression *ir) { + /* On float-only architectures, type conversions are no-ops. 
*/ + if (ir->is_type_conversion() && !native_integers) + return visit_continue; + nodes++; return visit_continue; } @@ -112,7 +121,7 @@ loop_unroll_visitor::visit_leave(ir_loop *ir) /* Don't try to unroll nested loops and loops with a huge body. */ - loop_unroll_count count(&ir->body_instructions); + loop_unroll_count count(&ir->body_instructions, this->native_integers); if (count.fail || count.nodes * iterations > (int)max_iterations * 5) return visit_continue; @@ -243,9 +252,10 @@ loop_unroll_visitor::visit_leave(ir_loop *ir) bool -unroll_loops(exec_list *instructions, loop_state *ls, unsigned max_iterations) +unroll_loops(exec_list *instructions, loop_state *ls, unsigned max_iterations, + bool native_integers) { - loop_unroll_visitor v(ls, max_iterations); + loop_unroll_visitor v(ls, max_iterations, native_integers); v.run(instructions); diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp index d43bf1a7463..acb6a3c68f7 100644 --- a/src/glsl/main.cpp +++ b/src/glsl/main.cpp @@ -166,7 +166,7 @@ compile_shader(struct gl_context *ctx, struct gl_shader *shader) if (!state->error && !shader->ir->is_empty()) { bool progress; do { - progress = do_common_optimization(shader->ir, false, false, 32); + progress = do_common_optimization(shader->ir, false, false, 32, true); } while (progress); validate_ir_tree(shader->ir); diff --git a/src/glsl/test_optpass.cpp b/src/glsl/test_optpass.cpp index 6abafb5d311..ef4af252474 100644 --- a/src/glsl/test_optpass.cpp +++ b/src/glsl/test_optpass.cpp @@ -64,7 +64,7 @@ do_optimization(struct exec_list *ir, const char *optimization) if (sscanf(optimization, "do_common_optimization ( %d , %d ) ", &int_0, &int_1) == 2) { - return do_common_optimization(ir, int_0 != 0, false, int_1); + return do_common_optimization(ir, int_0 != 0, false, int_1, true); } else if (strcmp(optimization, "do_algebraic") == 0) { return do_algebraic(ir); } else if (strcmp(optimization, "do_constant_folding") == 0) { diff --git 
a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index aa1bfdbd230..7775bd3cc58 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -184,7 +184,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) false /* loops */ ) || progress; - progress = do_common_optimization(shader->ir, true, true, 32) + progress = do_common_optimization(shader->ir, true, true, 32, true) || progress; } while (progress); diff --git a/src/mesa/drivers/dri/intel/intel_tex_layout.c b/src/mesa/drivers/dri/intel/intel_tex_layout.c index 65645bc46a4..db7b84f8a82 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_layout.c +++ b/src/mesa/drivers/dri/intel/intel_tex_layout.c @@ -115,17 +115,22 @@ intel_vertical_texture_alignment_unit(struct intel_context *intel, if (_mesa_is_format_compressed(format)) return 4; - if (format == MESA_FORMAT_S8) + if (format == MESA_FORMAT_S8) { + printf("vertical align 8/4 (S8): format %s\n", _mesa_get_format_name(format)); return intel->gen >= 7 ? 
8 : 4; + } GLenum base_format = _mesa_get_format_base_format(format); if (intel->gen >= 6 && (base_format == GL_DEPTH_COMPONENT || base_format == GL_DEPTH_STENCIL)) { + printf("vertical align 4: format %s\n", _mesa_get_format_name(format)); return 4; } + printf("vertical align 2: format %s\n", _mesa_get_format_name(format)); + return 2; } diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp index 3c91b1a42ad..f4a6072d407 100644 --- a/src/mesa/main/ff_fragment_shader.cpp +++ b/src/mesa/main/ff_fragment_shader.cpp @@ -1324,7 +1324,7 @@ create_new_program(struct gl_context *ctx, struct state_key *key) validate_ir_tree(p.shader->ir); - while (do_common_optimization(p.shader->ir, false, false, 32)) + while (do_common_optimization(p.shader->ir, false, false, 32, ctx->Const.NativeIntegers)) ; reparent_ir(p.shader->ir, p.shader->ir); diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 840648e0449..a8873466886 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -3061,7 +3061,8 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; progress = do_common_optimization(ir, true, true, - options->MaxUnrollIterations) + options->MaxUnrollIterations, + false) || progress; progress = lower_quadop_vector(ir, true) || progress; @@ -3171,7 +3172,7 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader) /* Do some optimization at compile time to reduce shader IR size * and reduce later work if the same shader is linked multiple times */ - while (do_common_optimization(shader->ir, false, false, 32)) + while (do_common_optimization(shader->ir, false, false, 32, false)) ; validate_ir_tree(shader->ir); diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 
9e68deb3471..fdc782e4bba 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -5002,7 +5002,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; progress = do_common_optimization(ir, true, true, - options->MaxUnrollIterations) + options->MaxUnrollIterations, + ctx->Const.NativeIntegers) || progress; progress = lower_quadop_vector(ir, false) || progress; -- cgit v1.2.3