summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Kenneth Graunke <kenneth@whitecape.org> 2012-04-13 11:39:00 -0700
committer: Kenneth Graunke <kenneth@whitecape.org> 2012-04-17 11:27:34 -0700
commit: c3906264a5caef8e12913bed6d6779b153ad996b (patch)
tree: 74d7a8743e8a8e4abac1b025321d81ae5ae3c38d
parent: 48aec56559a0199d8099d9edff8e51312f55f15c (diff)
glsl: Skip type conversions in loop-size estimates on float-only arches. (branch: loop)
On float-only architectures (such as i915), type conversions are all no-ops, so we shouldn't include them when estimating how many instructions a loop contains. Overestimating can result in the unroller deciding not to unroll the loop, which can be detrimental on these architectures.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
-rw-r--r-- src/glsl/glsl_parser_extras.cpp 5
-rw-r--r-- src/glsl/ir.cpp 6
-rw-r--r-- src/glsl/ir.h 3
-rw-r--r-- src/glsl/ir_optimization.h 3
-rw-r--r-- src/glsl/linker.cpp 2
-rw-r--r-- src/glsl/loop_analysis.cpp 4
-rw-r--r-- src/glsl/loop_analysis.h 3
-rw-r--r-- src/glsl/loop_unroll.cpp 20
-rw-r--r-- src/glsl/main.cpp 2
-rw-r--r-- src/glsl/test_optpass.cpp 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_shader.cpp 2
-rw-r--r-- src/mesa/drivers/dri/intel/intel_tex_layout.c 7
-rw-r--r-- src/mesa/main/ff_fragment_shader.cpp 2
-rw-r--r-- src/mesa/program/ir_to_mesa.cpp 5
-rw-r--r-- src/mesa/state_tracker/st_glsl_to_tgsi.cpp 3
15 files changed, 51 insertions, 18 deletions
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index ae7a365f4b2..a67e8f5e378 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -1011,7 +1011,8 @@ ast_struct_specifier::ast_struct_specifier(const char *identifier,
bool
do_common_optimization(exec_list *ir, bool linked,
bool uniform_locations_assigned,
- unsigned max_unroll_iterations)
+ unsigned max_unroll_iterations,
+ bool native_integers)
{
GLboolean progress = GL_FALSE;
@@ -1050,7 +1051,7 @@ do_common_optimization(exec_list *ir, bool linked,
loop_state *ls = analyze_loop_variables(ir);
if (ls->loop_found) {
progress = set_loop_controls(ir, ls) || progress;
- progress = unroll_loops(ir, ls, max_unroll_iterations) || progress;
+ progress = unroll_loops(ir, ls, max_unroll_iterations, native_integers) || progress;
}
delete ls;
diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 1ba87515ea7..b601c7b11fd 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -507,6 +507,12 @@ ir_expression::get_operator(const char *str)
return (ir_expression_operation) -1;
}
+bool
+ir_expression::is_type_conversion()
+{
+ return (operation >= ir_unop_f2i && operation <= ir_unop_u2i);
+}
+
ir_constant::ir_constant()
{
this->ir_type = ir_type_constant;
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index d6c6a607ae8..1ffa5f4ad39 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1021,6 +1021,9 @@ public:
*/
static ir_expression_operation get_operator(const char *);
+ /** Whether or not this expression is a type conversion (i.e. i2f) */
+ bool is_type_conversion();
+
virtual void accept(ir_visitor *v)
{
v->visit(this);
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 35678358307..b5c5ee17f4e 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -39,7 +39,8 @@
bool do_common_optimization(exec_list *ir, bool linked,
bool uniform_locations_assigned,
- unsigned max_unroll_iterations);
+ unsigned max_unroll_iterations,
+ bool native_integers);
bool do_algebraic(exec_list *instructions);
bool do_constant_folding(exec_list *instructions);
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 6ba297237c7..e082d3e4bde 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -2313,7 +2313,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
unsigned max_unroll = ctx->ShaderCompilerOptions[i].MaxUnrollIterations;
- while (do_common_optimization(prog->_LinkedShaders[i]->ir, true, false, max_unroll))
+ while (do_common_optimization(prog->_LinkedShaders[i]->ir, true, false, max_unroll, ctx->Const.NativeIntegers))
;
}
diff --git a/src/glsl/loop_analysis.cpp b/src/glsl/loop_analysis.cpp
index 6a0e4da5100..8bbc0caa20f 100644
--- a/src/glsl/loop_analysis.cpp
+++ b/src/glsl/loop_analysis.cpp
@@ -238,6 +238,10 @@ loop_analysis::visit_leave(ir_loop *ir)
if (ls->contains_calls)
return visit_continue;
+ printf("KAYDEN: about to analyze:\n");
+ ir->print();
+ printf("KAYDEN: -----------------\n");
+
foreach_list(node, &ir->body_instructions) {
/* Skip over declarations at the start of a loop.
*/
diff --git a/src/glsl/loop_analysis.h b/src/glsl/loop_analysis.h
index 8bed1db0210..9c88867b84a 100644
--- a/src/glsl/loop_analysis.h
+++ b/src/glsl/loop_analysis.h
@@ -57,7 +57,8 @@ set_loop_controls(exec_list *instructions, loop_state *ls);
extern bool
-unroll_loops(exec_list *instructions, loop_state *ls, unsigned max_iterations);
+unroll_loops(exec_list *instructions, loop_state *ls, unsigned max_iterations,
+ bool native_integers);
/**
diff --git a/src/glsl/loop_unroll.cpp b/src/glsl/loop_unroll.cpp
index 3434fde6292..3d5cbcf8046 100644
--- a/src/glsl/loop_unroll.cpp
+++ b/src/glsl/loop_unroll.cpp
@@ -27,11 +27,13 @@
class loop_unroll_visitor : public ir_hierarchical_visitor {
public:
- loop_unroll_visitor(loop_state *state, unsigned max_iterations)
+ loop_unroll_visitor(loop_state *state, unsigned max_iterations,
+ bool native_integers)
{
this->state = state;
this->progress = false;
this->max_iterations = max_iterations;
+ this->native_integers = native_integers;
}
virtual ir_visitor_status visit_leave(ir_loop *ir);
@@ -40,6 +42,7 @@ public:
bool progress;
unsigned max_iterations;
+ bool native_integers;
};
@@ -54,11 +57,13 @@ class loop_unroll_count : public ir_hierarchical_visitor {
public:
int nodes;
bool fail;
+ bool native_integers;
- loop_unroll_count(exec_list *list)
+ loop_unroll_count(exec_list *list, bool native_ints)
{
nodes = 0;
fail = false;
+ native_integers = native_ints;
run(list);
}
@@ -71,6 +76,10 @@ public:
virtual ir_visitor_status visit_enter(ir_expression *ir)
{
+ /* On float-only architectures, type conversions are no-ops. */
+ if (ir->is_type_conversion() && !native_integers)
+ return visit_continue;
+
nodes++;
return visit_continue;
}
@@ -112,7 +121,7 @@ loop_unroll_visitor::visit_leave(ir_loop *ir)
/* Don't try to unroll nested loops and loops with a huge body.
*/
- loop_unroll_count count(&ir->body_instructions);
+ loop_unroll_count count(&ir->body_instructions, this->native_integers);
if (count.fail || count.nodes * iterations > (int)max_iterations * 5)
return visit_continue;
@@ -243,9 +252,10 @@ loop_unroll_visitor::visit_leave(ir_loop *ir)
bool
-unroll_loops(exec_list *instructions, loop_state *ls, unsigned max_iterations)
+unroll_loops(exec_list *instructions, loop_state *ls, unsigned max_iterations,
+ bool native_integers)
{
- loop_unroll_visitor v(ls, max_iterations);
+ loop_unroll_visitor v(ls, max_iterations, native_integers);
v.run(instructions);
diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp
index d43bf1a7463..acb6a3c68f7 100644
--- a/src/glsl/main.cpp
+++ b/src/glsl/main.cpp
@@ -166,7 +166,7 @@ compile_shader(struct gl_context *ctx, struct gl_shader *shader)
if (!state->error && !shader->ir->is_empty()) {
bool progress;
do {
- progress = do_common_optimization(shader->ir, false, false, 32);
+ progress = do_common_optimization(shader->ir, false, false, 32, true);
} while (progress);
validate_ir_tree(shader->ir);
diff --git a/src/glsl/test_optpass.cpp b/src/glsl/test_optpass.cpp
index 6abafb5d311..ef4af252474 100644
--- a/src/glsl/test_optpass.cpp
+++ b/src/glsl/test_optpass.cpp
@@ -64,7 +64,7 @@ do_optimization(struct exec_list *ir, const char *optimization)
if (sscanf(optimization, "do_common_optimization ( %d , %d ) ",
&int_0, &int_1) == 2) {
- return do_common_optimization(ir, int_0 != 0, false, int_1);
+ return do_common_optimization(ir, int_0 != 0, false, int_1, true);
} else if (strcmp(optimization, "do_algebraic") == 0) {
return do_algebraic(ir);
} else if (strcmp(optimization, "do_constant_folding") == 0) {
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index aa1bfdbd230..7775bd3cc58 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -184,7 +184,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
false /* loops */
) || progress;
- progress = do_common_optimization(shader->ir, true, true, 32)
+ progress = do_common_optimization(shader->ir, true, true, 32, true)
|| progress;
} while (progress);
diff --git a/src/mesa/drivers/dri/intel/intel_tex_layout.c b/src/mesa/drivers/dri/intel/intel_tex_layout.c
index 65645bc46a4..db7b84f8a82 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_layout.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_layout.c
@@ -115,17 +115,22 @@ intel_vertical_texture_alignment_unit(struct intel_context *intel,
if (_mesa_is_format_compressed(format))
return 4;
- if (format == MESA_FORMAT_S8)
+ if (format == MESA_FORMAT_S8) {
+ printf("vertical align 8/4 (S8): format %s\n", _mesa_get_format_name(format));
return intel->gen >= 7 ? 8 : 4;
+ }
GLenum base_format = _mesa_get_format_base_format(format);
if (intel->gen >= 6 &&
(base_format == GL_DEPTH_COMPONENT ||
base_format == GL_DEPTH_STENCIL)) {
+ printf("vertical align 4: format %s\n", _mesa_get_format_name(format));
return 4;
}
+ printf("vertical align 2: format %s\n", _mesa_get_format_name(format));
+
return 2;
}
diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp
index 3c91b1a42ad..f4a6072d407 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -1324,7 +1324,7 @@ create_new_program(struct gl_context *ctx, struct state_key *key)
validate_ir_tree(p.shader->ir);
- while (do_common_optimization(p.shader->ir, false, false, 32))
+ while (do_common_optimization(p.shader->ir, false, false, 32, ctx->Const.NativeIntegers))
;
reparent_ir(p.shader->ir, p.shader->ir);
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 840648e0449..a8873466886 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -3061,7 +3061,8 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
progress = do_common_optimization(ir, true, true,
- options->MaxUnrollIterations)
+ options->MaxUnrollIterations,
+ false)
|| progress;
progress = lower_quadop_vector(ir, true) || progress;
@@ -3171,7 +3172,7 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader)
/* Do some optimization at compile time to reduce shader IR size
* and reduce later work if the same shader is linked multiple times
*/
- while (do_common_optimization(shader->ir, false, false, 32))
+ while (do_common_optimization(shader->ir, false, false, 32, false))
;
validate_ir_tree(shader->ir);
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 9e68deb3471..fdc782e4bba 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -5002,7 +5002,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
progress = do_common_optimization(ir, true, true,
- options->MaxUnrollIterations)
+ options->MaxUnrollIterations,
+ ctx->Const.NativeIntegers)
|| progress;
progress = lower_quadop_vector(ir, false) || progress;