diff options
author | Jordan Justen <jordan.l.justen@intel.com> | 2016-05-22 21:29:53 -0700 |
---|---|---|
committer | Jordan Justen <jordan.l.justen@intel.com> | 2016-05-23 10:15:25 -0700 |
commit | 43d775549d244c86e80ba00b4dd9add07bd72cce (patch) | |
tree | 681a2d04f1158d3c070d24f8dd252397dccc6b41 | |
parent | 6717700397a44aaa7cd76b56c6cc9831e0537643 (diff) |
i965: Add uniform to hold the CS thread ID base

This thread ID will be used to compute the gl_LocalInvocationIndex and
gl_LocalInvocationID values.

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_compiler.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_cs.c | 16 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 11 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_nir.c | 17 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp | 2 |
5 files changed, 43 insertions, 4 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h index fb0e9aec05..22f231620c 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -425,6 +425,7 @@ struct brw_cs_prog_data { bool uses_barrier; bool uses_num_work_groups; unsigned local_invocation_id_regs; + int thread_local_id_index; struct { /** @{ diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c index 0ab9ebdab3..74e66bc7ef 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.c +++ b/src/mesa/drivers/dri/i965/brw_cs.c @@ -64,6 +64,7 @@ brw_codegen_cs_prog(struct brw_context *brw, struct brw_cs_prog_data prog_data; bool start_busy = false; double start_time = 0; + nir_shader *nir = cp->program.Base.nir; struct brw_shader *cs = (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE]; @@ -71,6 +72,15 @@ brw_codegen_cs_prog(struct brw_context *brw, memset(&prog_data, 0, sizeof(prog_data)); + prog_data.thread_local_id_index = -1; + nir_foreach_variable(var, &nir->uniforms) { + if (strcmp(var->name, "gl_i965_cs_thread_local_id") == 0) { + prog_data.thread_local_id_index = var->data.driver_location / 4; + break; + } + } + assert(prog_data.thread_local_id_index >= 0); + if (prog->Comp.SharedSize > 64 * 1024) { prog->LinkStatus = false; const char *error_str = @@ -91,7 +101,7 @@ brw_codegen_cs_prog(struct brw_context *brw, * prog_data associated with the compiled program, and which will be freed * by the state cache. */ - int param_count = cp->program.Base.nir->num_uniforms; + int param_count = nir->num_uniforms; /* The backend also sometimes adds params for texture size. 
*/ param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits; @@ -104,7 +114,7 @@ brw_codegen_cs_prog(struct brw_context *brw, prog_data.base.nr_params = param_count; prog_data.base.nr_image_params = cs->base.NumImages; - brw_nir_setup_glsl_uniforms(cp->program.Base.nir, prog, &cp->program.Base, + brw_nir_setup_glsl_uniforms(nir, prog, &cp->program.Base, &prog_data.base, true); if (unlikely(brw->perf_debug)) { @@ -122,7 +132,7 @@ brw_codegen_cs_prog(struct brw_context *brw, char *error_str; program = brw_compile_cs(brw->intelScreen->compiler, brw, mem_ctx, - key, &prog_data, cp->program.Base.nir, + key, &prog_data, nir, st_index, &program_size, &error_str); if (program == NULL) { prog->LinkStatus = false; diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 847a6d3656..b71e21dedf 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2130,6 +2130,12 @@ fs_visitor::assign_constant_locations() } } + if (stage == MESA_SHADER_COMPUTE) { + brw_cs_prog_data *prog_data = (brw_cs_prog_data*)stage_prog_data; + if (!is_live[prog_data->thread_local_id_index]) + prog_data->thread_local_id_index = -1; + } + /* Only allow 16 registers (128 uniform components) as push constants. * * Just demote the end of the list. 
We could probably do better @@ -2208,6 +2214,11 @@ fs_visitor::assign_constant_locations() stage_prog_data->pull_param[pull_constant_loc[i]] = value; } else if (push_constant_loc[i] != -1) { stage_prog_data->param[push_constant_loc[i]] = value; + if (stage == MESA_SHADER_COMPUTE) { + brw_cs_prog_data *prog_data = (brw_cs_prog_data*)stage_prog_data; + if (prog_data->thread_local_id_index == (int)i) + prog_data->thread_local_id_index = push_constant_loc[i]; + } } } ralloc_free(param); diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 9274f2e161..7ee14d5d22 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -423,6 +423,20 @@ nir_optimize(nir_shader *nir, bool is_scalar) return nir; } +static void +add_cs_local_id_uniform(nir_shader *shader) +{ + int location = 0; + nir_foreach_variable(var, &shader->uniforms) { + location = MAX2(location, var->data.location); + } + + nir_variable *var = + nir_variable_create(shader, nir_var_uniform, glsl_uint_type(), + "gl_i965_cs_thread_local_id"); + var->data.location = location + 1; +} + /* Does some simple lowering and runs the standard suite of optimizations * * This is intended to be called more-or-less directly after you get the @@ -440,6 +454,9 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir) const bool is_scalar = compiler->scalar_stage[nir->stage]; + if (nir->stage == MESA_SHADER_COMPUTE) + OPT_V(add_cs_local_id_uniform); + if (nir->stage == MESA_SHADER_GEOMETRY) OPT(nir_lower_gs_intrinsics); diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp index b752ad5610..716a390e72 100644 --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp +++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp @@ -32,7 +32,7 @@ brw_nir_setup_glsl_builtin_uniform(nir_variable *var, bool is_scalar) { const nir_state_slot *const slots = var->state_slots; - assert(var->state_slots != 
NULL); + assert(var->num_state_slots == 0 || var->state_slots != NULL); unsigned uniform_index = var->data.driver_location / 4; for (unsigned int i = 0; i < var->num_state_slots; i++) { |