summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jordan Justen <jordan.l.justen@intel.com>, 2016-05-22 21:29:53 -0700
committer: Jordan Justen <jordan.l.justen@intel.com>, 2016-05-23 10:15:25 -0700
commit: 43d775549d244c86e80ba00b4dd9add07bd72cce (patch)
tree: 681a2d04f1158d3c070d24f8dd252397dccc6b41
parent: 6717700397a44aaa7cd76b56c6cc9831e0537643 (diff)
i965: Add uniform to hold the CS thread ID base
This thread ID will be used to compute the gl_LocalInvocationIndex and gl_LocalInvocationID values.

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_compiler.h 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_cs.c 16
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp 11
-rw-r--r-- src/mesa/drivers/dri/i965/brw_nir.c 17
-rw-r--r-- src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp 2
5 files changed, 43 insertions, 4 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index fb0e9aec05..22f231620c 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -425,6 +425,7 @@ struct brw_cs_prog_data {
bool uses_barrier;
bool uses_num_work_groups;
unsigned local_invocation_id_regs;
+ int thread_local_id_index;
struct {
/** @{
diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c
index 0ab9ebdab3..74e66bc7ef 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.c
+++ b/src/mesa/drivers/dri/i965/brw_cs.c
@@ -64,6 +64,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
struct brw_cs_prog_data prog_data;
bool start_busy = false;
double start_time = 0;
+ nir_shader *nir = cp->program.Base.nir;
struct brw_shader *cs =
(struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE];
@@ -71,6 +72,15 @@ brw_codegen_cs_prog(struct brw_context *brw,
memset(&prog_data, 0, sizeof(prog_data));
+ prog_data.thread_local_id_index = -1;
+ nir_foreach_variable(var, &nir->uniforms) {
+ if (strcmp(var->name, "gl_i965_cs_thread_local_id") == 0) {
+ prog_data.thread_local_id_index = var->data.driver_location / 4;
+ break;
+ }
+ }
+ assert(prog_data.thread_local_id_index >= 0);
+
if (prog->Comp.SharedSize > 64 * 1024) {
prog->LinkStatus = false;
const char *error_str =
@@ -91,7 +101,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
* prog_data associated with the compiled program, and which will be freed
* by the state cache.
*/
- int param_count = cp->program.Base.nir->num_uniforms;
+ int param_count = nir->num_uniforms;
/* The backend also sometimes adds params for texture size. */
param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
@@ -104,7 +114,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
prog_data.base.nr_params = param_count;
prog_data.base.nr_image_params = cs->base.NumImages;
- brw_nir_setup_glsl_uniforms(cp->program.Base.nir, prog, &cp->program.Base,
+ brw_nir_setup_glsl_uniforms(nir, prog, &cp->program.Base,
&prog_data.base, true);
if (unlikely(brw->perf_debug)) {
@@ -122,7 +132,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
char *error_str;
program = brw_compile_cs(brw->intelScreen->compiler, brw, mem_ctx,
- key, &prog_data, cp->program.Base.nir,
+ key, &prog_data, nir,
st_index, &program_size, &error_str);
if (program == NULL) {
prog->LinkStatus = false;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 847a6d3656..b71e21dedf 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2130,6 +2130,12 @@ fs_visitor::assign_constant_locations()
}
}
+ if (stage == MESA_SHADER_COMPUTE) {
+ brw_cs_prog_data *prog_data = (brw_cs_prog_data*)stage_prog_data;
+ if (!is_live[prog_data->thread_local_id_index])
+ prog_data->thread_local_id_index = -1;
+ }
+
/* Only allow 16 registers (128 uniform components) as push constants.
*
* Just demote the end of the list. We could probably do better
@@ -2208,6 +2214,11 @@ fs_visitor::assign_constant_locations()
stage_prog_data->pull_param[pull_constant_loc[i]] = value;
} else if (push_constant_loc[i] != -1) {
stage_prog_data->param[push_constant_loc[i]] = value;
+ if (stage == MESA_SHADER_COMPUTE) {
+ brw_cs_prog_data *prog_data = (brw_cs_prog_data*)stage_prog_data;
+ if (prog_data->thread_local_id_index == (int)i)
+ prog_data->thread_local_id_index = push_constant_loc[i];
+ }
}
}
ralloc_free(param);
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 9274f2e161..7ee14d5d22 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -423,6 +423,20 @@ nir_optimize(nir_shader *nir, bool is_scalar)
return nir;
}
+static void
+add_cs_local_id_uniform(nir_shader *shader) /* Appends a uint uniform named "gl_i965_cs_thread_local_id" to shader. */
+{
+ int location = 0; /* Running maximum of data.location; starts at 0 even when the list is empty. */
+ nir_foreach_variable(var, &shader->uniforms) { /* Walk every variable in the shader's uniform list. */
+ location = MAX2(location, var->data.location); /* MAX2 presumably yields the larger operand -- confirm against its definition. */
+ }
+
+ nir_variable *var =
+ nir_variable_create(shader, nir_var_uniform, glsl_uint_type(),
+ "gl_i965_cs_thread_local_id"); /* This exact name is what brw_codegen_cs_prog compares against with strcmp. */
+ var->data.location = location + 1; /* One past the maximum seen, so the result is 1 when no uniforms exist. NOTE(review): assumes no existing uniform spans more than one location -- confirm. */
+}
+
/* Does some simple lowering and runs the standard suite of optimizations
*
* This is intended to be called more-or-less directly after you get the
@@ -440,6 +454,9 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
const bool is_scalar = compiler->scalar_stage[nir->stage];
+ if (nir->stage == MESA_SHADER_COMPUTE)
+ OPT_V(add_cs_local_id_uniform);
+
if (nir->stage == MESA_SHADER_GEOMETRY)
OPT(nir_lower_gs_intrinsics);
diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
index b752ad5610..716a390e72 100644
--- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
+++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
@@ -32,7 +32,7 @@ brw_nir_setup_glsl_builtin_uniform(nir_variable *var,
bool is_scalar)
{
const nir_state_slot *const slots = var->state_slots;
- assert(var->state_slots != NULL);
+ assert(var->num_state_slots == 0 || var->state_slots != NULL);
unsigned uniform_index = var->data.driver_location / 4;
for (unsigned int i = 0; i < var->num_state_slots; i++) {