summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Abdiel Janulgue <abdiel.janulgue@linux.intel.com> 2015-04-22 11:46:51 +0300
committer Abdiel Janulgue <abdiel.janulgue@linux.intel.com> 2015-09-11 11:21:33 +0300
commit b26761541fd863762fd6f7f9876ff8bf0945a5b8 (patch)
tree 840c407900208a932e70220c27c50232c85542fc
parent 43a5058bf41dbf38973ddc331e7bd65b21dc5445 (diff)
i965/fs/nir: Append nir_intrinsic_load_ubo entries to the gather table
This is done only when the const block and offset are immediate values. Otherwise, just fall back to the previous method of uploading the UBO constant data to GRF using pull constants.
Cc: kenneth@whitecape.org
Cc: jason@jlekstrand.net
Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp 17
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.h 6
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_nir.cpp 68
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 6
4 files changed, 96 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index e39d82176c..ad084af543 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1865,6 +1865,7 @@ fs_visitor::assign_constant_locations()
stage_prog_data->nr_pull_params = num_pull_constants;
stage_prog_data->nr_params = 0;
+ stage_prog_data->nr_ubo_params = ubo_uniforms;
unsigned const_reg_access[uniforms];
memset(const_reg_access, 0, sizeof(const_reg_access));
@@ -1899,6 +1900,20 @@ fs_visitor::assign_constant_locations()
stage_prog_data->gather_table[p].channel_mask =
const_reg_access[i];
}
+
+ for (unsigned i = 0; i < this->nr_ubo_gather_table; i++) {
+ int p = stage_prog_data->nr_gather_table++;
+ stage_prog_data->gather_table[p].reg = this->ubo_gather_table[i].reg;
+ stage_prog_data->gather_table[p].channel_mask =
+ this->ubo_gather_table[i].channel_mask;
+ stage_prog_data->gather_table[p].const_block =
+ this->ubo_gather_table[i].const_block;
+ stage_prog_data->gather_table[p].const_offset =
+ this->ubo_gather_table[i].const_offset;
+ stage_prog_data->max_ubo_const_block =
+ MAX2(stage_prog_data->max_ubo_const_block,
+ this->ubo_gather_table[i].const_block);
+ }
}
/**
@@ -5171,6 +5186,7 @@ brw_wm_fs_emit(struct brw_context *brw,
fs_visitor v(brw->intelScreen->compiler, brw,
mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
prog, &fp->Base, 8, st_index8);
+ v.use_gather_constants = brw->fs_ubo_gather && brw->use_resource_streamer;
if (!v.run_fs(false /* do_rep_send */)) {
if (prog) {
prog->LinkStatus = false;
@@ -5187,6 +5203,7 @@ brw_wm_fs_emit(struct brw_context *brw,
fs_visitor v2(brw->intelScreen->compiler, brw,
mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
prog, &fp->Base, 16, st_index16);
+ v2.use_gather_constants = v.use_gather_constants;
if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) {
if (!v.simd16_unsupported) {
/* Try a SIMD16 compile */
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index dd0526a155..ded007a407 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -293,6 +293,9 @@ public:
unsigned n);
int implied_mrf_writes(fs_inst *inst);
+ bool nir_generate_ubo_gather_table(const brw::fs_builder &bld,
+ nir_intrinsic_instr *instr, fs_reg &dest,
+ bool has_indirect);
virtual void dump_instructions();
virtual void dump_instructions(const char *name);
@@ -316,6 +319,9 @@ public:
/** Number of uniform variable components visited. */
unsigned uniforms;
+ /** Number of ubo uniform variable components visited. */
+ unsigned ubo_uniforms;
+
/** Byte-offset for the next available spot in the scratch space buffer. */
unsigned last_scratch;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index a6c6a2fa8d..9a50b991a8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1488,6 +1488,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
has_indirect = true;
/* fallthrough */
case nir_intrinsic_load_ubo: {
+ if (nir_generate_ubo_gather_table(bld, instr, dest, has_indirect))
+ break;
+
nir_const_value *const_index = nir_src_as_const_value(instr->src[0]);
fs_reg surf_index;
@@ -1874,3 +1877,68 @@ fs_visitor::nir_emit_jump(const fs_builder &bld, nir_jump_instr *instr)
unreachable("unknown jump");
}
}
+
+/**
+ * Serve a constant-index load_ubo from push constants via the UBO gather
+ * table.  Returns false without emitting anything when the block index is not
+ * an immediate, the load is indirect, gather constants are off, or the push
+ * limit would be exceeded; otherwise writes the components and advances dest.
+ */
+bool
+fs_visitor::nir_generate_ubo_gather_table(const brw::fs_builder &bld,
+                                          nir_intrinsic_instr *instr,
+                                          fs_reg &dest,
+                                          bool has_indirect)
+{
+   const nir_const_value *block = nir_src_as_const_value(instr->src[0]);
+   if (block == NULL || has_indirect || !use_gather_constants)
+      return false;
+
+   /* Cap push constants at 16 registers, i.e. 128 uniform components. */
+   static const unsigned max_push_components = 16 * 8;
+   const unsigned param_index = uniforms + ubo_uniforms;
+   if (max_push_components < (param_index + instr->num_components))
+      return false;
+
+   fs_reg uniform_reg;
+   int gather = -1;
+   if (dispatch_width == 8) {
+      /* Only the SIMD8 compile appends gather table entries.
+       * NOTE(review): no capacity check on the table is visible here. */
+      uniform_reg = fs_reg(UNIFORM, 0);
+      gather = this->nr_ubo_gather_table++;
+      assert(instr->num_components <= 4);
+      this->ubo_gather_table[gather].reg = uniform_reg.reg;
+      this->ubo_gather_table[gather].const_block = block->u[0];
+      this->ubo_gather_table[gather].const_offset = instr->const_index[0];
+   } else {
+      /* Other widths look up the entry matching this block and offset. */
+      for (int i = 0; i < (int) this->nr_ubo_gather_table; i++) {
+         if (this->ubo_gather_table[i].const_block != block->u[0])
+            continue;
+         if (this->ubo_gather_table[i].const_offset !=
+             (unsigned) instr->const_index[0])
+            continue;
+         uniform_reg = fs_reg(UNIFORM, this->ubo_gather_table[i].reg);
+         break;
+      }
+      assert(uniform_reg.file == UNIFORM);
+   }
+
+   /* Position of the load, in units of 4, within its group of 16; this is 0
+    * for multiples of 16, so the aligned case needs no special handling. */
+   const int first_channel = (instr->const_index[0] % 16) / 4;
+
+   ubo_uniforms += instr->num_components;
+   for (unsigned j = 0; j < instr->num_components; j++) {
+      bld.MOV(dest, offset(retype(uniform_reg, dest.type), bld,
+                           j + param_index));
+      dest = offset(dest, bld, 1);
+      if (gather != -1) {
+         const unsigned mask = 1 << (first_channel + j);
+         this->ubo_gather_table[gather].channel_mask |= mask;
+      }
+   }
+
+   return true;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 5cb794b5fd..59eb1225bd 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1109,6 +1109,7 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
this->regs_live_at_ip = NULL;
this->uniforms = 0;
+ this->ubo_uniforms = 0;
this->last_scratch = 0;
this->pull_constant_loc = NULL;
this->push_constant_loc = NULL;
@@ -1116,8 +1117,11 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
this->spilled_any_registers = false;
this->do_dual_src = false;
- if (dispatch_width == 8)
+ if (dispatch_width == 8) {
this->param_size = rzalloc_array(mem_ctx, int, stage_prog_data->nr_params);
+ this->ubo_gather_table = rzalloc_array(mem_ctx, backend_shader::gather_table,
+ stage_prog_data->nr_params);
+ }
}
fs_visitor::~fs_visitor()