summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAbdiel Janulgue <abdiel.janulgue@linux.intel.com>2015-04-14 11:17:46 +0300
committerAbdiel Janulgue <abdiel.janulgue@linux.intel.com>2015-09-10 12:22:28 +0300
commit3b6d8a56a64218f00848365a5b0fc50c7b6ddb9d (patch)
tree3525ae2bd34ee2bad3a11d0779e7e81240cb868d
parente6d7aa4db54a7bd54d5e8c59af586e6d9cb5e9ca (diff)
i965/vec4: Append ir_binop_ubo_load entries to the gather table
Do this when the const block and offset are immediate values. Otherwise, just fall back to the previous method of uploading the UBO constant data to GRF using pull constants. Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.cpp13
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp75
4 files changed, 92 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 4ba868398a..267d6a0814 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -601,6 +601,18 @@ vec4_visitor::generate_gather_table()
stage_prog_data->gather_table[p].reg = -1;
stage_prog_data->gather_table[p].channel_mask = 0xf;
}
+
+ for (unsigned i = 0; i < this->nr_ubo_gather_table; i++) {
+ int p = stage_prog_data->nr_gather_table++;
+ stage_prog_data->gather_table[p].reg = this->ubo_gather_table[i].reg;
+ stage_prog_data->gather_table[p].channel_mask = this->ubo_gather_table[i].channel_mask;
+ stage_prog_data->gather_table[p].const_block = this->ubo_gather_table[i].const_block;
+ stage_prog_data->gather_table[p].const_offset = this->ubo_gather_table[i].const_offset;
+ stage_prog_data->max_ubo_const_block = MAX2(stage_prog_data->max_ubo_const_block,
+ this->ubo_gather_table[i].const_block);
+ }
+
+ stage_prog_data->nr_ubo_params = ubo_uniforms;
}
/**
@@ -1990,6 +2002,7 @@ brw_vs_emit(struct brw_context *brw,
vec4_vs_visitor v(brw->intelScreen->compiler, brw, key, prog_data,
vp, prog, mem_ctx, st_index,
!_mesa_is_gles3(&brw->ctx));
+ v.use_gather_constants = brw->vs_ubo_gather && brw->use_resource_streamer;
if (!v.run(brw_select_clip_planes(&brw->ctx))) {
if (prog) {
prog->LinkStatus = false;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 3d645bf04c..334001bd4e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -168,6 +168,7 @@ public:
int *uniform_vector_size;
int uniform_array_size; /*< Size of uniform_[vector_]size arrays */
int uniforms;
+ int ubo_uniforms;
src_reg shader_start_time;
@@ -410,6 +411,7 @@ public:
void dump_instruction(backend_instruction *inst, FILE *file);
void visit_atomic_counter_intrinsic(ir_call *ir);
+ bool generate_ubo_gather_table(ir_expression *ir, const dst_reg &result_dst);
bool is_high_sampler(src_reg sampler);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 019efecac6..ab785a5b86 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -679,6 +679,8 @@ brw_gs_emit(struct brw_context *brw,
vec4_gs_visitor v(brw->intelScreen->compiler, brw,
c, prog, mem_ctx, true /* no_spills */, st_index);
+ v.use_gather_constants = brw->gs_ubo_gather &&
+ brw->use_resource_streamer;
if (v.run(NULL /* clip planes */)) {
return generate_assembly(brw, prog, &c->gp->program.Base,
&c->prog_data.base, mem_ctx, v.cfg,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index bee8621c69..30da0a021d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1843,6 +1843,12 @@ vec4_visitor::visit(ir_expression *ir)
break;
case ir_binop_ubo_load: {
+ /* Use gather push constants if at all possible, otherwise just
+ * fall back to pull constants for UBOs
+ */
+ if (generate_ubo_gather_table(ir, result_dst))
+ break;
+
ir_constant *const_uniform_block = ir->operands[0]->as_constant();
ir_constant *const_offset_ir = ir->operands[1]->as_constant();
unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
@@ -3758,6 +3764,67 @@ vec4_visitor::resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg)
*reg = neg_result;
}
+/**
+ * Try to satisfy an ir_binop_ubo_load from the gather table (push constants).
+ *
+ * This only succeeds when gather constants are enabled and both the uniform
+ * block index (operand 0) and the offset (operand 1) are constants.  A table
+ * entry with the same block and offset is reused if one exists; otherwise a
+ * new entry and a new UNIFORM register are reserved.
+ *
+ * \param ir          expression to translate
+ * \param result_dst  destination written by the CMP emitted for boolean loads
+ *
+ * \return true if the load was handled here (for non-boolean types
+ *         this->result is set to the uniform register); false if the caller
+ *         has to fall back to pull constants.
+ */
+bool
+vec4_visitor::generate_ubo_gather_table(ir_expression *ir, const dst_reg &result_dst)
+{
+   /* Check the opcode before touching operands[1]. */
+   if (ir->operation != ir_binop_ubo_load || !use_gather_constants)
+      return false;
+
+   ir_constant *const_uniform_block = ir->operands[0]->as_constant();
+   ir_constant *const_offset_ir = ir->operands[1]->as_constant();
+   if (!const_uniform_block || !const_offset_ir)
+      return false;
+
+   const unsigned const_block = const_uniform_block->value.u[0];
+   const unsigned const_offset = const_offset_ir->value.u[0];
+   const int components = ir->type->vector_elements;
+
+   /* Only allow 32 registers (256 uniform components) as push constants. */
+   const int max_uniform_components = 32 * 8;
+   const int param_index = uniforms + ubo_uniforms;
+   if (param_index + components >= max_uniform_components)
+      return false;
+
+   /* Reuse the register of an earlier load from the same block and offset.
+    *
+    * NOTE(review): entries are matched on block and offset only; confirm
+    * that a later load at the same offset can never need more components
+    * than the existing entry's channel_mask covers.
+    */
+   dst_reg reg;
+   for (int i = 0; i < (int) this->nr_ubo_gather_table; i++) {
+      if (this->ubo_gather_table[i].const_block == const_block &&
+          this->ubo_gather_table[i].const_offset == const_offset) {
+         reg = dst_reg(UNIFORM, this->ubo_gather_table[i].reg);
+         break;
+      }
+   }
+
+   if (reg.file != UNIFORM) {
+      /* uniform_vector_size and ubo_gather_table are both allocated with
+       * uniform_array_size elements.  Fall back to pull constants rather
+       * than write past the end of either array.
+       */
+      if (param_index >= this->uniform_array_size ||
+          (int) this->nr_ubo_gather_table >= this->uniform_array_size)
+         return false;
+
+      reg = dst_reg(UNIFORM, param_index);
+      uniform_vector_size[param_index] = components;
+
+      const int gather = this->nr_ubo_gather_table++;
+      this->ubo_gather_table[gather].reg = reg.reg;
+      this->ubo_gather_table[gather].const_block = const_block;
+      this->ubo_gather_table[gather].const_offset = const_offset;
+
+      /* One mask bit per loaded component, shifted by (offset % 16) / 4.
+       * Presumably const_offset is in bytes, which makes the shift the
+       * 4-byte slot inside a 16-byte fetch -- TODO confirm.
+       */
+      for (int i = 0; i < components; i++) {
+         this->ubo_gather_table[gather].channel_mask |= (1 << i);
+      }
+      this->ubo_gather_table[gather].channel_mask <<= (const_offset % 16) / 4;
+      this->ubo_uniforms += components;
+   }
+   reg.type = brw_type_for_base_type(ir->type);
+
+   src_reg consts = src_reg(reg);
+   consts.swizzle = brw_swizzle_for_size(components);
+
+   if (ir->type->base_type == GLSL_TYPE_BOOL) {
+      /* Compare the loaded value against zero into result_dst. */
+      emit(CMP(result_dst, consts, src_reg(0u), BRW_CONDITIONAL_NZ));
+   } else {
+      this->result = consts;
+   }
+
+   return true;
+}
+
+
vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
void *log_data,
struct gl_program *prog,
@@ -3797,6 +3864,7 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
this->uniforms = 0;
+ this->ubo_uniforms = 0;
/* Initialize uniform_array_size to at least 1 because pre-gen6 VS requires
* at least one. See setup_uniforms() in brw_vec4.cpp.
@@ -3807,8 +3875,15 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
MAX2(DIV_ROUND_UP(stage_prog_data->nr_params, 4), 1);
}
+ /* The gather constants hardware treats each fetch in 16-byte units,
+ * so reflect the size of each UBO fetch as vectors even if they contain
+ * fewer than 4 components.
+ */
+ this->uniform_array_size += stage_prog_data->nr_ubo_params;
this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
this->uniform_vector_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
+ this->ubo_gather_table = rzalloc_array(mem_ctx, backend_shader::gather_table,
+ this->uniform_array_size);
}
vec4_visitor::~vec4_visitor()