diff options
author | Abdiel Janulgue <abdiel.janulgue@linux.intel.com> | 2015-09-29 13:42:59 +0300 |
---|---|---|
committer | Abdiel Janulgue <abdiel.janulgue@linux.intel.com> | 2015-10-09 19:47:44 +0300 |
commit | 41ac473926ee0096508e893c391077cfe19d292d (patch) | |
tree | 77df517a53193478da3f3b6eff7d35a8282a3513 | |
parent | f5a8ca23b79fb497a94b759e818708ac03820d09 (diff) |
WIP: Use gather constants to repack uniforms [contiguous_uniforms]
Repack uniforms using gather constants instead of re-arranging the constant data in an intermediate buffer.
- Add a separate gather table per stage
- Works with unused array elements
- Matrix support
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 61 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 56 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 30 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_program.c | 85 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_shader.cpp | 85 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_shader.h | 11 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen6_vs_state.c | 16 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_vs_state.c | 10 |
10 files changed, 327 insertions, 32 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 58edaf4588..d76d2b17a1 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -340,6 +340,57 @@ struct brw_shader { bool compiled_once; }; +struct brw_gather_table; + +struct brw_shader_program { + struct gl_shader_program base; + + drm_intel_bo *bo; /**< Actual uniform backing store */ + unsigned next_offset; + + /* Mapping to uniform storage locations in this shader program */ + struct storage_data { + struct brw_gather_table *gather_table; + + /** The start index of this uniform storage slot within the gather table */ + unsigned base_index; + + /** Arrays on this uniform storage can be condensed by the compiler. + * Each entry here points to the data used by a live array slot. + * For non-arrays, the data is in index 0. + */ + struct array_data { + uint16_t gather_index; + uint16_t array_index; + } *live_array_indices; + unsigned live_array_nr; + + /** If this uniform storage contains arrays, multiple updates are + * triggered per each array slot. This flag ensures that the offset + * is updated only once per uniform storage location. + */ + bool needs_update; + + unsigned size; + unsigned offset; + } *storage_table; +}; + +struct brw_gather_table +{ + int reg; + unsigned channel_mask; + unsigned const_block; + unsigned const_offset; + unsigned components; + + /** The remap_location of the uniform storage within the shader program + * if this gather entry is associated with a uniform storage, + * otherwise set as -1. + */ + int uniform_location; +}; + /* Note: If adding fields that need anything besides a normal memcmp() for * comparing them, be sure to go fix brw_stage_prog_data_compare(). */ @@ -379,6 +430,9 @@ struct brw_stage_prog_data { bool use_alt_mode; /**< Use ALT floating point mode? Otherwise, IEEE. 
*/ + /* Track the program associated with this stage */ + struct brw_shader_program* program; + /* Pointers to tracked values (only valid once * _mesa_load_state_parameters has been called at runtime). * @@ -389,12 +443,7 @@ struct brw_stage_prog_data { const gl_constant_value **pull_param; /** Combined gather table containing uniform and UBO entries */ - struct { - int reg; - unsigned channel_mask; - unsigned const_block; - unsigned const_offset; - } *gather_table; + struct brw_gather_table *gather_table; /** * Image metadata passed to the shader as uniforms. This is deliberately diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 6abe367525..4cbef718ec 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1771,6 +1771,19 @@ fs_visitor::compact_virtual_grfs() return progress; } +static +void copy_gather_table_entry(struct brw_gather_table *dst, + unsigned dst_index, + const struct brw_gather_table *src, + unsigned src_index) +{ + dst[dst_index].reg = src[src_index].reg; + dst[dst_index].channel_mask = src[src_index].channel_mask; + dst[dst_index].const_block = src[src_index].const_block; + dst[dst_index].const_offset = src[src_index].const_offset; + dst[dst_index].uniform_location = src[src_index].uniform_location; +} + /** * Assign UNIFORM file registers to either push constants or pull constants. 
* @@ -1898,26 +1911,53 @@ fs_visitor::assign_constant_locations() } int num_consts = ALIGN(prog_data->nr_params, 4) / 4; +#define CONT + +#ifndef CONT for (int i = 0; i < num_consts; i++) { int p = stage_prog_data->nr_gather_table++; stage_prog_data->gather_table[p].reg = -1; stage_prog_data->gather_table[p].channel_mask = const_reg_access[i]; } +#else + struct brw_shader_program *prog = (struct brw_shader_program *) shader_prog; + for (unsigned i = 0; i < this->nr_gather_table; i++) { + int const_idx = this->ubo_gather_table[i].reg; + if (push_constant_loc[const_idx] == -1) + continue; + + int loc = this->ubo_gather_table[i].uniform_location; + prog->storage_table[loc].gather_table = stage_prog_data->gather_table; + if (!prog->storage_table[loc].live_array_indices) { + prog->storage_table[loc].live_array_indices = + rzalloc_array(prog, brw_shader_program::storage_data::array_data, + prog_data->nr_params); + } + unsigned arr_idx = prog->storage_table[loc].live_array_nr++; + prog->storage_table[loc].live_array_indices[arr_idx].array_index + = i - prog->storage_table[loc].base_index; + prog->storage_table[loc].live_array_indices[arr_idx].gather_index = + stage_prog_data->nr_gather_table; + + copy_gather_table_entry(stage_prog_data->gather_table, + stage_prog_data->nr_gather_table++, + this->ubo_gather_table, i); + + } +#endif for (unsigned i = 0; i < this->nr_ubo_gather_table; i++) { - int p = stage_prog_data->nr_gather_table++; - stage_prog_data->gather_table[p].reg = this->ubo_gather_table[i].reg; - stage_prog_data->gather_table[p].channel_mask = - this->ubo_gather_table[i].channel_mask; - stage_prog_data->gather_table[p].const_block = - this->ubo_gather_table[i].const_block; - stage_prog_data->gather_table[p].const_offset = - this->ubo_gather_table[i].const_offset; + int idx = stage_prog_data->nr_gather_table++; + copy_gather_table_entry(stage_prog_data->gather_table, idx, + this->ubo_gather_table, i); stage_prog_data->max_ubo_const_block = 
MAX2(stage_prog_data->max_ubo_const_block, this->ubo_gather_table[i].const_block); + /* UBOs don't have uniform storage */ + stage_prog_data->gather_table[idx].uniform_location = -1; } + stage_prog_data->program = (struct brw_shader_program *) shader_prog; } /** diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 9a50b991a8..389b1b9896 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -220,6 +220,7 @@ fs_visitor::nir_setup_uniform(nir_variable *var) * our name. */ unsigned index = var->data.driver_location; + struct brw_shader_program *prog = (struct brw_shader_program *) shader_prog; for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) { struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u]; @@ -239,9 +240,32 @@ fs_visitor::nir_setup_uniform(nir_variable *var) unsigned slots = storage->type->component_slots(); if (storage->array_elements) slots *= storage->array_elements; - + int loc = storage->remap_location; + int idx = 0; + unsigned element_size = MAX2(storage->array_elements, 1); for (unsigned i = 0; i < slots; i++) { - stage_prog_data->param[index++] = &storage->storage[i]; + idx = index++; + stage_prog_data->param[idx] = &storage->storage[i]; +#if 1 + if (i % (storage->type->is_matrix() ? storage->type->matrix_columns : + storage->type->component_slots()) != 0) + continue; + + int base_elements = storage->type->is_matrix() ? 
+ storage->type->matrix_columns : storage->type->components(); + + int p = this->nr_gather_table++; + this->ubo_gather_table[p].reg = idx; + this->ubo_gather_table[p].components = + storage->type->component_slots(); + this->ubo_gather_table[p].channel_mask = + (2 << (base_elements - 1)) - 1; + this->ubo_gather_table[p].uniform_location = loc; + + if (i == 0) { + prog->storage_table[loc].base_index = p; + } +#endif } } } @@ -1853,7 +1877,6 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) shadow_comparitor, lod, lod2, lod_components, sample_index, tex_offset, mcs, gather_component, is_cube_array, is_rect, sampler, sampler_reg, texunit); - fs_reg dest = get_nir_dest(instr->dest); dest.type = this->result.type; unsigned num_components = nir_tex_instr_dest_size(instr); @@ -1924,6 +1947,7 @@ fs_visitor::nir_generate_ubo_gather_table(const brw::fs_builder &bld, this->ubo_gather_table[gather].reg = uniform_reg.reg; this->ubo_gather_table[gather].const_block = const_index->u[0]; this->ubo_gather_table[gather].const_offset = instr->const_index[0]; + this->ubo_gather_table[gather].uniform_location = -1; } ubo_uniforms += instr->num_components; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 59eb1225bd..097168c036 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1119,7 +1119,8 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data, if (dispatch_width == 8) { this->param_size = rzalloc_array(mem_ctx, int, stage_prog_data->nr_params); - this->ubo_gather_table = rzalloc_array(mem_ctx, backend_shader::gather_table, + this->ubo_gather_table = rzalloc_array(mem_ctx, + struct brw_gather_table, stage_prog_data->nr_params); } } diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index aa805bed5c..3cbe20e490 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ 
b/src/mesa/drivers/dri/i965/brw_program.c @@ -40,6 +40,7 @@ #include "tnl/tnl.h" #include "util/ralloc.h" #include "glsl/ir.h" +#include "glsl/ir_uniform.h" #include "brw_context.h" #include "brw_shader.h" @@ -274,6 +275,87 @@ brw_get_scratch_bo(struct brw_context *brw, } } +static struct gl_shader_program * +brwNewShaderProgram(GLuint name) +{ + + struct brw_shader_program *shader_prog = + rzalloc(NULL, struct brw_shader_program); + if (shader_prog) { + shader_prog->base.Name = name; + _mesa_init_shader_program(&shader_prog->base); + return &shader_prog->base; + } + + assert(false); + return NULL; +} + +static void +brwDeleteShaderProgram(struct gl_context *ctx, + struct gl_shader_program *shProg) +{ + struct brw_shader_program *shader_prog = (struct brw_shader_program *) shProg; + drm_intel_bo_unreference(shader_prog->bo); + + _mesa_free_shader_program_data(ctx, shProg); + + ralloc_free(shProg); +} + +/** Updates the head pointer of our uniform storage ring-buffer and the offsets + * in the corresponding gather table entry whenever uniforms get updated. 
+ */ +static void +brwUniformStorageChange(struct gl_context *ctx, struct gl_shader_program *prog, + struct gl_uniform_storage* storage) +{ + + if (!storage->num_driver_storage) + return; + + struct brw_context *brw = brw_context(ctx); + struct brw_shader_program *shader_prog = (struct brw_shader_program *) prog; + + int loc = storage->remap_location; + + if (shader_prog->storage_table[loc].needs_update && + shader_prog->storage_table[loc].live_array_nr > 0) { + + if ((shader_prog->next_offset + shader_prog->storage_table[loc].size) + > shader_prog->bo->size) { + /* Our uniform ringbuffer has finally wrapped */ + intel_batchbuffer_flush(brw); + shader_prog->next_offset = 0; + } + + storage->driver_storage[0].data = + shader_prog->bo->virtual + shader_prog->next_offset; + shader_prog->storage_table[loc].offset = shader_prog->next_offset; + shader_prog->storage_table[loc].needs_update = false; + + /** Update active entries in the gather table to reflect the new offsets. + * If the uniform storage is an array, the gather table might have been + * repacked. Only update the corresponding offsets in the gather table + * that are marked live. 
+ */ + struct brw_gather_table *gt = shader_prog->storage_table[loc].gather_table; + + unsigned base_size = shader_prog->storage_table[loc].size / + MAX2(storage->array_elements, 1); + + for (int i = 0; i < shader_prog->storage_table[loc].live_array_nr; i++) { + uint16_t gather_idx = shader_prog->storage_table[loc] + .live_array_indices[i].gather_index; + uint16_t array_idx = shader_prog->storage_table[loc] + .live_array_indices[i].array_index; + gt[gather_idx].const_offset = + shader_prog->storage_table[loc].offset + (base_size * array_idx); + } + shader_prog->next_offset += shader_prog->storage_table[loc].size; + } +} + void brwInitFragProgFuncs( struct dd_function_table *functions ) { assert(functions->ProgramStringNotify == _tnl_program_string); @@ -281,6 +363,9 @@ void brwInitFragProgFuncs( struct dd_function_table *functions ) functions->NewProgram = brwNewProgram; functions->DeleteProgram = brwDeleteProgram; functions->ProgramStringNotify = brwProgramStringNotify; + functions->NewShaderProgram = brwNewShaderProgram; + functions->DeleteShaderProgram = brwDeleteShaderProgram; + functions->UniformStoragePropagateNotify = brwUniformStorageChange; functions->NewShader = brw_new_shader; functions->LinkShader = brw_link_shader; diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 9d45cfe5a1..c9f8c264db 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -341,6 +341,88 @@ process_glsl_ir(gl_shader_stage stage, } } +static +void brw_associate_uniform_storage(struct gl_context *ctx, + struct gl_shader_program* shader_prog) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_shader_program *prog = (struct brw_shader_program *) shader_prog; + + if (!prog->bo) { + prog->bo = drm_intel_bo_alloc(brw->bufmgr, "uniform_buffer", 4096, 4096); + prog->next_offset = 0; + drm_intel_gem_bo_map_gtt(prog->bo); + } + + prog->storage_table = rzalloc_array(prog, 
brw_shader_program::storage_data, + shader_prog->NumUniformRemapTable); + + for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) { + struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u]; + + if (storage->builtin || storage->type->is_image() || + storage->block_index != -1) + continue; + + enum gl_uniform_driver_format format = uniform_native; + + unsigned columns = 0; + int dmul = 4 * sizeof(float); + switch (storage->type->base_type) { + case GLSL_TYPE_UINT: + assert(ctx->Const.NativeIntegers); + format = uniform_native; + columns = 1; + break; + case GLSL_TYPE_INT: + format = + (ctx->Const.NativeIntegers) ? uniform_native : uniform_int_float; + columns = 1; + break; + + case GLSL_TYPE_DOUBLE: + if (storage->type->vector_elements > 2) + dmul *= 2; + /* fallthrough */ + case GLSL_TYPE_FLOAT: + format = uniform_native; + columns = storage->type->matrix_columns; + break; + case GLSL_TYPE_BOOL: + format = uniform_native; + columns = 1; + break; + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_SUBROUTINE: + format = uniform_native; + columns = 1; + break; + case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_ARRAY: + case GLSL_TYPE_VOID: + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_ERROR: + case GLSL_TYPE_INTERFACE: + assert(!"Should not get here."); + break; + } + + uint8_t* p = (uint8_t *) prog->bo->virt + prog->next_offset; + _mesa_uniform_attach_driver_storage(storage, + dmul * columns, + dmul, + format, + p); + unsigned size = dmul * columns * MAX2(storage->array_elements, 1); + + int loc = storage->remap_location; + prog->storage_table[loc].needs_update = true; + prog->storage_table[loc].size = size; + prog->storage_table[loc].offset = prog->next_offset; + } +} + GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) { @@ -407,6 +489,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) _mesa_reference_program(ctx, &prog, NULL); } + brw_associate_uniform_storage(ctx, shProg); + 
if ((ctx->_Shader->Flags & GLSL_DUMP) && shProg->Name != 0) { for (unsigned i = 0; i < shProg->NumShaders; i++) { const struct gl_shader *sh = shProg->Shaders[i]; @@ -920,6 +1004,7 @@ backend_shader::backend_shader(const struct brw_compiler *compiler, stage(stage), use_gather_constants(false), nr_ubo_gather_table(0), + nr_gather_table(0), ubo_gather_table(NULL) { debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage); diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index f0afce5dc5..e689097df6 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -276,16 +276,11 @@ public: void setup_image_uniform_values(unsigned param_offset, const gl_uniform_storage *storage); bool use_gather_constants; - unsigned nr_ubo_gather_table; + unsigned nr_ubo_gather_table; /* UBO uniforms */ + unsigned nr_gather_table; /* Ordinary uniforms*/ /** Gather table for UBO entries only */ - struct gather_table { - int reg; - unsigned channel_mask; - unsigned const_block; - unsigned const_offset; - }; - gather_table *ubo_gather_table; + struct brw_gather_table *ubo_gather_table; }; uint32_t brw_texture_offset(int *offsets, unsigned num_components); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 4bba4a51aa..9111d97674 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -3812,7 +3812,7 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler, this->uniform_array_size += stage_prog_data->nr_ubo_params; this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size); this->uniform_vector_size = rzalloc_array(mem_ctx, int, this->uniform_array_size); - this->ubo_gather_table = rzalloc_array(mem_ctx, backend_shader::gather_table, + this->ubo_gather_table = rzalloc_array(mem_ctx, struct brw_gather_table, this->uniform_array_size); } diff --git 
a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index bb375b9c9d..e26c38b346 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -71,20 +71,31 @@ gen6_upload_push_constants(struct brw_context *brw, gl_constant_value *param; unsigned i; + struct brw_shader_program *current = + (struct brw_shader_program *) ctx->_Shader->_CurrentFragmentProgram; +#define CONT + +#ifndef CONT const uint32_t size = prog_data->nr_params * sizeof(gl_constant_value); param = brw_state_batch(brw, type, size, - 32, &stage_state->push_const_offset); + 32, &stage_state->push_const_offset); +#endif if (brw->gather_pool.bo != NULL) { uint32_t surf_offset = 0; +#ifndef CONT brw_create_constant_surface(brw, brw->batch.bo, stage_state->push_const_offset, size, &surf_offset, false); +#else + brw_create_constant_surface(brw, current->bo, 0, + current->bo->size, &surf_offset, false); +#endif gen7_edit_hw_binding_table_entry(brw, stage_state->stage, BRW_UNIFORM_GATHER_INDEX_START, surf_offset); } - +#ifndef CONT STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); /* _NEW_PROGRAM_CONSTANTS @@ -112,6 +123,7 @@ gen6_upload_push_constants(struct brw_context *brw, fprintf(stderr, "\n"); fprintf(stderr, "\n"); } +#endif stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8; /* We can only push 32 registers of constants at a time. */ diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index 4daf2bc238..4fdc4ab6f5 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -52,13 +52,13 @@ gen7_submit_gather_table(struct brw_context* brw, SET_FIELD(BRW_UNIFORM_GATHER_INDEX_START / 16, BRW_GATHER_BINDING_TABLE_BLOCK)); OUT_BATCH(stage_state->push_const_offset); + struct brw_shader_program* prog = prog_data->program; for (int i = 0; i < prog_data->nr_gather_table; i++) { /* Which bo are we referring to? 
The uniform constant buffer or * the UBO block? */ - bool is_uniform = prog_data->gather_table[i].reg == -1; - int cb_offset = is_uniform ? i : - (prog_data->gather_table[i].const_offset / 16); + bool is_uniform = prog_data->gather_table[i].uniform_location != -1; + int cb_offset = prog_data->gather_table[i].const_offset / 16; int bt_offset = is_uniform ? 0 : (prog_data->gather_table[i].const_block + BRW_UBO_GATHER_INDEX_APPEND); @@ -68,6 +68,10 @@ gen7_submit_gather_table(struct brw_context* brw, OUT_BATCH(SET_FIELD(cb_offset, BRW_GATHER_CONST_BUFFER_OFFSET) | SET_FIELD(prog_data->gather_table[i].channel_mask, BRW_GATHER_CHANNEL_MASK) | bt_offset); + + int loc = prog_data->gather_table[i].uniform_location; + if (loc > -1) + prog->storage_table[loc].needs_update = true; } ADVANCE_BATCH(); } |