diff options
author | Abdiel Janulgue <abdiel.janulgue@linux.intel.com> | 2015-10-16 13:43:56 +0300 |
---|---|---|
committer | Abdiel Janulgue <abdiel.janulgue@linux.intel.com> | 2015-10-16 13:43:56 +0300 |
commit | 3675bdb3bdee1021dc3d61bbbad7e4ed6bfc0e44 (patch) | |
tree | ae36da7c4cae3a6c315ccbd968953d6608986f79 | |
parent | f762d4f98e8aebac68227bf0f82287245d4a65ee (diff) |
Don't wait until the ringbuffer is full to reset the offsets. Reset them right away on batch flush. (branch: contiguous_uniforms2)
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 8 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_program.c | 110 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_shader.cpp | 9 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_vs_state.c | 10 |
6 files changed, 91 insertions, 57 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 16f7c42401..394ac17cd6 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -348,7 +348,10 @@ struct brw_shader_program { drm_intel_bo *bo; /**< Actual uniform backing store */ unsigned next_offset; - /* Mapping to uniform storage locations in this shader program */ + /** If true, the head pointer should be reset to zero at next use.*/ + bool zero_offsets; + + /** Mapping to uniform storage locations in this shader program */ struct storage_data { struct brw_gather_table *gather_table; @@ -374,6 +377,9 @@ struct brw_shader_program { unsigned size; unsigned offset; } *storage_table; + + /** Pointers to the update flag of live uniform storages */ + bool **reset_update_flags; }; struct brw_gather_table diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 481fa63290..92ee521e6a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1936,12 +1936,14 @@ fs_visitor::assign_constant_locations() struct brw_shader_program *prog = (struct brw_shader_program *) shader_prog; for (unsigned i = 0; i < this->nr_gather_table; i++) { int const_idx = this->ubo_gather_table[i].reg; + int loc = this->ubo_gather_table[i].uniform_location; + if (push_constant_loc[const_idx] == -1) { - printf("Skip GT[%d]\n", i); + printf("Skip GT[%d] | loc: %d\n", i, + prog->storage_table[loc].base_index); continue; } - int loc = this->ubo_gather_table[i].uniform_location; prog->storage_table[loc].gather_table = stage_prog_data->gather_table; if (!prog->storage_table[loc].live_array_indices) { diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index c5211b30c1..d60374ad5b 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -46,6 +46,7 @@ #include "brw_shader.h" #include "brw_nir.h" 
#include "brw_wm.h" +#include "brw_state.h" #include "intel_batchbuffer.h" static unsigned @@ -319,55 +320,72 @@ brwUniformStorageChange(struct gl_context *ctx, struct gl_shader_program *prog, int loc = storage->remap_location; - printf(" Try update stor: 0x%x | next_off: %d\n", - storage, shader_prog->next_offset - ); - if (shader_prog->storage_table[loc].needs_update && - shader_prog->storage_table[loc].live_array_nr > 0) { - - if ((shader_prog->next_offset + shader_prog->storage_table[loc].size) - > shader_prog->bo->size) { - /* Our uniform ringbuffer has finally wrapped */ - intel_batchbuffer_flush(brw); - shader_prog->next_offset = 0; + /* Always clear out the ring whenever the batch flushes */ + if (!brw_state_dirty(brw, 0, BRW_NEW_BATCH)) { + shader_prog->zero_offsets = false; + } else if (!shader_prog->zero_offsets) { + printf(" --------RESETTING to Zero0000000 nrp: %d-----\n", + prog->NumUniformStorage); + + shader_prog->zero_offsets = true; + shader_prog->next_offset = 0; + for (int i = 0; i < prog->NumUniformStorage; i++) { + *shader_prog->reset_update_flags[i] = true; } + } + printf("Try update stor: 0x%x [loc: %d] | next_off: %d | bat reset? %s\n", + storage, loc, shader_prog->next_offset, + shader_prog->zero_offsets ? 
"YES":"NO"); - storage->driver_storage[0].data = - shader_prog->bo->virtual + shader_prog->next_offset; - - /* Update the current offset within the buffer */ - printf(" ++ Update stor: 0x%x prev offset: %d to %d\n", - storage, shader_prog->storage_table[loc].offset, - shader_prog->next_offset); - shader_prog->storage_table[loc].offset = shader_prog->next_offset; - shader_prog->storage_table[loc].needs_update = false; - - /* Update the gather table to reflect the new offsets */ - struct brw_gather_table *gt = shader_prog->storage_table[loc].gather_table; - - printf(" Active indices: 0x%lx | livearr: %d\n",0, - shader_prog->storage_table[loc].live_array_nr); - unsigned base_size = shader_prog->storage_table[loc].size / - MAX2(storage->array_elements, 1); - /** Update active entries in the gather table. If the uniform - * storage is an array, the gather table might have been repacked. Only - * update the corresponding offsets in the gather table that are - * marked live. - */ - for (int i = 0; i < shader_prog->storage_table[loc].live_array_nr; i++) { - uint16_t gather_idx = shader_prog->storage_table[loc] - .live_array_indices[i].gather_index; - uint16_t array_idx = shader_prog->storage_table[loc] - .live_array_indices[i].array_index; - gt[gather_idx].const_offset = - shader_prog->storage_table[loc].offset + (base_size * array_idx); - - printf(" GT[%d] = %d | new offset: %d | base: %d\n", - gather_idx, array_idx, - gt[gather_idx].const_offset, 0); - } - shader_prog->next_offset += shader_prog->storage_table[loc].size; + if (!shader_prog->storage_table[loc].needs_update) + return; + + /* Unlikely, but do an explicit flush when the ringbuffer is full enough that + * the uniform offset starts to wrap around. 
+ */ + if ((shader_prog->next_offset + shader_prog->storage_table[loc].size) + > shader_prog->bo->size) { + intel_batchbuffer_flush(brw); + shader_prog->next_offset = 0; + printf(" ==-- BATCH FLUSHED --== \n"); + } + + storage->driver_storage[0].data = + shader_prog->bo->virtual + shader_prog->next_offset; + + /* Update the current offset within the buffer */ + printf(" ++ Update stor: 0x%x prev offset: %d to Current: %d\n", + storage, shader_prog->storage_table[loc].offset, + shader_prog->next_offset); + shader_prog->storage_table[loc].offset = shader_prog->next_offset; + shader_prog->storage_table[loc].needs_update = false; + + /* Update the gather table to reflect the new offsets */ + struct brw_gather_table *gt = shader_prog->storage_table[loc].gather_table; + + printf(" =Active indices: 0x%lx | livearr: %d\n",0, + shader_prog->storage_table[loc].live_array_nr); + unsigned base_size = shader_prog->storage_table[loc].size / + MAX2(storage->array_elements, 1); + + /** Update active entries in the gather table. If the uniform + * storage is an array, the gather table might have been repacked. Only + * update the corresponding offsets in the gather table that are + * marked live. 
+ */ + for (int i = 0; i < shader_prog->storage_table[loc].live_array_nr; i++) { + uint16_t gather_idx = shader_prog->storage_table[loc] + .live_array_indices[i].gather_index; + uint16_t array_idx = shader_prog->storage_table[loc] + .live_array_indices[i].array_index; + gt[gather_idx].const_offset = + shader_prog->storage_table[loc].offset + (base_size * array_idx); + + printf(" GT[%d] = %d | new offset: %d | base: %d\n", + gather_idx, array_idx, + gt[gather_idx].const_offset, 0); } + shader_prog->next_offset += shader_prog->storage_table[loc].size; } void brwInitFragProgFuncs( struct dd_function_table *functions ) diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 37fd108270..936cff7ce5 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -356,7 +356,10 @@ void brw_associate_uniform_storage(struct gl_context *ctx, prog->storage_table = rzalloc_array(prog, brw_shader_program::storage_data, shader_prog->NumUniformRemapTable); + prog->reset_update_flags = rzalloc_array(prog, bool *, + shader_prog->NumUniformRemapTable); + int flags = 0; for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) { struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u]; @@ -422,8 +425,10 @@ void brw_associate_uniform_storage(struct gl_context *ctx, prog->storage_table[loc].offset = prog->next_offset; // prog->next_offset += size; - printf(" Attach Storage size: %d dmul: %d cols: %d els: %d | nextoff:%d\n", - size, dmul, columns, MAX2(storage->array_elements, 1), + prog->reset_update_flags[flags++] = &prog->storage_table[loc].needs_update; + printf(" Attach Storage size: %d [loc:%d] " + "dmul: %d cols: %d els: %d | nextoff:%d\n", + size, loc, dmul, columns, MAX2(storage->array_elements, 1), prog->next_offset); } } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 9460544c0c..4a4e4674f9 100644 --- 
a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -218,12 +218,13 @@ vec4_visitor::nir_setup_uniform(nir_variable *var) assert(uniforms < uniform_array_size); uniform_vector_size[uniforms] = storage->type->vector_elements; - printf("UNI[%d]\n", uniforms); + printf("UNI[%d] size: %d\n", uniforms, uniform_vector_size[uniforms]); int i; for (i = 0; i < uniform_vector_size[uniforms]; i++) { int idx = uniforms * 4 + i; stage_prog_data->param[idx] = components; - printf(" i:%d uni:%d p[%d]\n", i, uniforms, idx); + printf(" i:%d uni:%d p[%d] loc: %d\n", i, + uniforms, idx, loc); components++; } for (; i < 4; i++) { diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index 0dd929e1e4..dbba510df3 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -44,7 +44,13 @@ gen7_submit_gather_table(struct brw_context* brw, cb_valid |= (prog_data->nr_ubo_params > 0) ? (2 << (BRW_UBO_GATHER_INDEX_APPEND + prog_data->max_ubo_const_block)) - 1 : 0; + printf(" Gen %s GT for stage_prog: 0x%x | gt: 0x%x | num: %d\n", + _mesa_shader_stage_to_string(stage_state->stage), + prog_data, + prog_data->gather_table, + prog_data->nr_gather_table); assert(cb_valid < 0xffff); + assert(prog_data->nr_gather_table > 0 && prog_data->nr_gather_table < 128); BEGIN_BATCH(gather_dwords); OUT_BATCH(gather_opcode << 16 | (gather_dwords - 2)); @@ -52,10 +58,6 @@ gen7_submit_gather_table(struct brw_context* brw, SET_FIELD(BRW_UNIFORM_GATHER_INDEX_START / 16, BRW_GATHER_BINDING_TABLE_BLOCK)); OUT_BATCH(stage_state->push_const_offset); - printf(" Gen %s GT for stage_prog: 0x%x | gt: 0x%x\n", - _mesa_shader_stage_to_string(stage_state->stage), - prog_data, - prog_data->gather_table); struct brw_shader_program* prog = prog_data->program; for (int i = 0; i < prog_data->nr_gather_table; i++) { /* Which bo are we referring to? The uniform constant buffer or |