diff options
author | Francisco Jerez <currojerez@riseup.net> | 2016-05-19 12:50:01 +0200 |
---|---|---|
committer | Iago Toral Quiroga <itoral@igalia.com> | 2016-05-23 08:56:57 +0200 |
commit | b46867cd378e5fb135fd060d50c8028d3dac622a (patch) | |
tree | 3ae8eb57917b0af172f02699c7e450a3febbb346 | |
parent | 38b719d6244e831effe93a5d10e932aad2100984 (diff) |
i965/fs: do not depend on std140 alignment rules for UBO loads
The previous implementation relied on the std140 alignment rules to
avoid handling misalignment in the case where we are loading more than
2 double components from a vector, which requires to emit a second load
message.
This alternative implementation deals with misalignment and is more
flexible going forward.
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 59 |
1 files changed, 13 insertions, 46 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index d5ef569536..b7df28663d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -3585,9 +3585,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr nir->info.num_ubos - 1); } - /* Number of 32-bit slots in the type */ - unsigned type_slots = MAX2(1, type_sz(dest.type) / 4); - nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); if (const_offset == NULL) { fs_reg base_offset = retype(get_nir_src(instr->src[1]), @@ -3605,55 +3602,25 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr * we let CSE deal with duplicate loads. Here we see a vector access * and we have to split it if necessary. */ - fs_reg packed_consts = vgrf(glsl_type::float_type); - packed_consts.type = dest.type; + const unsigned type_size = type_sz(dest.type); + const fs_reg packed_consts = bld.vgrf(BRW_REGISTER_TYPE_F); + for (unsigned c = 0; c < instr->num_components;) { + const unsigned base = const_offset->u32[0] + c * type_size; - unsigned const_offset_aligned = const_offset->u32[0] & ~15; + /* Number of usable components in the next 16B-aligned load */ + const unsigned count = MIN2(instr->num_components - c, + (16 - base % 16) / type_size); - /* A vec4 only contains half of a dvec4, if we need more than 2 - * components of a dvec4 we will have to issue another load for - * components z and w. - */ - int num_components; - if (type_slots == 1) - num_components = instr->num_components; - else - num_components = MIN2(2, instr->num_components); - - /* The computation of num_components doesn't take into account - * misalignment, which should be okay according to std140 vector - * alignment rules. - */ - assert(const_offset->u32[0] % 16 + - type_sz(dest.type) * num_components <= 16); - - int remaining_components = instr->num_components; - while (remaining_components > 0) { - /* Read the vec4 from a 16-byte aligned offset */ - struct brw_reg const_offset_reg = brw_imm_ud(const_offset_aligned); bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, - retype(packed_consts, BRW_REGISTER_TYPE_F), - surf_index, const_offset_reg); - - const fs_reg consts = byte_offset(packed_consts, (const_offset->u32[0] % 16)); - unsigned dest_offset = instr->num_components - remaining_components; + packed_consts, surf_index, brw_imm_ud(base & ~15)); - /* XXX: This doesn't update the sub-16B offset across iterations of - * the loop, which should work for std140 vector alignment rules. - */ - assert(dest_offset == 0 || const_offset->u32[0] % 16 == 0); + const fs_reg consts = + retype(byte_offset(packed_consts, base & 15), dest.type); - for (int i = 0; i < num_components; i++) - bld.MOV(offset(dest, bld, i + dest_offset), component(consts, i)); + for (unsigned d = 0; d < count; d++) + bld.MOV(offset(dest, bld, c + d), component(consts, d)); - /* If this is a large enough 64-bit load, we will need to emit - * another message - */ - remaining_components -= num_components; - assert(remaining_components == 0 || - (remaining_components <= 2 && type_slots == 2)); - num_components = remaining_components; - const_offset_aligned += 16; + c += count; } } break; |