diff options
author | Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> | 2021-01-29 18:06:02 -0500 |
---|---|---|
committer | Marge Bot <eric+marge@anholt.net> | 2021-02-11 17:24:37 +0000 |
commit | db7e2dce1c83f826f01a6d9508b84afe34097347 (patch) | |
tree | f892fdf5237a453a7dd7984d382ed81680d74fe3 | |
parent | 0dc539a87227e88bbee790ecc6ec8ec6eb991333 (diff) |
panfrost: Move sysvals to dedicated UBO
This makes UBO 0 less special, allowing us to generalize uniform
optimization. Note that this disables register-mapped uniforms (RMU) on
Midgard, as we're about to rewrite the RMU mechanism.
Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8973>
-rw-r--r-- | src/gallium/drivers/panfrost/pan_assemble.c | 8 | ||||
-rw-r--r-- | src/gallium/drivers/panfrost/pan_cmdstream.c | 36 | ||||
-rw-r--r-- | src/panfrost/bifrost/bifrost_compile.c | 24 | ||||
-rw-r--r-- | src/panfrost/midgard/midgard_compile.c | 9 | ||||
-rw-r--r-- | src/panfrost/midgard/midgard_schedule.c | 2 |
5 files changed, 19 insertions, 60 deletions
diff --git a/src/gallium/drivers/panfrost/pan_assemble.c b/src/gallium/drivers/panfrost/pan_assemble.c index 9508785bb1b..31d262cb039 100644 --- a/src/gallium/drivers/panfrost/pan_assemble.c +++ b/src/gallium/drivers/panfrost/pan_assemble.c @@ -399,12 +399,8 @@ panfrost_shader_compile(struct panfrost_context *ctx, state->attribute_count = attribute_count; state->varying_count = varying_count; - /* Uniforms have been lowered to UBOs using nir_lower_uniforms_to_ubo() - * which already increments s->info.num_ubos. We do have to account for - * the "no uniform, no UBO" case though, otherwise sysval passed - * through uniforms won't work correctly. - */ - state->ubo_count = MAX2(s->info.num_ubos, 1); + /* Sysvals have dedicated UBO */ + state->ubo_count = s->info.num_ubos + (state->sysval_count ? 1 : 0); /* Prepare the descriptors at compile-time */ state->shader.shader = shader; diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 55a52f6eded..40bfec8568d 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -989,55 +989,39 @@ panfrost_emit_const_buf(struct panfrost_batch *batch, return 0; struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage]; - struct panfrost_shader_state *ss = &all->variants[all->active_variant]; - /* Uniforms are implicitly UBO #0 */ - bool has_uniforms = buf->enabled_mask & (1 << 0); - /* Allocate room for the sysval and the uniforms */ size_t sys_size = sizeof(float) * 4 * ss->sysval_count; - size_t uniform_size = has_uniforms ? 
(buf->cb[0].buffer_size) : 0; - size_t size = sys_size + uniform_size; struct panfrost_ptr transfer = - panfrost_pool_alloc_aligned(&batch->pool, size, 16); + panfrost_pool_alloc_aligned(&batch->pool, sys_size, 16); /* Upload sysvals requested by the shader */ panfrost_upload_sysvals(batch, transfer.cpu, ss, stage); - /* Upload uniforms */ - if (has_uniforms && uniform_size) { - const void *cpu = panfrost_map_constant_buffer_cpu(ctx, buf, 0); - memcpy(transfer.cpu + sys_size, cpu, uniform_size); - } - - /* Next up, attach UBOs. UBO #0 is the uniforms we just - * uploaded, so it's always included. The count is the highest UBO - * addressable -- gaps are included. */ - - unsigned ubo_count = 32 - __builtin_clz(buf->enabled_mask | 1); + /* Next up, attach UBOs. UBO count includes gaps but no sysval UBO */ + struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, stage); + unsigned ubo_count = shader->ubo_count - (sys_size ? 1 : 0); - size_t sz = MALI_UNIFORM_BUFFER_LENGTH * ubo_count; + size_t sz = MALI_UNIFORM_BUFFER_LENGTH * (ubo_count + 1); struct panfrost_ptr ubos = panfrost_pool_alloc_aligned(&batch->pool, sz, MALI_UNIFORM_BUFFER_LENGTH); uint64_t *ubo_ptr = (uint64_t *) ubos.cpu; - /* Upload uniforms as a UBO */ + /* Upload sysval as a final UBO */ - if (size) { - pan_pack(ubo_ptr, UNIFORM_BUFFER, cfg) { - cfg.entries = DIV_ROUND_UP(size, 16); + if (sys_size) { + pan_pack(ubo_ptr + ubo_count, UNIFORM_BUFFER, cfg) { + cfg.entries = DIV_ROUND_UP(sys_size, 16); cfg.pointer = transfer.gpu; } - } else { - *ubo_ptr = 0; } /* The rest are honest-to-goodness UBOs */ - for (unsigned ubo = 1; ubo < ubo_count; ++ubo) { + for (unsigned ubo = 0; ubo < ubo_count; ++ubo) { size_t usz = buf->cb[ubo].buffer_size; bool enabled = buf->enabled_mask & (1 << ubo); bool empty = usz == 0; diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index 1a90d947c99..12672c4e955 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ 
b/src/panfrost/bifrost/bifrost_compile.c @@ -489,28 +489,9 @@ bi_emit_load_ubo(bi_builder *b, nir_intrinsic_instr *instr) bool offset_is_const = nir_src_is_const(*offset); bi_index dyn_offset = bi_src_index(offset); - uint32_t const_offset = 0; - + uint32_t const_offset = offset_is_const ? nir_src_as_uint(*offset) : 0; bool kernel_input = (instr->intrinsic == nir_intrinsic_load_kernel_input); - /* We may need to offset UBO loads by however many sysvals we have */ - unsigned sysval_offset = 16 * b->shader->sysvals.sysval_count; - - if (nir_src_is_const(*offset)) - const_offset = nir_src_as_uint(*offset); - - if ((kernel_input || - (nir_src_is_const(instr->src[0]) && - nir_src_as_uint(instr->src[0]) == 0)) && - b->shader->sysvals.sysval_count) { - if (offset_is_const) { - const_offset += sysval_offset; - } else { - dyn_offset = bi_iadd_u32(b, dyn_offset, - bi_imm_u32(sysval_offset), false); - } - } - bi_load_to(b, instr->num_components * 32, bi_dest_index(&instr->dest), offset_is_const ? bi_imm_u32(const_offset) : dyn_offset, @@ -635,7 +616,8 @@ bi_load_sysval(bi_builder *b, nir_instr *instr, unsigned idx = (uniform * 16) + offset; bi_load_to(b, nr_components * 32, bi_dest_index(&nir_dest), - bi_imm_u32(idx), bi_zero(), BI_SEG_UBO); + bi_imm_u32(idx), + bi_imm_u32(b->shader->nir->info.num_ubos), BI_SEG_UBO); } /* gl_FragCoord.xy = u16_to_f32(R59.xy) + 0.5 diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 39d28a9e649..fcfb145263f 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -1451,7 +1451,8 @@ emit_sysval_read(compiler_context *ctx, nir_instr *instr, /* Emit the read itself -- this is never indirect */ midgard_instruction *ins = - emit_ubo_read(ctx, instr, dest, (uniform * 16) + offset, NULL, 0, 0); + emit_ubo_read(ctx, instr, dest, (uniform * 16) + offset, NULL, 0, + ctx->nir->info.num_ubos); ins->mask = mask_of(nr_components); } @@ -1708,7 +1709,7 @@ 
emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) reg = nir_dest_index(&instr->dest); if (is_kernel) { - emit_ubo_read(ctx, &instr->instr, reg, (ctx->sysvals.sysval_count * 16) + offset, indirect_offset, 0, 0); + emit_ubo_read(ctx, &instr->instr, reg, offset, indirect_offset, 0, 0); } else if (is_ubo) { nir_src index = instr->src[0]; @@ -1716,10 +1717,6 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) assert(nir_src_is_const(index)); uint32_t uindex = nir_src_as_uint(index); - - if (uindex == 0) - offset += ctx->sysvals.sysval_count * 16; - emit_ubo_read(ctx, &instr->instr, reg, offset, indirect_offset, 0, uindex); } else if (is_global || is_shared || is_scratch) { unsigned seg = is_global ? LDST_GLOBAL : (is_shared ? LDST_SHARED : LDST_SCRATCH); diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c index 973af220c2e..399aecb852e 100644 --- a/src/panfrost/midgard/midgard_schedule.c +++ b/src/panfrost/midgard/midgard_schedule.c @@ -1447,7 +1447,7 @@ schedule_block(compiler_context *ctx, midgard_block *block) void midgard_schedule_program(compiler_context *ctx) { - midgard_promote_uniforms(ctx); +// midgard_promote_uniforms(ctx); /* Must be lowered right before scheduling */ mir_squeeze_index(ctx); |