summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> 2021-01-29 18:06:02 -0500
committer Marge Bot <eric+marge@anholt.net> 2021-02-11 17:24:37 +0000
commit db7e2dce1c83f826f01a6d9508b84afe34097347 (patch)
tree f892fdf5237a453a7dd7984d382ed81680d74fe3
parent 0dc539a87227e88bbee790ecc6ec8ec6eb991333 (diff)
panfrost: Move sysvals to dedicated UBO
This makes UBO 0 less special, allowing us to generalize uniform optimization. Note this disables RMU on Midgard as we're about to rewrite the RMU mechanism.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8973>
-rw-r--r-- src/gallium/drivers/panfrost/pan_assemble.c 8
-rw-r--r-- src/gallium/drivers/panfrost/pan_cmdstream.c 36
-rw-r--r-- src/panfrost/bifrost/bifrost_compile.c 24
-rw-r--r-- src/panfrost/midgard/midgard_compile.c 9
-rw-r--r-- src/panfrost/midgard/midgard_schedule.c 2
5 files changed, 19 insertions, 60 deletions
diff --git a/src/gallium/drivers/panfrost/pan_assemble.c b/src/gallium/drivers/panfrost/pan_assemble.c
index 9508785bb1b..31d262cb039 100644
--- a/src/gallium/drivers/panfrost/pan_assemble.c
+++ b/src/gallium/drivers/panfrost/pan_assemble.c
@@ -399,12 +399,8 @@ panfrost_shader_compile(struct panfrost_context *ctx,
state->attribute_count = attribute_count;
state->varying_count = varying_count;
- /* Uniforms have been lowered to UBOs using nir_lower_uniforms_to_ubo()
- * which already increments s->info.num_ubos. We do have to account for
- * the "no uniform, no UBO" case though, otherwise sysval passed
- * through uniforms won't work correctly.
- */
- state->ubo_count = MAX2(s->info.num_ubos, 1);
+ /* Sysvals have dedicated UBO */
+ state->ubo_count = s->info.num_ubos + (state->sysval_count ? 1 : 0);
/* Prepare the descriptors at compile-time */
state->shader.shader = shader;
diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c
index 55a52f6eded..40bfec8568d 100644
--- a/src/gallium/drivers/panfrost/pan_cmdstream.c
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.c
@@ -989,55 +989,39 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
return 0;
struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage];
-
struct panfrost_shader_state *ss = &all->variants[all->active_variant];
- /* Uniforms are implicitly UBO #0 */
- bool has_uniforms = buf->enabled_mask & (1 << 0);
-
/* Allocate room for the sysval and the uniforms */
size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
- size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) : 0;
- size_t size = sys_size + uniform_size;
struct panfrost_ptr transfer =
- panfrost_pool_alloc_aligned(&batch->pool, size, 16);
+ panfrost_pool_alloc_aligned(&batch->pool, sys_size, 16);
/* Upload sysvals requested by the shader */
panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);
- /* Upload uniforms */
- if (has_uniforms && uniform_size) {
- const void *cpu = panfrost_map_constant_buffer_cpu(ctx, buf, 0);
- memcpy(transfer.cpu + sys_size, cpu, uniform_size);
- }
-
- /* Next up, attach UBOs. UBO #0 is the uniforms we just
- * uploaded, so it's always included. The count is the highest UBO
- * addressable -- gaps are included. */
-
- unsigned ubo_count = 32 - __builtin_clz(buf->enabled_mask | 1);
+ /* Next up, attach UBOs. UBO count includes gaps but no sysval UBO */
+ struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, stage);
+ unsigned ubo_count = shader->ubo_count - (sys_size ? 1 : 0);
- size_t sz = MALI_UNIFORM_BUFFER_LENGTH * ubo_count;
+ size_t sz = MALI_UNIFORM_BUFFER_LENGTH * (ubo_count + 1);
struct panfrost_ptr ubos =
panfrost_pool_alloc_aligned(&batch->pool, sz,
MALI_UNIFORM_BUFFER_LENGTH);
uint64_t *ubo_ptr = (uint64_t *) ubos.cpu;
- /* Upload uniforms as a UBO */
+ /* Upload sysval as a final UBO */
- if (size) {
- pan_pack(ubo_ptr, UNIFORM_BUFFER, cfg) {
- cfg.entries = DIV_ROUND_UP(size, 16);
+ if (sys_size) {
+ pan_pack(ubo_ptr + ubo_count, UNIFORM_BUFFER, cfg) {
+ cfg.entries = DIV_ROUND_UP(sys_size, 16);
cfg.pointer = transfer.gpu;
}
- } else {
- *ubo_ptr = 0;
}
/* The rest are honest-to-goodness UBOs */
- for (unsigned ubo = 1; ubo < ubo_count; ++ubo) {
+ for (unsigned ubo = 0; ubo < ubo_count; ++ubo) {
size_t usz = buf->cb[ubo].buffer_size;
bool enabled = buf->enabled_mask & (1 << ubo);
bool empty = usz == 0;
diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index 1a90d947c99..12672c4e955 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -489,28 +489,9 @@ bi_emit_load_ubo(bi_builder *b, nir_intrinsic_instr *instr)
bool offset_is_const = nir_src_is_const(*offset);
bi_index dyn_offset = bi_src_index(offset);
- uint32_t const_offset = 0;
-
+ uint32_t const_offset = offset_is_const ? nir_src_as_uint(*offset) : 0;
bool kernel_input = (instr->intrinsic == nir_intrinsic_load_kernel_input);
- /* We may need to offset UBO loads by however many sysvals we have */
- unsigned sysval_offset = 16 * b->shader->sysvals.sysval_count;
-
- if (nir_src_is_const(*offset))
- const_offset = nir_src_as_uint(*offset);
-
- if ((kernel_input ||
- (nir_src_is_const(instr->src[0]) &&
- nir_src_as_uint(instr->src[0]) == 0)) &&
- b->shader->sysvals.sysval_count) {
- if (offset_is_const) {
- const_offset += sysval_offset;
- } else {
- dyn_offset = bi_iadd_u32(b, dyn_offset,
- bi_imm_u32(sysval_offset), false);
- }
- }
-
bi_load_to(b, instr->num_components * 32,
bi_dest_index(&instr->dest), offset_is_const ?
bi_imm_u32(const_offset) : dyn_offset,
@@ -635,7 +616,8 @@ bi_load_sysval(bi_builder *b, nir_instr *instr,
unsigned idx = (uniform * 16) + offset;
bi_load_to(b, nr_components * 32, bi_dest_index(&nir_dest),
- bi_imm_u32(idx), bi_zero(), BI_SEG_UBO);
+ bi_imm_u32(idx),
+ bi_imm_u32(b->shader->nir->info.num_ubos), BI_SEG_UBO);
}
/* gl_FragCoord.xy = u16_to_f32(R59.xy) + 0.5
diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index 39d28a9e649..fcfb145263f 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -1451,7 +1451,8 @@ emit_sysval_read(compiler_context *ctx, nir_instr *instr,
/* Emit the read itself -- this is never indirect */
midgard_instruction *ins =
- emit_ubo_read(ctx, instr, dest, (uniform * 16) + offset, NULL, 0, 0);
+ emit_ubo_read(ctx, instr, dest, (uniform * 16) + offset, NULL, 0,
+ ctx->nir->info.num_ubos);
ins->mask = mask_of(nr_components);
}
@@ -1708,7 +1709,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
reg = nir_dest_index(&instr->dest);
if (is_kernel) {
- emit_ubo_read(ctx, &instr->instr, reg, (ctx->sysvals.sysval_count * 16) + offset, indirect_offset, 0, 0);
+ emit_ubo_read(ctx, &instr->instr, reg, offset, indirect_offset, 0, 0);
} else if (is_ubo) {
nir_src index = instr->src[0];
@@ -1716,10 +1717,6 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
assert(nir_src_is_const(index));
uint32_t uindex = nir_src_as_uint(index);
-
- if (uindex == 0)
- offset += ctx->sysvals.sysval_count * 16;
-
emit_ubo_read(ctx, &instr->instr, reg, offset, indirect_offset, 0, uindex);
} else if (is_global || is_shared || is_scratch) {
unsigned seg = is_global ? LDST_GLOBAL : (is_shared ? LDST_SHARED : LDST_SCRATCH);
diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c
index 973af220c2e..399aecb852e 100644
--- a/src/panfrost/midgard/midgard_schedule.c
+++ b/src/panfrost/midgard/midgard_schedule.c
@@ -1447,7 +1447,7 @@ schedule_block(compiler_context *ctx, midgard_block *block)
void
midgard_schedule_program(compiler_context *ctx)
{
- midgard_promote_uniforms(ctx);
+// midgard_promote_uniforms(ctx);
/* Must be lowered right before scheduling */
mir_squeeze_index(ctx);