summaryrefslogtreecommitdiff
path: root/src/asahi
diff options
context:
space:
mode:
authorAlyssa Rosenzweig <alyssa@rosenzweig.io>2024-05-26 14:06:19 -0400
committerMarge Bot <emma+marge@anholt.net>2024-06-07 16:57:03 +0000
commit5f72234745ca1e57bb4b70c6c0d19b6ae8511705 (patch)
treef09557723f0c9964c21b0d8d37a6e40dd37a5265 /src/asahi
parentd3291ad001dbb797dae2c9e50ed75d2686848965 (diff)
asahi: split param structs for GS internal kernel
this simplifies state management considerably Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29607>
Diffstat (limited to 'src/asahi')
-rw-r--r--src/asahi/lib/agx_nir_lower_gs.c14
-rw-r--r--src/asahi/lib/shaders/geometry.cl64
-rw-r--r--src/asahi/lib/shaders/geometry.h64
3 files changed, 80 insertions, 62 deletions
diff --git a/src/asahi/lib/agx_nir_lower_gs.c b/src/asahi/lib/agx_nir_lower_gs.c
index 5d3848c4c75..1da1b31371e 100644
--- a/src/asahi/lib/agx_nir_lower_gs.c
+++ b/src/asahi/lib/agx_nir_lower_gs.c
@@ -379,10 +379,7 @@ lower_id(nir_builder *b, nir_intrinsic_instr *intr, void *data)
id = load_geometry_param(b, flat_outputs);
else if (intr->intrinsic == nir_intrinsic_load_input_topology_agx)
id = load_geometry_param(b, input_topology);
- else if (intr->intrinsic == nir_intrinsic_load_provoking_last) {
- id = nir_b2b32(
- b, libagx_is_provoking_last(b, nir_load_input_assembly_buffer_agx(b)));
- } else
+ else
return false;
b->cursor = nir_instr_remove(&intr->instr);
@@ -1452,10 +1449,7 @@ agx_nir_gs_setup_indirect(nir_builder *b, const void *data)
{
const struct agx_gs_setup_indirect_key *key = data;
- libagx_gs_setup_indirect(b, nir_load_geometry_param_buffer_agx(b),
- nir_load_input_assembly_buffer_agx(b),
- nir_load_vs_output_buffer_ptr_agx(b),
- nir_load_vs_outputs_agx(b),
+ libagx_gs_setup_indirect(b, nir_load_preamble(b, 1, 64, .base = 0),
nir_imm_int(b, key->prim),
nir_channel(b, nir_load_local_invocation_id(b), 0));
}
@@ -1463,10 +1457,10 @@ agx_nir_gs_setup_indirect(nir_builder *b, const void *data)
void
agx_nir_unroll_restart(nir_builder *b, const void *data)
{
+ const struct agx_unroll_restart_key *key = data;
b->shader->info.workgroup_size[0] = 1024;
- const struct agx_unroll_restart_key *key = data;
- nir_def *ia = nir_load_input_assembly_buffer_agx(b);
+ nir_def *ia = nir_load_preamble(b, 1, 64, .base = 0);
nir_def *draw = nir_channel(b, nir_load_workgroup_id(b), 0);
nir_def *lane = nir_channel(b, nir_load_local_invocation_id(b), 0);
nir_def *mode = nir_imm_int(b, key->prim);
diff --git a/src/asahi/lib/shaders/geometry.cl b/src/asahi/lib/shaders/geometry.cl
index e5bb5afaed9..a8ed851596d 100644
--- a/src/asahi/lib/shaders/geometry.cl
+++ b/src/asahi/lib/shaders/geometry.cl
@@ -249,8 +249,9 @@ first_true_thread_in_workgroup(bool cond, local uint *scratch)
* sets up most of the new draw descriptor.
*/
static global void *
-setup_unroll_for_draw(global struct agx_ia_state *ia, constant uint *in_draw,
- uint draw, enum mesa_prim mode, uint index_size_B)
+setup_unroll_for_draw(global struct agx_restart_unroll_params *p,
+ constant uint *in_draw, uint draw, enum mesa_prim mode,
+ uint index_size_B)
{
/* Determine an upper bound on the memory required for the index buffer.
* Restarts only decrease the unrolled index buffer size, so the maximum size
@@ -263,12 +264,12 @@ setup_unroll_for_draw(global struct agx_ia_state *ia, constant uint *in_draw,
/* Allocate memory from the heap for the unrolled index buffer. Use an atomic
* since multiple threads may be running to handle multidraw in parallel.
*/
- global struct agx_geometry_state *heap = ia->heap;
+ global struct agx_geometry_state *heap = p->heap;
uint old_heap_bottom_B = atomic_fetch_add(
(volatile atomic_uint *)(&heap->heap_bottom), align(alloc_size, 4));
/* Regardless of the input stride, we use tightly packed output draws */
- global uint *out = &ia->out_draws[5 * draw];
+ global uint *out = &p->out_draws[5 * draw];
/* Setup most of the descriptor. Count will be determined after unroll. */
out[1] = in_draw[1]; /* instance count */
@@ -281,28 +282,28 @@ setup_unroll_for_draw(global struct agx_ia_state *ia, constant uint *in_draw,
}
#define UNROLL(INDEX, suffix) \
- kernel void libagx_unroll_restart_##suffix(global struct agx_ia_state *ia, \
- enum mesa_prim mode, uint draw, \
- uint tid) \
+ kernel void libagx_unroll_restart_##suffix( \
+ global struct agx_restart_unroll_params *p, enum mesa_prim mode, \
+ uint draw, uint tid) \
{ \
/* For an indirect multidraw, we are dispatched maxDraws times and \
* terminate trailing invocations. \
*/ \
- if (ia->count && draw >= *(ia->count)) \
+ if (p->count && draw >= *(p->count)) \
return; \
\
constant uint *in_draw = \
- (constant uint *)(ia->draws + (draw * ia->draw_stride)); \
+ (constant uint *)(p->draws + (draw * p->draw_stride)); \
\
uint count = in_draw[0]; \
\
local uintptr_t out_ptr, in_ptr; \
if (tid == 0) { \
- out_ptr = (uintptr_t)setup_unroll_for_draw(ia, in_draw, draw, mode, \
+ out_ptr = (uintptr_t)setup_unroll_for_draw(p, in_draw, draw, mode, \
sizeof(INDEX)); \
\
/* Accessed thru local mem because NIR deref is too aggressive */ \
- in_ptr = (uintptr_t)(ia->index_buffer + sizeof(INDEX) * in_draw[2]); \
+ in_ptr = (uintptr_t)(p->index_buffer + sizeof(INDEX) * in_draw[2]); \
} \
\
barrier(CLK_LOCAL_MEM_FENCE); \
@@ -312,9 +313,9 @@ setup_unroll_for_draw(global struct agx_ia_state *ia, constant uint *in_draw,
local uint scratch[32]; \
\
uint out_prims = 0; \
- INDEX restart_idx = ia->restart_index; \
- bool flatshade_first = ia->flatshade_first; \
- uint in_size_el = ia->index_buffer_size_B / sizeof(INDEX); \
+ INDEX restart_idx = p->restart_index; \
+ bool flatshade_first = p->flatshade_first; \
+ uint in_size_el = p->index_buffer_size_B / sizeof(INDEX); \
\
uint needle = 0; \
uint per_prim = mesa_vertices_per_prim(mode); \
@@ -324,6 +325,7 @@ setup_unroll_for_draw(global struct agx_ia_state *ia, constant uint *in_draw,
for (;;) { \
/* Relies on shortcircuiting */ \
uint idx = next_restart + tid; \
+ /* XXX: robustness here */ \
bool restart = idx >= count || in[idx] == restart_idx; \
\
uint next_offs = first_true_thread_in_workgroup(restart, scratch); \
@@ -353,7 +355,7 @@ setup_unroll_for_draw(global struct agx_ia_state *ia, constant uint *in_draw,
} \
\
if (tid == 0) \
- ia->out_draws[(5 * draw) + 0] = out_prims * per_prim; \
+ p->out_draws[(5 * draw) + 0] = out_prims * per_prim; \
}
UNROLL(uchar, u8)
@@ -447,16 +449,15 @@ libagx_build_gs_draw(global struct agx_geometry_params *p, uint vertices,
}
void
-libagx_gs_setup_indirect(global struct agx_geometry_params *p,
- global struct agx_ia_state *ia,
- global uintptr_t *vertex_buffer, uint64_t vs_outputs,
+libagx_gs_setup_indirect(global struct agx_gs_setup_indirect_params *gsi,
enum mesa_prim mode, uint local_id)
{
- global uint *in_draw = (global uint *)ia->draws;
+ global struct agx_geometry_params *p = gsi->geom;
+ global struct agx_ia_state *ia = gsi->ia;
/* Determine the (primitives, instances) grid size. */
- uint vertex_count = in_draw[0];
- uint instance_count = in_draw[1];
+ uint vertex_count = gsi->draw[0];
+ uint instance_count = gsi->draw[1];
ia->verts_per_instance = vertex_count;
@@ -478,22 +479,27 @@ libagx_gs_setup_indirect(global struct agx_geometry_params *p,
* indirect draw, the hardware would do this for us, but for software input
* assembly we need to do it ourselves.
*/
- if (ia->index_buffer) {
- ia->index_buffer += ((constant uint *)ia->draws)[2] * ia->index_size_B;
+ if (gsi->index_buffer) {
+ ia->index_buffer = gsi->index_buffer + gsi->draw[2] * gsi->index_size_B;
}
- /* We may need to allocate VS and GS count buffers, do so now */
+ /* We need to allocate VS and GS count buffers, do so now */
global struct agx_geometry_state *state = p->state;
uint vertex_buffer_size =
- libagx_tcs_in_size(vertex_count * instance_count, vs_outputs);
+ libagx_tcs_in_size(vertex_count * instance_count, gsi->vs_outputs);
p->count_buffer = (global uint *)(state->heap + state->heap_bottom);
state->heap_bottom +=
align(p->input_primitives * p->count_buffer_stride, 16);
- *vertex_buffer = (uintptr_t)(state->heap + state->heap_bottom);
+ *(gsi->vertex_buffer) = (uintptr_t)(state->heap + state->heap_bottom);
state->heap_bottom += align(vertex_buffer_size, 4);
+
+ if (state->heap_bottom > 1024 * 1024 * 128) {
+ global uint *foo = (global uint *)(uintptr_t)0x1deadbeef;
+ *foo = 0x1234;
+ }
}
/*
@@ -567,12 +573,6 @@ libagx_prefix_sum(global uint *buffer, uint len, uint words, uint word)
}
}
-bool
-libagx_is_provoking_last(global struct agx_ia_state *ia)
-{
- return !ia->flatshade_first;
-}
-
uintptr_t
libagx_vertex_output_address(uintptr_t buffer, uint64_t mask, uint vtx,
gl_varying_slot location)
diff --git a/src/asahi/lib/shaders/geometry.h b/src/asahi/lib/shaders/geometry.h
index 01867267e38..27ecff6b865 100644
--- a/src/asahi/lib/shaders/geometry.h
+++ b/src/asahi/lib/shaders/geometry.h
@@ -24,16 +24,13 @@
/* Packed geometry state buffer */
struct agx_geometry_state {
- /* Heap to allocate from, in either direction. By convention, the top is used
- * for intra-draw allocations and the bottom is used for full-batch
- * allocations. In the future we could use kernel support to improve this.
- */
+ /* Heap to allocate from. */
GLOBAL(uchar) heap;
- uint32_t heap_bottom, heap_top, heap_size, padding;
+ uint32_t heap_bottom, heap_size;
} PACKED;
-AGX_STATIC_ASSERT(sizeof(struct agx_geometry_state) == 6 * 4);
+AGX_STATIC_ASSERT(sizeof(struct agx_geometry_state) == 4 * 4);
-struct agx_ia_state {
+struct agx_restart_unroll_params {
/* Heap to allocate from across draws */
GLOBAL(struct agx_geometry_state) heap;
@@ -46,36 +43,63 @@ struct agx_ia_state {
/* Input: indirect draw descriptor. Raw pointer since it's strided. */
uint64_t draws;
- /* When unrolling primitive restart, output draw descriptors */
+ /* Output draw descriptors */
GLOBAL(uint) out_draws;
- /* Number of vertices per instance. Written by CPU for direct draw, indirect
- * setup kernel for indirect. This is used for VS->GS and VS->TCS indexing.
- */
- uint32_t verts_per_instance;
-
/* Input: maximum draw count, count is clamped to this */
uint32_t max_draws;
- /* Primitive restart index, if unrolling */
+ /* Primitive restart index */
uint32_t restart_index;
- /* Input index buffer size in bytes, if unrolling */
+ /* Input index buffer size in bytes */
uint32_t index_buffer_size_B;
- /* Stride for the draw descrptor array */
+ /* Stride for the draw descriptor array */
uint32_t draw_stride;
- /* When unrolling primitive restart, use first vertex as the provoking vertex
- * for flat shading. We could stick this in the key, but meh, you're already
- * hosed for perf on the unroll path.
+ /* Use first vertex as the provoking vertex for flat shading. We could stick
+ * this in the key, but meh, you're already hosed for perf on the unroll
+ * path.
*/
uint32_t flatshade_first;
+} PACKED;
+AGX_STATIC_ASSERT(sizeof(struct agx_restart_unroll_params) == 15 * 4);
+
+struct agx_gs_setup_indirect_params {
+ /* Index buffer if present. */
+ CONST(uchar) index_buffer;
+
+ /* Indirect draw descriptor. */
+ CONST(uint) draw;
+
+ /* Pointer to be written with allocated vertex buffer */
+ GLOBAL(uintptr_t) vertex_buffer;
+
+ /* Output input assembly state */
+ GLOBAL(struct agx_ia_state) ia;
+
+ /* Output geometry parameters */
+ GLOBAL(struct agx_geometry_params) geom;
+
+ /* Vertex (TES) output mask for sizing the allocated buffer */
+ uint64_t vs_outputs;
/* The index size (1, 2, 4) or 0 if drawing without an index buffer. */
uint32_t index_size_B;
} PACKED;
-AGX_STATIC_ASSERT(sizeof(struct agx_ia_state) == 17 * 4);
+AGX_STATIC_ASSERT(sizeof(struct agx_gs_setup_indirect_params) == 13 * 4);
+
+struct agx_ia_state {
+ /* Index buffer if present. */
+ CONST(uchar) index_buffer;
+
+ /* Number of vertices per instance. Written by CPU for direct draw, indirect
+ * setup kernel for indirect. This is used for VS->GS and VS->TCS indexing.
+ */
+ uint32_t verts_per_instance;
+} PACKED;
+AGX_STATIC_ASSERT(sizeof(struct agx_ia_state) == 3 * 4);
struct agx_geometry_params {
/* Persistent (cross-draw) geometry state */