diff options
author | Alyssa Rosenzweig <alyssa@rosenzweig.io> | 2024-05-26 14:06:19 -0400 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2024-06-07 16:57:03 +0000 |
commit | 5f72234745ca1e57bb4b70c6c0d19b6ae8511705 (patch) | |
tree | f09557723f0c9964c21b0d8d37a6e40dd37a5265 /src/asahi | |
parent | d3291ad001dbb797dae2c9e50ed75d2686848965 (diff) |
asahi: split param structs for GS internal kernel
this simplifies state management considerably
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29607>
Diffstat (limited to 'src/asahi')
-rw-r--r-- | src/asahi/lib/agx_nir_lower_gs.c | 14 | ||||
-rw-r--r-- | src/asahi/lib/shaders/geometry.cl | 64 | ||||
-rw-r--r-- | src/asahi/lib/shaders/geometry.h | 64 |
3 files changed, 80 insertions, 62 deletions
diff --git a/src/asahi/lib/agx_nir_lower_gs.c b/src/asahi/lib/agx_nir_lower_gs.c index 5d3848c4c75..1da1b31371e 100644 --- a/src/asahi/lib/agx_nir_lower_gs.c +++ b/src/asahi/lib/agx_nir_lower_gs.c @@ -379,10 +379,7 @@ lower_id(nir_builder *b, nir_intrinsic_instr *intr, void *data) id = load_geometry_param(b, flat_outputs); else if (intr->intrinsic == nir_intrinsic_load_input_topology_agx) id = load_geometry_param(b, input_topology); - else if (intr->intrinsic == nir_intrinsic_load_provoking_last) { - id = nir_b2b32( - b, libagx_is_provoking_last(b, nir_load_input_assembly_buffer_agx(b))); - } else + else return false; b->cursor = nir_instr_remove(&intr->instr); @@ -1452,10 +1449,7 @@ agx_nir_gs_setup_indirect(nir_builder *b, const void *data) { const struct agx_gs_setup_indirect_key *key = data; - libagx_gs_setup_indirect(b, nir_load_geometry_param_buffer_agx(b), - nir_load_input_assembly_buffer_agx(b), - nir_load_vs_output_buffer_ptr_agx(b), - nir_load_vs_outputs_agx(b), + libagx_gs_setup_indirect(b, nir_load_preamble(b, 1, 64, .base = 0), nir_imm_int(b, key->prim), nir_channel(b, nir_load_local_invocation_id(b), 0)); } @@ -1463,10 +1457,10 @@ agx_nir_gs_setup_indirect(nir_builder *b, const void *data) void agx_nir_unroll_restart(nir_builder *b, const void *data) { + const struct agx_unroll_restart_key *key = data; b->shader->info.workgroup_size[0] = 1024; - const struct agx_unroll_restart_key *key = data; - nir_def *ia = nir_load_input_assembly_buffer_agx(b); + nir_def *ia = nir_load_preamble(b, 1, 64, .base = 0); nir_def *draw = nir_channel(b, nir_load_workgroup_id(b), 0); nir_def *lane = nir_channel(b, nir_load_local_invocation_id(b), 0); nir_def *mode = nir_imm_int(b, key->prim); diff --git a/src/asahi/lib/shaders/geometry.cl b/src/asahi/lib/shaders/geometry.cl index e5bb5afaed9..a8ed851596d 100644 --- a/src/asahi/lib/shaders/geometry.cl +++ b/src/asahi/lib/shaders/geometry.cl @@ -249,8 +249,9 @@ first_true_thread_in_workgroup(bool cond, local uint *scratch) * 
sets up most of the new draw descriptor. */ static global void * -setup_unroll_for_draw(global struct agx_ia_state *ia, constant uint *in_draw, - uint draw, enum mesa_prim mode, uint index_size_B) +setup_unroll_for_draw(global struct agx_restart_unroll_params *p, + constant uint *in_draw, uint draw, enum mesa_prim mode, + uint index_size_B) { /* Determine an upper bound on the memory required for the index buffer. * Restarts only decrease the unrolled index buffer size, so the maximum size @@ -263,12 +264,12 @@ setup_unroll_for_draw(global struct agx_ia_state *ia, constant uint *in_draw, /* Allocate memory from the heap for the unrolled index buffer. Use an atomic * since multiple threads may be running to handle multidraw in parallel. */ - global struct agx_geometry_state *heap = ia->heap; + global struct agx_geometry_state *heap = p->heap; uint old_heap_bottom_B = atomic_fetch_add( (volatile atomic_uint *)(&heap->heap_bottom), align(alloc_size, 4)); /* Regardless of the input stride, we use tightly packed output draws */ - global uint *out = &ia->out_draws[5 * draw]; + global uint *out = &p->out_draws[5 * draw]; /* Setup most of the descriptor. Count will be determined after unroll. */ out[1] = in_draw[1]; /* instance count */ @@ -281,28 +282,28 @@ setup_unroll_for_draw(global struct agx_ia_state *ia, constant uint *in_draw, } #define UNROLL(INDEX, suffix) \ - kernel void libagx_unroll_restart_##suffix(global struct agx_ia_state *ia, \ - enum mesa_prim mode, uint draw, \ - uint tid) \ + kernel void libagx_unroll_restart_##suffix( \ + global struct agx_restart_unroll_params *p, enum mesa_prim mode, \ + uint draw, uint tid) \ { \ /* For an indirect multidraw, we are dispatched maxDraws times and \ * terminate trailing invocations. 
\ */ \ - if (ia->count && draw >= *(ia->count)) \ + if (p->count && draw >= *(p->count)) \ return; \ \ constant uint *in_draw = \ - (constant uint *)(ia->draws + (draw * ia->draw_stride)); \ + (constant uint *)(p->draws + (draw * p->draw_stride)); \ \ uint count = in_draw[0]; \ \ local uintptr_t out_ptr, in_ptr; \ if (tid == 0) { \ - out_ptr = (uintptr_t)setup_unroll_for_draw(ia, in_draw, draw, mode, \ + out_ptr = (uintptr_t)setup_unroll_for_draw(p, in_draw, draw, mode, \ sizeof(INDEX)); \ \ /* Accessed thru local mem because NIR deref is too aggressive */ \ - in_ptr = (uintptr_t)(ia->index_buffer + sizeof(INDEX) * in_draw[2]); \ + in_ptr = (uintptr_t)(p->index_buffer + sizeof(INDEX) * in_draw[2]); \ } \ \ barrier(CLK_LOCAL_MEM_FENCE); \ @@ -312,9 +313,9 @@ setup_unroll_for_draw(global struct agx_ia_state *ia, constant uint *in_draw, local uint scratch[32]; \ \ uint out_prims = 0; \ - INDEX restart_idx = ia->restart_index; \ - bool flatshade_first = ia->flatshade_first; \ - uint in_size_el = ia->index_buffer_size_B / sizeof(INDEX); \ + INDEX restart_idx = p->restart_index; \ + bool flatshade_first = p->flatshade_first; \ + uint in_size_el = p->index_buffer_size_B / sizeof(INDEX); \ \ uint needle = 0; \ uint per_prim = mesa_vertices_per_prim(mode); \ @@ -324,6 +325,7 @@ setup_unroll_for_draw(global struct agx_ia_state *ia, constant uint *in_draw, for (;;) { \ /* Relies on shortcircuiting */ \ uint idx = next_restart + tid; \ + /* XXX: robustness here */ \ bool restart = idx >= count || in[idx] == restart_idx; \ \ uint next_offs = first_true_thread_in_workgroup(restart, scratch); \ @@ -353,7 +355,7 @@ setup_unroll_for_draw(global struct agx_ia_state *ia, constant uint *in_draw, } \ \ if (tid == 0) \ - ia->out_draws[(5 * draw) + 0] = out_prims * per_prim; \ + p->out_draws[(5 * draw) + 0] = out_prims * per_prim; \ } UNROLL(uchar, u8) @@ -447,16 +449,15 @@ libagx_build_gs_draw(global struct agx_geometry_params *p, uint vertices, } void -libagx_gs_setup_indirect(global 
struct agx_geometry_params *p, - global struct agx_ia_state *ia, - global uintptr_t *vertex_buffer, uint64_t vs_outputs, +libagx_gs_setup_indirect(global struct agx_gs_setup_indirect_params *gsi, enum mesa_prim mode, uint local_id) { - global uint *in_draw = (global uint *)ia->draws; + global struct agx_geometry_params *p = gsi->geom; + global struct agx_ia_state *ia = gsi->ia; /* Determine the (primitives, instances) grid size. */ - uint vertex_count = in_draw[0]; - uint instance_count = in_draw[1]; + uint vertex_count = gsi->draw[0]; + uint instance_count = gsi->draw[1]; ia->verts_per_instance = vertex_count; @@ -478,22 +479,27 @@ libagx_gs_setup_indirect(global struct agx_geometry_params *p, * indirect draw, the hardware would do this for us, but for software input * assembly we need to do it ourselves. */ - if (ia->index_buffer) { - ia->index_buffer += ((constant uint *)ia->draws)[2] * ia->index_size_B; + if (gsi->index_buffer) { + ia->index_buffer = gsi->index_buffer + gsi->draw[2] * gsi->index_size_B; } - /* We may need to allocate VS and GS count buffers, do so now */ + /* We need to allocate VS and GS count buffers, do so now */ global struct agx_geometry_state *state = p->state; uint vertex_buffer_size = - libagx_tcs_in_size(vertex_count * instance_count, vs_outputs); + libagx_tcs_in_size(vertex_count * instance_count, gsi->vs_outputs); p->count_buffer = (global uint *)(state->heap + state->heap_bottom); state->heap_bottom += align(p->input_primitives * p->count_buffer_stride, 16); - *vertex_buffer = (uintptr_t)(state->heap + state->heap_bottom); + *(gsi->vertex_buffer) = (uintptr_t)(state->heap + state->heap_bottom); state->heap_bottom += align(vertex_buffer_size, 4); + + if (state->heap_bottom > 1024 * 1024 * 128) { + global uint *foo = (global uint *)(uintptr_t)0x1deadbeef; + *foo = 0x1234; + } } /* @@ -567,12 +573,6 @@ libagx_prefix_sum(global uint *buffer, uint len, uint words, uint word) } } -bool -libagx_is_provoking_last(global struct agx_ia_state 
*ia) -{ - return !ia->flatshade_first; -} - uintptr_t libagx_vertex_output_address(uintptr_t buffer, uint64_t mask, uint vtx, gl_varying_slot location) diff --git a/src/asahi/lib/shaders/geometry.h b/src/asahi/lib/shaders/geometry.h index 01867267e38..27ecff6b865 100644 --- a/src/asahi/lib/shaders/geometry.h +++ b/src/asahi/lib/shaders/geometry.h @@ -24,16 +24,13 @@ /* Packed geometry state buffer */ struct agx_geometry_state { - /* Heap to allocate from, in either direction. By convention, the top is used - * for intra-draw allocations and the bottom is used for full-batch - * allocations. In the future we could use kernel support to improve this. - */ + /* Heap to allocate from. */ GLOBAL(uchar) heap; - uint32_t heap_bottom, heap_top, heap_size, padding; + uint32_t heap_bottom, heap_size; } PACKED; -AGX_STATIC_ASSERT(sizeof(struct agx_geometry_state) == 6 * 4); +AGX_STATIC_ASSERT(sizeof(struct agx_geometry_state) == 4 * 4); -struct agx_ia_state { +struct agx_restart_unroll_params { /* Heap to allocate from across draws */ GLOBAL(struct agx_geometry_state) heap; @@ -46,36 +43,63 @@ struct agx_ia_state { /* Input: indirect draw descriptor. Raw pointer since it's strided. */ uint64_t draws; - /* When unrolling primitive restart, output draw descriptors */ + /* Output draw descriptors */ GLOBAL(uint) out_draws; - /* Number of vertices per instance. Written by CPU for direct draw, indirect - * setup kernel for indirect. This is used for VS->GS and VS->TCS indexing. 
- */ - uint32_t verts_per_instance; - /* Input: maximum draw count, count is clamped to this */ uint32_t max_draws; - /* Primitive restart index, if unrolling */ + /* Primitive restart index */ uint32_t restart_index; - /* Input index buffer size in bytes, if unrolling */ + /* Input index buffer size in bytes */ uint32_t index_buffer_size_B; - /* Stride for the draw descrptor array */ + /* Stride for the draw descriptor array */ uint32_t draw_stride; - /* When unrolling primitive restart, use first vertex as the provoking vertex - * for flat shading. We could stick this in the key, but meh, you're already - * hosed for perf on the unroll path. + /* Use first vertex as the provoking vertex for flat shading. We could stick + * this in the key, but meh, you're already hosed for perf on the unroll + * path. */ uint32_t flatshade_first; +} PACKED; +AGX_STATIC_ASSERT(sizeof(struct agx_restart_unroll_params) == 15 * 4); + +struct agx_gs_setup_indirect_params { + /* Index buffer if present. */ + CONST(uchar) index_buffer; + + /* Indirect draw descriptor. */ + CONST(uint) draw; + + /* Pointer to be written with allocated vertex buffer */ + GLOBAL(uintptr_t) vertex_buffer; + + /* Output input assembly state */ + GLOBAL(struct agx_ia_state) ia; + + /* Output geometry parameters */ + GLOBAL(struct agx_geometry_params) geom; + + /* Vertex (TES) output mask for sizing the allocated buffer */ + uint64_t vs_outputs; /* The index size (1, 2, 4) or 0 if drawing without an index buffer. */ uint32_t index_size_B; } PACKED; -AGX_STATIC_ASSERT(sizeof(struct agx_ia_state) == 17 * 4); +AGX_STATIC_ASSERT(sizeof(struct agx_gs_setup_indirect_params) == 13 * 4); + +struct agx_ia_state { + /* Index buffer if present. */ + CONST(uchar) index_buffer; + + /* Number of vertices per instance. Written by CPU for direct draw, indirect + * setup kernel for indirect. This is used for VS->GS and VS->TCS indexing. 
+ */ + uint32_t verts_per_instance; +} PACKED; +AGX_STATIC_ASSERT(sizeof(struct agx_ia_state) == 3 * 4); struct agx_geometry_params { /* Persistent (cross-draw) geometry state */ |