diff options
author | Samuel Pitoiset <samuel.pitoiset@gmail.com> | 2017-03-30 17:22:20 +0200 |
---|---|---|
committer | Samuel Pitoiset <samuel.pitoiset@gmail.com> | 2017-04-26 16:05:01 +0200 |
commit | 97eb26735ebfec341b9f4838511f3b3379712522 (patch) | |
tree | d470eb8f0c3ce9e5d6458ed665356539cb8c422c | |
parent | e4ec7f7c95e249a00708669df93ecc9fbd18c5b3 (diff) |
radeonsi: implement ARB_bindless_texture (WIP)
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
-rw-r--r-- | src/gallium/drivers/radeonsi/si_blit.c | 116 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_compute.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_descriptors.c | 595 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_hw_context.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.c | 22 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 45 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.h | 8 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_draw.c | 1 |
9 files changed, 790 insertions, 2 deletions
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 998288dba2..c5af5d1652 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -611,9 +611,56 @@ static void si_check_render_feedback_images(struct si_context *sctx, } } -static void si_check_render_feedback(struct si_context *sctx) +static void si_check_render_feedback_resident_textures(struct si_context *sctx) +{ + unsigned i; + + for (i = 0; i < sctx->num_resident_tex_handles; i++) { + struct si_texture_handle *tex_handle = + sctx->resident_tex_handles[i]; + struct pipe_sampler_view *view; + struct r600_texture *tex; + + view = &tex_handle->view->base; + if (view->texture->target == PIPE_BUFFER) + continue; + + tex = (struct r600_texture *)view->texture; + + si_check_render_feedback_texture(sctx, tex, + view->u.tex.first_level, + view->u.tex.last_level, + view->u.tex.first_layer, + view->u.tex.last_layer); + } +} + +static void si_check_render_feedback_resident_images(struct si_context *sctx) { + unsigned i; + + for (i = 0; i < sctx->num_resident_img_handles; i++) { + struct si_image_handle *img_handle = + sctx->resident_img_handles[i]; + struct pipe_image_view *view; + struct r600_texture *tex; + + view = &img_handle->view; + if (view->resource->target == PIPE_BUFFER) + continue; + + tex = (struct r600_texture *)view->resource; + si_check_render_feedback_texture(sctx, tex, + view->u.tex.level, + view->u.tex.level, + view->u.tex.first_layer, + view->u.tex.last_layer); + } +} + +static void si_check_render_feedback(struct si_context *sctx) +{ if (!sctx->need_check_render_feedback) return; @@ -621,6 +668,10 @@ static void si_check_render_feedback(struct si_context *sctx) si_check_render_feedback_images(sctx, &sctx->images[i]); si_check_render_feedback_textures(sctx, &sctx->samplers[i]); } + + si_check_render_feedback_resident_images(sctx); + si_check_render_feedback_resident_textures(sctx); + sctx->need_check_render_feedback = false; } @@ -667,6 +718,69 @@ void si_decompress_compute_textures(struct si_context *sctx) si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE); } +static bool is_compressed_colortex(struct r600_texture *rtex) +{ + return rtex->cmask.size || rtex->fmask.size || + (rtex->dcc_offset && rtex->dirty_level_mask); +} + +static bool is_depth_texture(struct r600_texture *rtex, + struct si_sampler_view *sview) +{ + return rtex->db_compatible && + (!rtex->tc_compatible_htile || sview->is_stencil_sampler); +} + +void si_decompress_resident_textures(struct si_context *sctx) +{ + unsigned i; + + if (sctx->blitter->running) + return; + + for (i = 0; i < sctx->num_resident_tex_handles; i++) { + struct si_texture_handle *tex_handle = + sctx->resident_tex_handles[i]; + struct si_sampler_view *sview = tex_handle->view; + struct pipe_sampler_view *view = &sview->base; + struct r600_texture *tex; + + assert(view); + tex = (struct r600_texture *)view->texture; + + if (view->texture->target == PIPE_BUFFER) + continue; + + if (is_compressed_colortex(tex)) + si_decompress_color_texture(sctx, tex, view->u.tex.first_level, + view->u.tex.last_level); + + if (is_depth_texture(tex, sview)) + si_flush_depth_texture(sctx, tex, + sview->is_stencil_sampler ? PIPE_MASK_S : PIPE_MASK_Z, + view->u.tex.first_level, view->u.tex.last_level, + 0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level)); + } + + for (i = 0; i < sctx->num_resident_img_handles; i++) { + struct si_image_handle *img_handle = + sctx->resident_img_handles[i]; + struct pipe_image_view *view = &img_handle->view; + struct r600_texture *tex; + + assert(view); + tex = (struct r600_texture *)view->resource; + + if (view->resource->target == PIPE_BUFFER) + continue; + + if (is_compressed_colortex(tex)) { + si_decompress_color_texture(sctx, tex, view->u.tex.level, + view->u.tex.level); + } + } +} + static void si_clear(struct pipe_context *ctx, unsigned buffers, const union pipe_color_union *color, double depth, unsigned stencil) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 33ebe2e7d9..0f7ae7ab0a 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -763,6 +763,7 @@ static void si_launch_grid( } si_decompress_compute_textures(sctx); + si_decompress_resident_textures(sctx); /* Add buffer sizes for memory checking in need_cs_space. */ r600_context_add_resource_size(ctx, &program->shader.bo->b.b); diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 5ba8052c23..ee7ad7a61d 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -60,6 +60,7 @@ #include "sid.h" #include "gfx9d.h" +#include "util/hash_table.h" #include "util/u_format.h" #include "util/u_memory.h" #include "util/u_upload_mgr.h" @@ -1976,6 +1977,594 @@ void si_emit_compute_shader_userdata(struct si_context *sctx) sctx->shader_pointers_dirty &= ~compute_mask; } +/* BINDLESS */ + +static int si_add_resident_descriptor(struct si_context *sctx, + struct r600_resource *desc) +{ + int idx; + + /* New resident descriptor, check if the backing array is large enough. */ + if (sctx->num_resident_descriptors >= sctx->max_resident_descriptors) { + unsigned new_max_descriptors = + MAX2(1, sctx->max_resident_descriptors * 2); + struct r600_resource **new_descriptors = + REALLOC(sctx->resident_descriptors, + sctx->num_resident_descriptors * (sizeof(*new_descriptors)), + new_max_descriptors * sizeof(*new_descriptors)); + + if (new_descriptors) { + sctx->resident_descriptors = new_descriptors; + sctx->max_resident_descriptors = new_max_descriptors; + } else { + fprintf(stderr, "si_add_resident_descriptor: " + "allocation failed\n"); + return -1; + } + } + + idx = sctx->num_resident_descriptors; + sctx->resident_descriptors[idx] = desc; + sctx->num_resident_descriptors++; + + return 0; +} + +static void si_del_resident_descriptor(struct si_context *sctx, + struct r600_resource *desc) +{ + unsigned i; + int size; + + for (i = 0; i < sctx->num_resident_descriptors; i++) { + if (sctx->resident_descriptors[i] != desc) + continue; + + if (i < sctx->num_resident_descriptors - 1) { + size = sizeof(*sctx->resident_descriptors) * + (sctx->num_resident_descriptors - 1 - i); + + memmove(&sctx->resident_descriptors[i], + &sctx->resident_descriptors[i + 1], size); + } + + sctx->num_resident_descriptors--; + return; + } +} + +static int si_add_resident_tex_handle(struct si_context *sctx, + struct si_texture_handle *tex_handle) +{ + int idx; + + /* New resident handle, check if the backing array is large enough. */ + if (sctx->num_resident_tex_handles >= sctx->max_resident_tex_handles) { + unsigned new_max_handles = + MAX2(1, sctx->max_resident_tex_handles * 2); + struct si_texture_handle **new_handles = + REALLOC(sctx->resident_tex_handles, + sctx->num_resident_tex_handles * (sizeof(*new_handles)), + new_max_handles * sizeof(*new_handles)); + + if (new_handles) { + sctx->resident_tex_handles = new_handles; + sctx->max_resident_tex_handles = new_max_handles; + } else { + fprintf(stderr, "si_add_resident_tex_handle: " + "allocation failed\n"); + return -1; + } + } + + idx = sctx->num_resident_tex_handles; + sctx->resident_tex_handles[idx] = tex_handle; + sctx->num_resident_tex_handles++; + + return 0; +} + +static void si_del_resident_tex_handle(struct si_context *sctx, + struct si_texture_handle *tex_handle) +{ + unsigned i; + int size; + + for (i = 0; i < sctx->num_resident_tex_handles; i++) { + if (sctx->resident_tex_handles[i] != tex_handle) + continue; + + if (i < sctx->num_resident_tex_handles - 1) { + size = sizeof(*sctx->resident_tex_handles) * + (sctx->num_resident_tex_handles - 1 - i); + + memmove(&sctx->resident_tex_handles[i], + &sctx->resident_tex_handles[i + 1], size); + } + + sctx->num_resident_tex_handles--; + return; + } +} + +static int si_add_resident_img_handle(struct si_context *sctx, + struct si_image_handle *img_handle) +{ + int idx; + + /* New resident handle, check if the backing array is large enough. */ + if (sctx->num_resident_img_handles >= sctx->max_resident_img_handles) { + unsigned new_max_handles = + MAX2(1, sctx->max_resident_img_handles * 2); + struct si_image_handle **new_handles = + REALLOC(sctx->resident_img_handles, + sctx->num_resident_img_handles * (sizeof(*new_handles)), + new_max_handles * sizeof(*new_handles)); + + if (new_handles) { + sctx->resident_img_handles = new_handles; + sctx->max_resident_img_handles = new_max_handles; + } else { + fprintf(stderr, "si_add_resident_img_handle: " + "allocation failed\n"); + return -1; + } + } + + idx = sctx->num_resident_img_handles; + sctx->resident_img_handles[idx] = img_handle; + sctx->num_resident_img_handles++; + + return 0; +} + +static void si_del_resident_img_handle(struct si_context *sctx, + struct si_image_handle *img_handle) +{ + unsigned i; + int size; + + for (i = 0; i < sctx->num_resident_img_handles; i++) { + if (sctx->resident_img_handles[i] != img_handle) + continue; + + if (i < sctx->num_resident_img_handles - 1) { + size = sizeof(*sctx->resident_img_handles) * + (sctx->num_resident_img_handles - 1 - i); + + memmove(&sctx->resident_img_handles[i], + &sctx->resident_img_handles[i + 1], size); + } + + sctx->num_resident_img_handles--; + return; + } +} + +static struct si_resident_descriptor * +si_create_and_upload_resident_descriptor(struct si_context *sctx, + uint32_t *desc_list, unsigned size) +{ + struct si_screen *sscreen = sctx->screen; + struct si_resident_descriptor *desc; + struct pb_slab_entry *entry; + void *ptr; + + /* Sub-allocate resident descriptors from slabs. */ + entry = pb_slab_alloc(&sctx->desc_bo_slabs, 64, 0); + if (!entry) + return NULL; + + desc = NULL; + desc = container_of(entry, desc, entry); + + /* Upload the descriptor. */ + ptr = sscreen->b.ws->buffer_map(desc->buf->buf, NULL, + PIPE_TRANSFER_WRITE | + PIPE_TRANSFER_UNSYNCHRONIZED); + util_memcpy_cpu_to_le32(ptr + desc->offset, desc_list, size); + sscreen->b.ws->buffer_unmap(desc->buf->buf); + + return desc; +} + +static uint64_t si_create_texture_handle(struct pipe_context *ctx, + struct pipe_resource *texture, + struct pipe_sampler_view *view, + const const struct pipe_sampler_state *state) +{ + struct si_sampler_view *sview = (struct si_sampler_view *)view; + struct si_context *sctx = (struct si_context *)ctx; + struct si_screen *sscreen = sctx->screen; + struct si_texture_handle *tex_handle; + struct r600_texture *rtex; + uint32_t desc_list[16]; + uint64_t handle; + + memset(desc_list, 0, sizeof(desc_list)); + si_init_descriptor_list(&desc_list[0], 16, 1, null_texture_descriptor); + + memcpy(&desc_list[0], sview->state, 8*4); + + rtex = (struct r600_texture *)sview->base.texture; + + if (rtex->resource.b.b.target == PIPE_BUFFER) { + si_set_buf_desc_address(&rtex->resource, + sview->base.u.buf.offset, + &desc_list[4]); + } else { + bool is_separate_stencil = rtex->db_compatible && + sview->is_stencil_sampler; + + si_set_mutable_tex_desc_fields(sscreen, rtex, + sview->base_level_info, + sview->base_level, + sview->base.u.tex.first_level, + sview->block_width, + is_separate_stencil, + &desc_list[0]); + } + + if (rtex->resource.b.b.target != PIPE_BUFFER && rtex->fmask.size) { + memcpy(&desc_list[8], sview->fmask_state, 8*4); + } else { + struct si_sampler_state *sstate = + ctx->create_sampler_state(ctx, state); + if (!sstate) + return 0; + + /* Disable FMASK and bind sampler state in [12:15]. */ + memcpy(&desc_list[8], null_texture_descriptor, 4*4); + memcpy(&desc_list[12], sstate->val, 4*4); + + ctx->delete_sampler_state(ctx, sstate); + } + + tex_handle = CALLOC_STRUCT(si_texture_handle); + if (!tex_handle) + return 0; + + tex_handle->view = sview; + tex_handle->desc = + si_create_and_upload_resident_descriptor(sctx, desc_list, + sizeof(desc_list)); + if (!tex_handle->desc) { + FREE(tex_handle); + return 0; + } + + /* Add the texture handle to the per-context list. */ + handle = tex_handle->desc->buf->gpu_address + tex_handle->desc->offset; + if (!_mesa_hash_table_insert(sctx->tex_handles, (void *)handle, + tex_handle)) { + pb_slab_free(&sctx->desc_bo_slabs, &tex_handle->desc->entry); + FREE(tex_handle); + return 0; + } + + return handle; +} + +static void si_delete_texture_handle(struct pipe_context *ctx, uint64_t handle) +{ + struct si_context *sctx = (struct si_context *)ctx; + struct si_texture_handle *tex_handle; + struct hash_entry *entry; + + entry = _mesa_hash_table_search(sctx->tex_handles, (void *)handle); + if (!entry) + return; + tex_handle = (struct si_texture_handle *)entry->data; + + _mesa_hash_table_remove(sctx->tex_handles, entry); + pb_slab_free(&sctx->desc_bo_slabs, &tex_handle->desc->entry); + FREE(tex_handle); +} + +static void si_make_texture_handle_resident(struct pipe_context *ctx, + uint64_t handle, bool resident) +{ + struct si_context *sctx = (struct si_context *)ctx; + struct si_texture_handle *tex_handle; + struct si_sampler_view *sview; + struct r600_resource *rres; + struct hash_entry *entry; + + entry = _mesa_hash_table_search(sctx->tex_handles, (void *)handle); + if (!entry) + return; + + tex_handle = (struct si_texture_handle *)entry->data; + sview = tex_handle->view; + + rres = (struct r600_resource *)sview->base.texture; + + if (resident) { + struct r600_texture *rtex = + (struct r600_texture *)sview->base.texture; + + /* Prevent resident buffers to be reallocated. */ + rres->flags |= RADEON_FLAG_RESIDENT; + + /* Make the texture handle resident. */ + si_add_resident_tex_handle(sctx, tex_handle); + + /* Add the buffers to the current CS in case si_begin_new_cs() + * is not going to be called. */ + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, + tex_handle->desc->buf, + RADEON_USAGE_READ, + RADEON_PRIO_DESCRIPTORS); + + si_sampler_view_add_buffer(sctx, sview->base.texture, + RADEON_USAGE_READ, + sview->is_stencil_sampler, false); + + if (rtex->dcc_offset && + p_atomic_read(&rtex->framebuffers_bound)) + sctx->need_check_render_feedback = true; + } else { + /* Allow this non-resident buffer to be reallocated. In case + * the buffer is referenced in the current CS, mapping it would + * cause waiting for the GPU. */ + rres->flags &= ~RADEON_FLAG_RESIDENT; + + /* Make the texture handle non-resident. */ + si_del_resident_tex_handle(sctx, tex_handle); + } +} + +static uint64_t si_create_image_handle(struct pipe_context *ctx, + const struct pipe_image_view *view) +{ + struct si_context *sctx = (struct si_context *)ctx; + struct si_screen *sscreen = sctx->screen; + struct si_image_handle *img_handle; + struct r600_resource *res; + uint32_t desc_list[16]; + uint64_t handle; + + memset(desc_list, 0, sizeof(desc_list)); + si_init_descriptor_list(&desc_list[0], 8, 1, null_image_descriptor); + + res = (struct r600_resource *)view->resource; + + if (res->b.b.target == PIPE_BUFFER) { + if (view->access & PIPE_IMAGE_ACCESS_WRITE) + si_mark_image_range_valid(view); + + si_make_buffer_descriptor(sscreen, res, view->format, + view->u.buf.offset, view->u.buf.size, + &desc_list[0]); + si_set_buf_desc_address(res, view->u.buf.offset, + &desc_list[4]); + } else { + static const unsigned char swizzle[4] = { 0, 1, 2, 3 }; + struct r600_texture *tex = (struct r600_texture *)res; + unsigned level = view->u.tex.level; + unsigned width, height, depth; + + assert(!tex->is_depth); + assert(tex->fmask.size == 0); + + /* Always force the base level to the selected level. + * + * This is required for 3D textures, where otherwise + * selecting a single slice for non-layered bindings + * fails. It doesn't hurt the other targets. + */ + width = u_minify(res->b.b.width0, level); + height = u_minify(res->b.b.height0, level); + depth = u_minify(res->b.b.depth0, level); + + si_make_texture_descriptor(sscreen, tex, + false, res->b.b.target, + view->format, swizzle, + 0, 0, + view->u.tex.first_layer, + view->u.tex.last_layer, + width, height, depth, + &desc_list[0], NULL); + si_set_mutable_tex_desc_fields(sscreen, tex, + &tex->surface.u.legacy.level[level], + level, level, + util_format_get_blockwidth(view->format), + false, &desc_list[0]); + } + + img_handle = CALLOC_STRUCT(si_image_handle); + if (!img_handle) + return 0; + + util_copy_image_view(&img_handle->view, view); + img_handle->desc = + si_create_and_upload_resident_descriptor(sctx, desc_list, + sizeof(desc_list)); + if (!img_handle->desc) { + FREE(img_handle); + return 0; + } + + /* Add the image handle to the per-context list. */ + handle = img_handle->desc->buf->gpu_address + img_handle->desc->offset; + if (!_mesa_hash_table_insert(sctx->img_handles, (void *)handle, + img_handle)) { + pb_slab_free(&sctx->desc_bo_slabs, &img_handle->desc->entry); + FREE(img_handle); + return 0; + } + + return handle; +} + +static void si_delete_image_handle(struct pipe_context *ctx, uint64_t handle) +{ + struct si_context *sctx = (struct si_context *)ctx; + struct si_image_handle *img_handle; + struct hash_entry *entry; + + entry = _mesa_hash_table_search(sctx->img_handles, (void *)handle); + if (!entry) + return; + img_handle = (struct si_image_handle *)entry->data; + + _mesa_hash_table_remove(sctx->img_handles, entry); + pb_slab_free(&sctx->desc_bo_slabs, &img_handle->desc->entry); + FREE(img_handle); +} + +static void si_make_image_handle_resident(struct pipe_context *ctx, + uint64_t handle, unsigned access, + bool resident) +{ + struct si_context *sctx = (struct si_context *)ctx; + struct si_image_handle *img_handle; + struct pipe_image_view *view; + struct hash_entry *entry; + + entry = _mesa_hash_table_search(sctx->img_handles, (void *)handle); + if (!entry) + return; + img_handle = (struct si_image_handle *)entry->data; + view = &img_handle->view; + + if (resident) { + struct r600_texture *rtex = + (struct r600_texture *)view->resource; + + /* Make the image handle resident. */ + si_add_resident_img_handle(sctx, img_handle); + + /* Add the buffers to the current CS in case si_begin_new_cs() + * is not going to be called. */ + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, + img_handle->desc->buf, + RADEON_USAGE_READ, + RADEON_PRIO_DESCRIPTORS); + + si_sampler_view_add_buffer(sctx, view->resource, + RADEON_USAGE_READWRITE, + false, false); + if (rtex->dcc_offset && + p_atomic_read(&rtex->framebuffers_bound)) + sctx->need_check_render_feedback = true; + } else { + si_del_resident_img_handle(sctx, img_handle); + } +} + +void si_all_resident_buffers_begin_new_cs(struct si_context *sctx) +{ + unsigned i; + + /* Add all resident descriptors. */ + for (i = 0; i < sctx->num_resident_descriptors; i++) { + struct r600_resource *desc = sctx->resident_descriptors[i]; + + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc, + RADEON_USAGE_READ, + RADEON_PRIO_DESCRIPTORS); + } + + /* Add all resident texture handles. */ + for (i = 0; i < sctx->num_resident_tex_handles; i++) { + struct si_texture_handle *tex_handle = + sctx->resident_tex_handles[i]; + struct si_sampler_view *sview = tex_handle->view; + + si_sampler_view_add_buffer(sctx, sview->base.texture, + RADEON_USAGE_READ, + sview->is_stencil_sampler, false); + } + + /* Add all resident image handles. */ + for (i = 0; i < sctx->num_resident_img_handles; i++) { + struct si_image_handle *img_handle = + sctx->resident_img_handles[i]; + struct pipe_image_view *view = &img_handle->view; + + si_sampler_view_add_buffer(sctx, view->resource, + RADEON_USAGE_READWRITE, + false, false); + } +} + +bool si_desc_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry) +{ + return true; +} + +struct si_slab +{ + struct pb_slab base; + struct r600_resource *buf; + struct si_resident_descriptor *entries; +}; + +struct pb_slab *si_desc_bo_slab_alloc(void *priv, unsigned heap, + unsigned entry_size, unsigned group_index) +{ + struct si_context *sctx = priv; + struct si_screen *sscreen = sctx->screen; + struct si_slab *slab; + + slab = CALLOC_STRUCT(si_slab); + if (!slab) + return NULL; + + /* Create a buffer in VRAM for 4096 resident handles. */ + slab->buf = (struct r600_resource *)pipe_buffer_create(&sscreen->b.b, 0, + PIPE_USAGE_IMMUTABLE, 64 * 1024); + if (!slab->buf) + goto fail; + + slab->base.num_entries = slab->buf->bo_size / entry_size; + slab->base.num_free = slab->base.num_entries; + slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries)); + if (!slab->entries) + goto fail_buffer; + + LIST_INITHEAD(&slab->base.free); + + for (unsigned i = 0; i < slab->base.num_entries; ++i) { + struct si_resident_descriptor *desc = &slab->entries[i]; + + desc->entry.slab = &slab->base; + desc->entry.group_index = group_index; + desc->buf = slab->buf; + desc->offset = i * entry_size; + + LIST_ADDTAIL(&desc->entry.head, &slab->base.free); + } + + /* Add the descriptor to the per-context residency list. */ + if (si_add_resident_descriptor(sctx, slab->buf)) + goto fail_desc; + + return &slab->base; + +fail_desc: + FREE(slab->entries); +fail_buffer: + r600_resource_reference(&slab->buf, NULL); +fail: + FREE(slab); + return NULL; +} + +void si_desc_bo_slab_free(void *priv, struct pb_slab *pslab) +{ + struct si_context *sctx = priv; + struct si_slab *slab = (struct si_slab *)pslab; + + /* Remove the descriptor from the per-context residency list. */ + si_del_resident_descriptor(sctx, slab->buf); + + r600_resource_reference(&slab->buf, NULL); + FREE(slab->entries); + FREE(slab); +} + /* INIT/DEINIT/UPLOAD */ void si_init_all_descriptors(struct si_context *sctx) @@ -2039,6 +2628,12 @@ void si_init_all_descriptors(struct si_context *sctx) sctx->b.b.set_shader_buffers = si_set_shader_buffers; sctx->b.b.set_sampler_views = si_set_sampler_views; sctx->b.b.set_stream_output_targets = si_set_streamout_targets; + sctx->b.b.create_texture_handle = si_create_texture_handle; + sctx->b.b.delete_texture_handle = si_delete_texture_handle; + sctx->b.b.make_texture_handle_resident = si_make_texture_handle_resident; + sctx->b.b.create_image_handle = si_create_image_handle; + sctx->b.b.delete_image_handle = si_delete_image_handle; + sctx->b.b.make_image_handle_resident = si_make_image_handle_resident; sctx->b.invalidate_buffer = si_invalidate_buffer; /* Shader user data. */ diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index e15f6a9cc6..d58de08e6d 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -231,6 +231,7 @@ void si_begin_new_cs(struct si_context *ctx) si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom); si_mark_atom_dirty(ctx, &ctx->b.render_cond_atom); si_all_descriptors_begin_new_cs(ctx); + si_all_resident_buffers_begin_new_cs(ctx); ctx->b.scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1; ctx->b.viewports.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1; diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index a1cdded1fd..7d19d3638c 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -27,6 +27,7 @@ #include "sid.h" #include "radeon/radeon_uvd.h" +#include "util/hash_table.h" #include "util/u_memory.h" #include "util/u_suballoc.h" #include "util/u_tests.h" @@ -95,6 +96,14 @@ static void si_destroy_context(struct pipe_context *context) r600_resource_reference(&sctx->last_trace_buf, NULL); radeon_clear_saved_cs(&sctx->last_gfx); + _mesa_hash_table_destroy(sctx->tex_handles, NULL); + _mesa_hash_table_destroy(sctx->img_handles, NULL); + + pb_slabs_deinit(&sctx->desc_bo_slabs); + + FREE(sctx->resident_descriptors); + FREE(sctx->resident_tex_handles); + FREE(sctx->resident_img_handles); FREE(sctx); } @@ -313,6 +322,19 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, sctx->tm = si_create_llvm_target_machine(sscreen); + /* Bindless handles. */ + sctx->tex_handles = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + sctx->img_handles = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + /* Create a slab allocator for all resident descriptors. */ + if (!pb_slabs_init(&sctx->desc_bo_slabs, 6, 6, 12, sctx, + si_desc_bo_can_reclaim_slab, + si_desc_bo_slab_alloc, + si_desc_bo_slab_free)) + goto fail; + return &sctx->b.b; fail: fprintf(stderr, "radeonsi: Failed to create a context.\n"); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 09788316a4..b974ec56a0 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -225,6 +225,29 @@ union si_vgt_param_key { uint32_t index; }; +struct si_resident_buffer { + struct r600_resource *buf; + enum radeon_bo_usage usage; + enum radeon_bo_priority priority; +}; + +struct si_resident_descriptor +{ + struct pb_slab_entry entry; + struct r600_resource *buf; + unsigned offset; +}; + +struct si_image_handle { + struct si_resident_descriptor *desc; + struct pipe_image_view view; +}; + +struct si_texture_handle { + struct si_resident_descriptor *desc; + struct si_sampler_view *view; +}; + struct si_context { struct r600_common_context b; struct blitter_context *blitter; @@ -383,6 +406,27 @@ struct si_context { /* Precomputed IA_MULTI_VGT_PARAM */ union si_vgt_param_key ia_multi_vgt_param_key; unsigned ia_multi_vgt_param[SI_NUM_VGT_PARAM_STATES]; + + /* Bindless handles */ + struct hash_table *tex_handles; + struct hash_table *img_handles; + + struct pb_slabs desc_bo_slabs; + + /* Resident texture handles */ + struct si_texture_handle **resident_tex_handles; + unsigned num_resident_tex_handles; + unsigned max_resident_tex_handles; + + /* Resident image handles */ + struct si_image_handle **resident_img_handles; + unsigned num_resident_img_handles; + unsigned max_resident_img_handles; + + /* Resident descriptors. */ + struct r600_resource **resident_descriptors; + unsigned num_resident_descriptors; + unsigned max_resident_descriptors; }; /* cik_sdma.c */ @@ -392,6 +436,7 @@ void cik_init_sdma_functions(struct si_context *sctx); void si_init_blit_functions(struct si_context *sctx); void si_decompress_graphics_textures(struct si_context *sctx); void si_decompress_compute_textures(struct si_context *sctx); +void si_decompress_resident_textures(struct si_context *sctx); void si_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst, unsigned dst_level, diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 5d7175d674..5bb453bc26 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -4963,7 +4963,8 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, const unsigned src_idx = 2; unsigned sampler = inst->Src[src_idx].Register.Index; - assert(inst->Src[src_idx].Register.File == TGSI_FILE_SAMPLER); + assert(inst->Src[src_idx].Register.File == TGSI_FILE_SAMPLER || + inst->Src[src_idx].Register.File == TGSI_FILE_CONSTANT); if (info->sampler_type[sampler] == TGSI_RETURN_TYPE_SINT || info->sampler_type[sampler] == TGSI_RETURN_TYPE_UINT) diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 6257299fc3..91301417cb 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -30,6 +30,8 @@ #include "si_pm4.h" #include "radeon/r600_pipe_common.h" +#include "pipebuffer/pb_slab.h" + #define SI_NUM_GRAPHICS_SHADERS (PIPE_SHADER_TESS_EVAL+1) #define SI_NUM_SHADERS (PIPE_SHADER_COMPUTE+1) @@ -308,6 +310,7 @@ bool si_upload_graphics_shader_descriptors(struct si_context *sctx); bool si_upload_compute_shader_descriptors(struct si_context *sctx); void si_release_all_descriptors(struct si_context *sctx); void si_all_descriptors_begin_new_cs(struct si_context *sctx); +void si_all_resident_buffers_begin_new_cs(struct si_context *sctx); void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer, const uint8_t *ptr, unsigned size, uint32_t *const_offset); void si_update_all_texture_descriptors(struct si_context *sctx); @@ -318,6 +321,11 @@ void si_emit_graphics_shader_userdata(struct si_context *sctx, void si_emit_compute_shader_userdata(struct si_context *sctx); void si_set_rw_buffer(struct si_context *sctx, uint slot, const struct pipe_constant_buffer *input); +bool si_desc_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry); +struct pb_slab *si_desc_bo_slab_alloc(void *priv, unsigned heap, + unsigned entry_size, unsigned group_index); +void si_desc_bo_slab_free(void *priv, struct pb_slab *pslab); + /* si_state.c */ struct si_shader_selector; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 9b7b52c359..037c4d4b85 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -1149,6 +1149,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) } si_decompress_graphics_textures(sctx); + si_decompress_resident_textures(sctx); /* Set the rasterization primitive type. * |