 src/vulkan/runtime/vk_command_buffer.h |    8
 src/vulkan/runtime/vk_limits.h         |    3
 src/vulkan/runtime/vk_pipeline.c       | 1730
 src/vulkan/runtime/vk_pipeline.h       |    4
 src/vulkan/runtime/vk_shader.c         |   11
 src/vulkan/runtime/vk_shader.h         |   76
 6 files changed, 1830 insertions(+), 2 deletions(-)
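The bulk of this change lands in src/vulkan/runtime/vk_pipeline.c, which adds a common VkPipeline implementation built on top of vk_shader objects and the driver's vk_device_shader_ops. For orientation, here is a minimal, hypothetical sketch of how a driver might wire up the hooks this code relies on; the drv_* symbols are placeholders and not part of this change, only the vk_device_shader_ops member names are real.

/* Hypothetical driver wiring for the common pipeline path (sketch only).
 * The drv_* functions are placeholders; the member names are the ones
 * this change adds (link_geom_stages, hash_graphics_state,
 * cmd_set_dynamic_graphics_state) or already uses (compile, deserialize,
 * cmd_bind_shaders, ...).
 */
static const struct vk_device_shader_ops drv_shader_ops = {
   .get_nir_options   = drv_get_nir_options,
   .get_spirv_options = drv_get_spirv_options,
   .preprocess_nir    = drv_preprocess_nir,

   /* Compile geometry stages together with
    * VK_SHADER_CREATE_LINK_STAGE_BIT_EXT when full pipelines are used.
    */
   .link_geom_stages  = true,

   /* Hash whatever bits of vk_graphics_pipeline_state affect compilation
    * of the given stages; mixed into the per-stage shader cache key.
    */
   .hash_graphics_state = drv_hash_graphics_state,

   .compile           = drv_compile,
   .deserialize       = drv_deserialize_shader,
   .cmd_bind_shaders  = drv_cmd_bind_shaders,

   /* Applied when a linked (non-library) graphics pipeline is bound. */
   .cmd_set_dynamic_graphics_state = drv_cmd_set_dynamic_graphics_state,
};

A driver that does not want the runtime to link geometry stages can leave link_geom_stages false, in which case each stage is compiled in its own partition.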
diff --git a/src/vulkan/runtime/vk_command_buffer.h b/src/vulkan/runtime/vk_command_buffer.h index e49b3077d34..6ae637ca5ab 100644 --- a/src/vulkan/runtime/vk_command_buffer.h +++ b/src/vulkan/runtime/vk_command_buffer.h @@ -185,6 +185,14 @@ struct vk_command_buffer { struct vk_attachment_state _attachments[8]; VkRenderPassSampleLocationsBeginInfoEXT *pass_sample_locations; + + /** + * Bitmask of shader stages bound via a vk_pipeline since the last call to + * vkBindShadersEXT(). + * + * Used by the common vk_pipeline implementation + */ + VkShaderStageFlags pipeline_shader_stages; }; VK_DEFINE_HANDLE_CASTS(vk_command_buffer, base, VkCommandBuffer, diff --git a/src/vulkan/runtime/vk_limits.h b/src/vulkan/runtime/vk_limits.h index e4756794b08..50bfde0c0eb 100644 --- a/src/vulkan/runtime/vk_limits.h +++ b/src/vulkan/runtime/vk_limits.h @@ -24,6 +24,9 @@ #ifndef VK_LIMITS_H #define VK_LIMITS_H +/* Maximun number of shader stages in a single graphics pipeline */ +#define MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES 5 + #define MESA_VK_MAX_DESCRIPTOR_SETS 32 /* From the Vulkan 1.3.274 spec: diff --git a/src/vulkan/runtime/vk_pipeline.c b/src/vulkan/runtime/vk_pipeline.c index ac7b66a13f4..c914120b87b 100644 --- a/src/vulkan/runtime/vk_pipeline.c +++ b/src/vulkan/runtime/vk_pipeline.c @@ -23,18 +23,23 @@ #include "vk_pipeline.h" +#include "vk_alloc.h" #include "vk_common_entrypoints.h" #include "vk_command_buffer.h" +#include "vk_descriptor_set_layout.h" #include "vk_device.h" +#include "vk_graphics_state.h" #include "vk_log.h" #include "vk_nir.h" +#include "vk_physical_device.h" +#include "vk_pipeline_layout.h" +#include "vk_shader.h" #include "vk_shader_module.h" #include "vk_util.h" #include "nir_serialize.h" #include "util/mesa-sha1.h" -#include "util/mesa-blake3.h" bool vk_pipeline_shader_stage_is_null(const VkPipelineShaderStageCreateInfo *info) @@ -419,3 +424,1726 @@ vk_common_CmdBindPipeline(VkCommandBuffer commandBuffer, pipeline->ops->cmd_bind(cmd_buffer, pipeline); } + +static const struct vk_pipeline_cache_object_ops pipeline_shader_cache_ops; + +static struct vk_shader * +vk_shader_from_cache_obj(struct vk_pipeline_cache_object *object) +{ + assert(object->ops == &pipeline_shader_cache_ops); + return container_of(object, struct vk_shader, pipeline.cache_obj); +} + +static bool +vk_pipeline_shader_serialize(struct vk_pipeline_cache_object *object, + struct blob *blob) +{ + struct vk_shader *shader = vk_shader_from_cache_obj(object); + struct vk_device *device = shader->base.device; + + return shader->ops->serialize(device, shader, blob); +} + +static void +vk_shader_init_cache_obj(struct vk_device *device, struct vk_shader *shader, + const void *key_data, size_t key_size) +{ + assert(key_size == sizeof(shader->pipeline.cache_key)); + memcpy(&shader->pipeline.cache_key, key_data, + sizeof(shader->pipeline.cache_key)); + + vk_pipeline_cache_object_init(device, &shader->pipeline.cache_obj, + &pipeline_shader_cache_ops, + &shader->pipeline.cache_key, + sizeof(shader->pipeline.cache_key)); +} + +static struct vk_pipeline_cache_object * +vk_pipeline_shader_deserialize(struct vk_pipeline_cache *cache, + const void *key_data, size_t key_size, + struct blob_reader *blob) +{ + struct vk_device *device = cache->base.device; + const struct vk_device_shader_ops *ops = device->shader_ops; + + /* TODO: Do we really want to always use the latest version? 
*/ + const uint32_t version = device->physical->properties.shaderBinaryVersion; + + struct vk_shader *shader; + VkResult result = ops->deserialize(device, blob, version, + &device->alloc, &shader); + if (result != VK_SUCCESS) { + assert(result == VK_ERROR_OUT_OF_HOST_MEMORY); + return NULL; + } + + vk_shader_init_cache_obj(device, shader, key_data, key_size); + + return &shader->pipeline.cache_obj; +} + +static void +vk_pipeline_shader_destroy(struct vk_device *device, + struct vk_pipeline_cache_object *object) +{ + struct vk_shader *shader = vk_shader_from_cache_obj(object); + assert(shader->base.device == device); + + vk_shader_destroy(device, shader, &device->alloc); +} + +static const struct vk_pipeline_cache_object_ops pipeline_shader_cache_ops = { + .serialize = vk_pipeline_shader_serialize, + .deserialize = vk_pipeline_shader_deserialize, + .destroy = vk_pipeline_shader_destroy, +}; + +static struct vk_shader * +vk_shader_ref(struct vk_shader *shader) +{ + vk_pipeline_cache_object_ref(&shader->pipeline.cache_obj); + return shader; +} + +static void +vk_shader_unref(struct vk_device *device, struct vk_shader *shader) +{ + vk_pipeline_cache_object_unref(device, &shader->pipeline.cache_obj); +} + +struct vk_pipeline_tess_info { + unsigned tcs_vertices_out : 8; + unsigned primitive_mode : 2; /* tess_primitive_mode */ + unsigned spacing : 2; /* gl_tess_spacing */ + unsigned ccw : 1; + unsigned point_mode : 1; + unsigned _pad : 18; +}; +static_assert(sizeof(struct vk_pipeline_tess_info) == 4, + "This struct has no holes"); + +static void +vk_pipeline_gather_nir_tess_info(const nir_shader *nir, + struct vk_pipeline_tess_info *info) +{ + info->tcs_vertices_out = nir->info.tess.tcs_vertices_out; + info->primitive_mode = nir->info.tess._primitive_mode; + info->spacing = nir->info.tess.spacing; + info->ccw = nir->info.tess.ccw; + info->point_mode = nir->info.tess.point_mode; +} + +static void +vk_pipeline_replace_nir_tess_info(nir_shader *nir, + const struct vk_pipeline_tess_info *info) +{ + nir->info.tess.tcs_vertices_out = info->tcs_vertices_out; + nir->info.tess._primitive_mode = info->primitive_mode; + nir->info.tess.spacing = info->spacing; + nir->info.tess.ccw = info->ccw; + nir->info.tess.point_mode = info->point_mode; +} + +static void +vk_pipeline_tess_info_merge(struct vk_pipeline_tess_info *dst, + const struct vk_pipeline_tess_info *src) +{ + /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says: + * + * "PointMode. Controls generation of points rather than triangles + * or lines. This functionality defaults to disabled, and is + * enabled if either shader stage includes the execution mode. + * + * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw, + * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd, + * and OutputVertices, it says: + * + * "One mode must be set in at least one of the tessellation + * shader stages." + * + * So, the fields can be set in either the TCS or TES, but they must + * agree if set in both. 
+ */ + assert(dst->tcs_vertices_out == 0 || + src->tcs_vertices_out == 0 || + dst->tcs_vertices_out == src->tcs_vertices_out); + dst->tcs_vertices_out |= src->tcs_vertices_out; + + static_assert(TESS_SPACING_UNSPECIFIED == 0, ""); + assert(dst->spacing == TESS_SPACING_UNSPECIFIED || + src->spacing == TESS_SPACING_UNSPECIFIED || + dst->spacing == src->spacing); + dst->spacing |= src->spacing; + + static_assert(TESS_PRIMITIVE_UNSPECIFIED == 0, ""); + assert(dst->primitive_mode == TESS_PRIMITIVE_UNSPECIFIED || + src->primitive_mode == TESS_PRIMITIVE_UNSPECIFIED || + dst->primitive_mode == src->primitive_mode); + dst->primitive_mode |= src->primitive_mode; + dst->ccw |= src->ccw; + dst->point_mode |= src->point_mode; +} + +struct vk_pipeline_precomp_shader { + struct vk_pipeline_cache_object cache_obj; + + /* Key for this cache_obj in the pipeline cache. + * + * This is always the output of vk_pipeline_hash_shader_stage() so it must + * be a SHA1 hash. + */ + uint8_t cache_key[SHA1_DIGEST_LENGTH]; + + gl_shader_stage stage; + + struct vk_pipeline_robustness_state rs; + + /* Tessellation info if the shader is a tessellation shader */ + struct vk_pipeline_tess_info tess; + + /* Hash of the vk_pipeline_precomp_shader + * + * This is the hash of the final compiled NIR together with tess info and + * robustness state. It's used as a key for final binary lookups. By + * having this as a separate key, we can de-duplicate cases where you have + * different SPIR-V or specialization constants but end up compiling the + * same NIR shader in the end anyway. + */ + blake3_hash blake3; + + struct blob nir_blob; +}; + +static struct vk_pipeline_precomp_shader * +vk_pipeline_precomp_shader_ref(struct vk_pipeline_precomp_shader *shader) +{ + vk_pipeline_cache_object_ref(&shader->cache_obj); + return shader; +} + +static void +vk_pipeline_precomp_shader_unref(struct vk_device *device, + struct vk_pipeline_precomp_shader *shader) +{ + vk_pipeline_cache_object_unref(device, &shader->cache_obj); +} + +static const struct vk_pipeline_cache_object_ops pipeline_precomp_shader_cache_ops; + +static struct vk_pipeline_precomp_shader * +vk_pipeline_precomp_shader_from_cache_obj(struct vk_pipeline_cache_object *obj) +{ + assert(obj->ops == & pipeline_precomp_shader_cache_ops); + return container_of(obj, struct vk_pipeline_precomp_shader, cache_obj); +} + +static struct vk_pipeline_precomp_shader * +vk_pipeline_precomp_shader_create(struct vk_device *device, + const void *key_data, size_t key_size, + const struct vk_pipeline_robustness_state *rs, + nir_shader *nir) +{ + struct blob blob; + blob_init(&blob); + + nir_serialize(&blob, nir, false); + + if (blob.out_of_memory) + goto fail_blob; + + struct vk_pipeline_precomp_shader *shader = + vk_zalloc(&device->alloc, sizeof(*shader), 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (shader == NULL) + goto fail_blob; + + assert(sizeof(shader->cache_key) == key_size); + memcpy(shader->cache_key, key_data, sizeof(shader->cache_key)); + + vk_pipeline_cache_object_init(device, &shader->cache_obj, + &pipeline_precomp_shader_cache_ops, + shader->cache_key, + sizeof(shader->cache_key)); + + shader->stage = nir->info.stage; + shader->rs = *rs; + + vk_pipeline_gather_nir_tess_info(nir, &shader->tess); + + struct mesa_blake3 blake3_ctx; + _mesa_blake3_init(&blake3_ctx); + _mesa_blake3_update(&blake3_ctx, rs, sizeof(*rs)); + _mesa_blake3_update(&blake3_ctx, blob.data, blob.size); + _mesa_blake3_final(&blake3_ctx, shader->blake3); + + shader->nir_blob = blob; + + return shader; + +fail_blob: 
+ blob_finish(&blob); + + return NULL; +} + +static bool +vk_pipeline_precomp_shader_serialize(struct vk_pipeline_cache_object *obj, + struct blob *blob) +{ + struct vk_pipeline_precomp_shader *shader = + vk_pipeline_precomp_shader_from_cache_obj(obj); + + blob_write_uint32(blob, shader->stage); + blob_write_bytes(blob, &shader->rs, sizeof(shader->rs)); + blob_write_bytes(blob, &shader->tess, sizeof(shader->tess)); + blob_write_bytes(blob, shader->blake3, sizeof(shader->blake3)); + blob_write_uint64(blob, shader->nir_blob.size); + blob_write_bytes(blob, shader->nir_blob.data, shader->nir_blob.size); + + return !blob->out_of_memory; +} + +static struct vk_pipeline_cache_object * +vk_pipeline_precomp_shader_deserialize(struct vk_pipeline_cache *cache, + const void *key_data, size_t key_size, + struct blob_reader *blob) +{ + struct vk_device *device = cache->base.device; + + struct vk_pipeline_precomp_shader *shader = + vk_zalloc(&device->alloc, sizeof(*shader), 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (shader == NULL) + return NULL; + + assert(sizeof(shader->cache_key) == key_size); + memcpy(shader->cache_key, key_data, sizeof(shader->cache_key)); + + vk_pipeline_cache_object_init(device, &shader->cache_obj, + &pipeline_precomp_shader_cache_ops, + shader->cache_key, + sizeof(shader->cache_key)); + + shader->stage = blob_read_uint32(blob); + blob_copy_bytes(blob, &shader->rs, sizeof(shader->rs)); + blob_copy_bytes(blob, &shader->tess, sizeof(shader->tess)); + blob_copy_bytes(blob, shader->blake3, sizeof(shader->blake3)); + + uint64_t nir_size = blob_read_uint64(blob); + if (blob->overrun || nir_size > SIZE_MAX) + goto fail_shader; + + const void *nir_data = blob_read_bytes(blob, nir_size); + if (blob->overrun) + goto fail_shader; + + blob_init(&shader->nir_blob); + blob_write_bytes(&shader->nir_blob, nir_data, nir_size); + if (shader->nir_blob.out_of_memory) + goto fail_nir_blob; + + return &shader->cache_obj; + +fail_nir_blob: + blob_finish(&shader->nir_blob); +fail_shader: + vk_pipeline_cache_object_finish(&shader->cache_obj); + vk_free(&device->alloc, shader); + + return NULL; +} + +static void +vk_pipeline_precomp_shader_destroy(struct vk_device *device, + struct vk_pipeline_cache_object *obj) +{ + struct vk_pipeline_precomp_shader *shader = + vk_pipeline_precomp_shader_from_cache_obj(obj); + + blob_finish(&shader->nir_blob); + vk_pipeline_cache_object_finish(&shader->cache_obj); + vk_free(&device->alloc, shader); +} + +static nir_shader * +vk_pipeline_precomp_shader_get_nir(const struct vk_pipeline_precomp_shader *shader, + const struct nir_shader_compiler_options *nir_options) +{ + struct blob_reader blob; + blob_reader_init(&blob, shader->nir_blob.data, shader->nir_blob.size); + + nir_shader *nir = nir_deserialize(NULL, nir_options, &blob); + if (blob.overrun) { + ralloc_free(nir); + return NULL; + } + + return nir; +} + +static const struct vk_pipeline_cache_object_ops pipeline_precomp_shader_cache_ops = { + .serialize = vk_pipeline_precomp_shader_serialize, + .deserialize = vk_pipeline_precomp_shader_deserialize, + .destroy = vk_pipeline_precomp_shader_destroy, +}; + +static VkResult +vk_pipeline_precompile_shader(struct vk_device *device, + struct vk_pipeline_cache *cache, + VkPipelineCreateFlags2KHR pipeline_flags, + const void *pipeline_info_pNext, + const VkPipelineShaderStageCreateInfo *info, + struct vk_pipeline_precomp_shader **ps_out) +{ + const struct vk_device_shader_ops *ops = device->shader_ops; + VkResult result; + + struct vk_pipeline_robustness_state rs; + 
vk_pipeline_robustness_state_fill(device, &rs, + pipeline_info_pNext, + info->pNext); + + uint8_t stage_sha1[SHA1_DIGEST_LENGTH]; + vk_pipeline_hash_shader_stage(info, &rs, stage_sha1); + + if (cache != NULL) { + struct vk_pipeline_cache_object *cache_obj = + vk_pipeline_cache_lookup_object(cache, stage_sha1, sizeof(stage_sha1), + &pipeline_precomp_shader_cache_ops, + NULL /* cache_hit */); + if (cache_obj != NULL) { + *ps_out = vk_pipeline_precomp_shader_from_cache_obj(cache_obj); + return VK_SUCCESS; + } + } + + if (pipeline_flags & + VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR) + return VK_PIPELINE_COMPILE_REQUIRED; + + const gl_shader_stage stage = vk_to_mesa_shader_stage(info->stage); + const struct nir_shader_compiler_options *nir_options = + ops->get_nir_options(device->physical, stage, &rs); + const struct spirv_to_nir_options spirv_options = + ops->get_spirv_options(device->physical, stage, &rs); + + nir_shader *nir; + result = vk_pipeline_shader_stage_to_nir(device, info, &spirv_options, + nir_options, NULL, &nir); + if (result != VK_SUCCESS) + return result; + + if (ops->preprocess_nir != NULL) + ops->preprocess_nir(device->physical, nir); + + struct vk_pipeline_precomp_shader *shader = + vk_pipeline_precomp_shader_create(device, stage_sha1, + sizeof(stage_sha1), + &rs, nir); + ralloc_free(nir); + if (shader == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + if (cache != NULL) { + struct vk_pipeline_cache_object *cache_obj = &shader->cache_obj; + cache_obj = vk_pipeline_cache_add_object(cache, cache_obj); + shader = vk_pipeline_precomp_shader_from_cache_obj(cache_obj); + } + + *ps_out = shader; + + return VK_SUCCESS; +} + +struct vk_pipeline_stage { + gl_shader_stage stage; + + struct vk_pipeline_precomp_shader *precomp; + struct vk_shader *shader; +}; + +static int +cmp_vk_pipeline_stages(const void *_a, const void *_b) +{ + const struct vk_pipeline_stage *a = _a, *b = _b; + return vk_shader_cmp_graphics_stages(a->stage, b->stage); +} + +static bool +vk_pipeline_stage_is_null(const struct vk_pipeline_stage *stage) +{ + return stage->precomp == NULL && stage->shader == NULL; +} + +static void +vk_pipeline_stage_finish(struct vk_device *device, + struct vk_pipeline_stage *stage) +{ + if (stage->precomp != NULL) + vk_pipeline_precomp_shader_unref(device, stage->precomp); + + if (stage->shader) + vk_shader_unref(device, stage->shader); +} + +static struct vk_pipeline_stage +vk_pipeline_stage_clone(const struct vk_pipeline_stage *in) +{ + struct vk_pipeline_stage out = { + .stage = in->stage, + }; + + if (in->precomp) + out.precomp = vk_pipeline_precomp_shader_ref(in->precomp); + + if (in->shader) + out.shader = vk_shader_ref(in->shader); + + return out; +} + +struct vk_graphics_pipeline { + struct vk_pipeline base; + + union { + struct { + struct vk_graphics_pipeline_all_state all_state; + struct vk_graphics_pipeline_state state; + } lib; + + struct { + struct vk_vertex_input_state _dynamic_vi; + struct vk_sample_locations_state _dynamic_sl; + struct vk_dynamic_graphics_state dynamic; + } linked; + }; + + uint32_t set_layout_count; + struct vk_descriptor_set_layout *set_layouts[MESA_VK_MAX_DESCRIPTOR_SETS]; + + uint32_t stage_count; + struct vk_pipeline_stage stages[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES]; +}; + +static void +vk_graphics_pipeline_destroy(struct vk_device *device, + struct vk_pipeline *pipeline, + const VkAllocationCallbacks *pAllocator) +{ + struct vk_graphics_pipeline *gfx_pipeline = + container_of(pipeline, struct 
vk_graphics_pipeline, base); + + for (uint32_t i = 0; i < gfx_pipeline->stage_count; i++) + vk_pipeline_stage_finish(device, &gfx_pipeline->stages[i]); + + for (uint32_t i = 0; i < gfx_pipeline->set_layout_count; i++) { + if (gfx_pipeline->set_layouts[i] != NULL) + vk_descriptor_set_layout_unref(device, gfx_pipeline->set_layouts[i]); + } + + vk_pipeline_free(device, pAllocator, pipeline); +} + +static bool +vk_device_supports_stage(struct vk_device *device, + gl_shader_stage stage) +{ + const struct vk_features *features = &device->physical->supported_features; + + switch (stage) { + case MESA_SHADER_VERTEX: + case MESA_SHADER_FRAGMENT: + case MESA_SHADER_COMPUTE: + return true; + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + return features->tessellationShader; + case MESA_SHADER_GEOMETRY: + return features->geometryShader; + case MESA_SHADER_TASK: + return features->taskShader; + case MESA_SHADER_MESH: + return features->meshShader; + default: + return false; + } +} + +static const gl_shader_stage all_gfx_stages[] = { + MESA_SHADER_VERTEX, + MESA_SHADER_TESS_CTRL, + MESA_SHADER_TESS_EVAL, + MESA_SHADER_GEOMETRY, + MESA_SHADER_TASK, + MESA_SHADER_MESH, + MESA_SHADER_FRAGMENT, +}; + +static void +vk_graphics_pipeline_cmd_bind(struct vk_command_buffer *cmd_buffer, + struct vk_pipeline *pipeline) +{ + struct vk_device *device = cmd_buffer->base.device; + const struct vk_device_shader_ops *ops = device->shader_ops; + + struct vk_graphics_pipeline *gfx_pipeline = NULL; + struct vk_shader *stage_shader[PIPE_SHADER_MESH_TYPES] = { NULL, }; + if (pipeline != NULL) { + assert(pipeline->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS); + assert(!(pipeline->flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)); + gfx_pipeline = container_of(pipeline, struct vk_graphics_pipeline, base); + + for (uint32_t i = 0; i < gfx_pipeline->stage_count; i++) { + struct vk_shader *shader = gfx_pipeline->stages[i].shader; + stage_shader[shader->stage] = shader; + } + } + + uint32_t stage_count = 0; + gl_shader_stage stages[ARRAY_SIZE(all_gfx_stages)]; + struct vk_shader *shaders[ARRAY_SIZE(all_gfx_stages)]; + + VkShaderStageFlags vk_stages = 0; + for (uint32_t i = 0; i < ARRAY_SIZE(all_gfx_stages); i++) { + gl_shader_stage stage = all_gfx_stages[i]; + if (!vk_device_supports_stage(device, stage)) { + assert(stage_shader[stage] == NULL); + continue; + } + + vk_stages |= mesa_to_vk_shader_stage(stage); + + stages[stage_count] = stage; + shaders[stage_count] = stage_shader[stage]; + stage_count++; + } + ops->cmd_bind_shaders(cmd_buffer, stage_count, stages, shaders); + + if (gfx_pipeline != NULL) { + cmd_buffer->pipeline_shader_stages |= vk_stages; + ops->cmd_set_dynamic_graphics_state(cmd_buffer, + &gfx_pipeline->linked.dynamic); + } else { + cmd_buffer->pipeline_shader_stages &= ~vk_stages; + } +} + +static VkShaderCreateFlagsEXT +vk_pipeline_to_shader_flags(VkPipelineCreateFlags2KHR pipeline_flags, + gl_shader_stage stage) +{ + VkShaderCreateFlagsEXT shader_flags = 0; + + if (pipeline_flags & VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) + shader_flags |= VK_SHADER_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_MESA; + + if (stage == MESA_SHADER_FRAGMENT) { + if (pipeline_flags & VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR) + shader_flags |= VK_SHADER_CREATE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_EXT; + + if (pipeline_flags & VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT) + shader_flags |= 
VK_SHADER_CREATE_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT; + } + + if (stage == MESA_SHADER_COMPUTE) { + if (pipeline_flags & VK_PIPELINE_CREATE_2_DISPATCH_BASE_BIT_KHR) + shader_flags |= VK_SHADER_CREATE_DISPATCH_BASE_BIT_EXT; + } + + return shader_flags; +} + +static VkResult +vk_graphics_pipeline_compile_shaders(struct vk_device *device, + struct vk_pipeline_cache *cache, + struct vk_graphics_pipeline *pipeline, + struct vk_pipeline_layout *pipeline_layout, + const struct vk_graphics_pipeline_state *state, + uint32_t stage_count, + struct vk_pipeline_stage *stages, + VkPipelineCreationFeedback *stage_feedbacks) +{ + const struct vk_device_shader_ops *ops = device->shader_ops; + VkResult result; + + if (stage_count == 0) + return VK_SUCCESS; + + /* If we're linking, throw away any previously compiled shaders as they + * likely haven't been properly linked. We keep the precompiled shaders + * and we still look it up in the cache so it may still be fast. + */ + if (pipeline->base.flags & VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT) { + for (uint32_t i = 0; i < stage_count; i++) { + if (stages[i].shader != NULL) { + vk_shader_unref(device, stages[i].shader); + stages[i].shader = NULL; + } + } + } + + bool have_all_shaders = true; + VkShaderStageFlags all_stages = 0; + struct vk_pipeline_precomp_shader *tcs_precomp = NULL, *tes_precomp = NULL; + for (uint32_t i = 0; i < stage_count; i++) { + all_stages |= mesa_to_vk_shader_stage(stages[i].stage); + + if (stages[i].shader == NULL) + have_all_shaders = false; + + if (stages[i].stage == MESA_SHADER_TESS_CTRL) + tcs_precomp = stages[i].precomp; + + if (stages[i].stage == MESA_SHADER_TESS_EVAL) + tes_precomp = stages[i].precomp; + } + + /* If we already have a shader for each stage, there's nothing to do. 
*/ + if (have_all_shaders) + return VK_SUCCESS; + + struct vk_pipeline_tess_info tess_info = { ._pad = 0 }; + if (tcs_precomp != NULL && tes_precomp != NULL) { + tess_info = tcs_precomp->tess; + vk_pipeline_tess_info_merge(&tess_info, &tes_precomp->tess); + } + + struct mesa_blake3 blake3_ctx; + _mesa_blake3_init(&blake3_ctx); + for (uint32_t i = 0; i < pipeline->set_layout_count; i++) { + if (pipeline->set_layouts[i] != NULL) { + _mesa_blake3_update(&blake3_ctx, pipeline->set_layouts[i]->blake3, + sizeof(pipeline->set_layouts[i]->blake3)); + } + } + if (pipeline_layout != NULL) { + _mesa_blake3_update(&blake3_ctx, &pipeline_layout->push_ranges, + sizeof(pipeline_layout->push_ranges[0]) * + pipeline_layout->push_range_count); + } + blake3_hash layout_blake3; + _mesa_blake3_final(&blake3_ctx, layout_blake3); + + /* Partition the shaders */ + uint32_t part_count; + uint32_t partition[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES + 1] = { 0 }; + if (pipeline->base.flags & VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT) { + partition[1] = stage_count; + part_count = 1; + } else if (ops->link_geom_stages) { + if (stages[0].stage == MESA_SHADER_FRAGMENT) { + assert(stage_count == 1); + partition[1] = stage_count; + part_count = 1; + } else if (stages[stage_count - 1].stage == MESA_SHADER_FRAGMENT) { + /* In this case we have both */ + assert(stage_count > 1); + partition[1] = stage_count - 1; + partition[2] = stage_count; + part_count = 2; + } else { + /* In this case we only have geometry */ + partition[1] = stage_count; + part_count = 1; + } + } else { + /* Otherwise, we're don't want to link anything */ + part_count = stage_count; + for (uint32_t i = 0; i < stage_count; i++) + partition[i + 1] = i + 1; + } + + for (uint32_t p = 0; p < part_count; p++) { + const int64_t part_start = os_time_get_nano(); + + struct vk_shader_pipeline_cache_key shader_key = { 0 }; + + _mesa_blake3_init(&blake3_ctx); + + VkShaderStageFlags part_stages = 0; + for (uint32_t i = partition[p]; i < partition[p + 1]; i++) { + const struct vk_pipeline_stage *stage = &stages[i]; + + part_stages |= mesa_to_vk_shader_stage(stage->stage); + _mesa_blake3_update(&blake3_ctx, stage->precomp->blake3, + sizeof(stage->precomp->blake3)); + + VkShaderCreateFlagsEXT shader_flags = + vk_pipeline_to_shader_flags(pipeline->base.flags, stage->stage); + _mesa_blake3_update(&blake3_ctx, &shader_flags, sizeof(shader_flags)); + } + + blake3_hash state_blake3; + ops->hash_graphics_state(device->physical, state, + part_stages, state_blake3); + + _mesa_blake3_update(&blake3_ctx, state_blake3, sizeof(state_blake3)); + _mesa_blake3_update(&blake3_ctx, layout_blake3, sizeof(layout_blake3)); + + if (part_stages & (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | + VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) + _mesa_blake3_update(&blake3_ctx, &tess_info, sizeof(tess_info)); + + /* The set of geometry stages used together is used to generate the + * nextStage mask as well as VK_SHADER_CREATE_NO_TASK_SHADER_BIT_EXT. + */ + const VkShaderStageFlags geom_stages = + all_stages & ~VK_SHADER_STAGE_FRAGMENT_BIT; + _mesa_blake3_update(&blake3_ctx, &geom_stages, sizeof(geom_stages)); + + _mesa_blake3_final(&blake3_ctx, shader_key.blake3); + + if (cache != NULL) { + /* From the Vulkan 1.3.278 spec: + * + * "VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT + * indicates that a readily usable pipeline or pipeline stage was + * found in the pipelineCache specified by the application in the + * pipeline creation command. + * + * [...] 
+ * + * Note + * + * Implementations are encouraged to provide a meaningful signal + * to applications using this bit. The intention is to communicate + * to the application that the pipeline or pipeline stage was + * created “as fast as it gets” using the pipeline cache provided + * by the application. If an implementation uses an internal + * cache, it is discouraged from setting this bit as the feedback + * would be unactionable." + * + * The cache_hit value returned by vk_pipeline_cache_lookup_object() + * is only set to true when the shader is found in the provided + * pipeline cache. It is left false if we fail to find it in the + * memory cache but find it in the disk cache even though that's + * still a cache hit from the perspective of the compile pipeline. + */ + bool all_shaders_found = true; + bool all_cache_hits = true; + for (uint32_t i = partition[p]; i < partition[p + 1]; i++) { + struct vk_pipeline_stage *stage = &stages[i]; + + shader_key.stage = stage->stage; + + bool cache_hit = false; + struct vk_pipeline_cache_object *cache_obj = + vk_pipeline_cache_lookup_object(cache, &shader_key, + sizeof(shader_key), + &pipeline_shader_cache_ops, + &cache_hit); + if (cache_obj != NULL) { + stage->shader = vk_shader_from_cache_obj(cache_obj); + } else { + all_shaders_found = false; + } + + if (cache_obj == NULL && !cache_hit) + all_cache_hits = false; + } + + if (all_cache_hits) { + /* The pipeline cache only really helps if we hit for everything + * in the partition. Otherwise, we have to go re-compile it all + * anyway. + */ + for (uint32_t i = partition[p]; i < partition[p + 1]; i++) { + struct vk_pipeline_stage *stage = &stages[i]; + + stage_feedbacks[stage->stage].flags |= + VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; + } + } + + if (all_shaders_found) { + /* Update duration to take cache lookups into account */ + const int64_t part_end = os_time_get_nano(); + for (uint32_t i = partition[p]; i < partition[p + 1]; i++) { + struct vk_pipeline_stage *stage = &stages[i]; + stage_feedbacks[stage->stage].duration += part_end - part_start; + } + continue; + } + } + + if (pipeline->base.flags & + VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR) + return VK_PIPELINE_COMPILE_REQUIRED; + + struct vk_shader_compile_info infos[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES]; + for (uint32_t i = partition[p]; i < partition[p + 1]; i++) { + struct vk_pipeline_stage *stage = &stages[i]; + + VkShaderCreateFlagsEXT shader_flags = + vk_pipeline_to_shader_flags(pipeline->base.flags, stage->stage); + + if (partition[p + 1] - partition[p] > 1) + shader_flags |= VK_SHADER_CREATE_LINK_STAGE_BIT_EXT; + + if ((part_stages & VK_SHADER_STAGE_MESH_BIT_EXT) && + !(geom_stages & VK_SHADER_STAGE_TASK_BIT_EXT)) + shader_flags = VK_SHADER_CREATE_NO_TASK_SHADER_BIT_EXT; + + VkShaderStageFlags next_stage; + if (stage->stage == MESA_SHADER_FRAGMENT) { + next_stage = 0; + } else if (i + 1 < stage_count) { + /* We hash geom_stages above so this is safe */ + next_stage = mesa_to_vk_shader_stage(stages[i + 1].stage); + } else { + /* We're the last geometry stage */ + next_stage = VK_SHADER_STAGE_FRAGMENT_BIT; + } + + const struct nir_shader_compiler_options *nir_options = + ops->get_nir_options(device->physical, stage->stage, + &stage->precomp->rs); + + nir_shader *nir = + vk_pipeline_precomp_shader_get_nir(stage->precomp, nir_options); + if (nir == NULL) { + for (uint32_t j = partition[p]; j < i; j++) + ralloc_free(infos[i].nir); + + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + 
} + + if (stage->stage == MESA_SHADER_TESS_CTRL || + stage->stage == MESA_SHADER_TESS_EVAL) + vk_pipeline_replace_nir_tess_info(nir, &tess_info); + + const VkPushConstantRange *push_range = NULL; + if (pipeline_layout != NULL) { + for (uint32_t r = 0; r < pipeline_layout->push_range_count; r++) { + if (pipeline_layout->push_ranges[r].stageFlags & + mesa_to_vk_shader_stage(stage->stage)) { + assert(push_range == NULL); + push_range = &pipeline_layout->push_ranges[r]; + } + } + } + + infos[i] = (struct vk_shader_compile_info) { + .stage = stage->stage, + .flags = shader_flags, + .next_stage_mask = next_stage, + .nir = nir, + .robustness = &stage->precomp->rs, + .set_layout_count = pipeline->set_layout_count, + .set_layouts = pipeline->set_layouts, + .push_constant_range_count = push_range != NULL, + .push_constant_ranges = push_range != NULL ? push_range : NULL, + }; + } + + /* vk_shader_ops::compile() consumes the NIR regardless of whether or + * not it succeeds and only generates shaders on success. Once this + * returns, we own the shaders but not the NIR in infos. + */ + struct vk_shader *shaders[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES]; + result = ops->compile(device, partition[p + 1] - partition[p], + &infos[partition[p]], + state, + &device->alloc, + &shaders[partition[p]]); + if (result != VK_SUCCESS) + return result; + + const int64_t part_end = os_time_get_nano(); + for (uint32_t i = partition[p]; i < partition[p + 1]; i++) { + struct vk_pipeline_stage *stage = &stages[i]; + + if (stage->shader == NULL) { + shader_key.stage = stage->stage; + vk_shader_init_cache_obj(device, shaders[i], &shader_key, + sizeof(shader_key)); + + struct vk_pipeline_cache_object *cache_obj = + &shaders[i]->pipeline.cache_obj; + if (cache != NULL) + cache_obj = vk_pipeline_cache_add_object(cache, cache_obj); + + stage->shader = vk_shader_from_cache_obj(cache_obj); + } else { + /* This can fail to happen if only some of the shaders were found + * in the pipeline cache. In this case, we just throw away the + * shader as vk_pipeline_cache_add_object() would throw it away + * for us anyway. 
+ */ + vk_shader_destroy(device, shaders[i], &device->alloc); + } + + stage_feedbacks[stage->stage].duration += part_end - part_start; + } + } + + return VK_SUCCESS; +} + +static VkResult +vk_graphics_pipeline_get_executable_properties( + struct vk_device *device, + struct vk_pipeline *pipeline, + uint32_t *executable_count, + VkPipelineExecutablePropertiesKHR *properties) +{ + struct vk_graphics_pipeline *gfx_pipeline = + container_of(pipeline, struct vk_graphics_pipeline, base); + VkResult result; + + if (properties == NULL) { + *executable_count = 0; + for (uint32_t i = 0; i < gfx_pipeline->stage_count; i++) { + struct vk_shader *shader = gfx_pipeline->stages[i].shader; + + uint32_t shader_exec_count = 0; + result = shader->ops->get_executable_properties(device, shader, + &shader_exec_count, + NULL); + assert(result == VK_SUCCESS); + *executable_count += shader_exec_count; + } + } else { + uint32_t arr_len = *executable_count; + *executable_count = 0; + for (uint32_t i = 0; i < gfx_pipeline->stage_count; i++) { + struct vk_shader *shader = gfx_pipeline->stages[i].shader; + + uint32_t shader_exec_count = arr_len - *executable_count; + result = shader->ops->get_executable_properties(device, shader, + &shader_exec_count, + &properties[*executable_count]); + if (result != VK_SUCCESS) + return result; + + *executable_count += shader_exec_count; + } + } + + return VK_SUCCESS; +} + +static inline struct vk_shader * +vk_graphics_pipeline_executable_shader(struct vk_device *device, + struct vk_graphics_pipeline *gfx_pipeline, + uint32_t *executable_index) +{ + for (uint32_t i = 0; i < gfx_pipeline->stage_count; i++) { + struct vk_shader *shader = gfx_pipeline->stages[i].shader; + + uint32_t shader_exec_count = 0; + shader->ops->get_executable_properties(device, shader, + &shader_exec_count, NULL); + + if (*executable_index < shader_exec_count) + return shader; + else + *executable_index -= shader_exec_count; + } + + return NULL; +} + +static VkResult +vk_graphics_pipeline_get_executable_statistics( + struct vk_device *device, + struct vk_pipeline *pipeline, + uint32_t executable_index, + uint32_t *statistic_count, + VkPipelineExecutableStatisticKHR *statistics) +{ + struct vk_graphics_pipeline *gfx_pipeline = + container_of(pipeline, struct vk_graphics_pipeline, base); + + struct vk_shader *shader = + vk_graphics_pipeline_executable_shader(device, gfx_pipeline, + &executable_index); + if (shader == NULL) { + *statistic_count = 0; + return VK_SUCCESS; + } + + return shader->ops->get_executable_statistics(device, shader, + executable_index, + statistic_count, + statistics); +} + +static VkResult +vk_graphics_pipeline_get_internal_representations( + struct vk_device *device, + struct vk_pipeline *pipeline, + uint32_t executable_index, + uint32_t *internal_representation_count, + VkPipelineExecutableInternalRepresentationKHR* internal_representations) +{ + struct vk_graphics_pipeline *gfx_pipeline = + container_of(pipeline, struct vk_graphics_pipeline, base); + + struct vk_shader *shader = + vk_graphics_pipeline_executable_shader(device, gfx_pipeline, + &executable_index); + if (shader == NULL) { + *internal_representation_count = 0; + return VK_SUCCESS; + } + + return shader->ops->get_executable_internal_representations( + device, shader, executable_index, + internal_representation_count, internal_representations); +} + +static const struct vk_pipeline_ops vk_graphics_pipeline_ops = { + .destroy = vk_graphics_pipeline_destroy, + .get_executable_statistics = 
vk_graphics_pipeline_get_executable_statistics, + .get_executable_properties = vk_graphics_pipeline_get_executable_properties, + .get_internal_representations = vk_graphics_pipeline_get_internal_representations, + .cmd_bind = vk_graphics_pipeline_cmd_bind, +}; + +static VkResult +vk_create_graphics_pipeline(struct vk_device *device, + struct vk_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipeline) +{ + VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, pCreateInfo->layout); + const int64_t pipeline_start = os_time_get_nano(); + VkResult result; + + const VkPipelineCreateFlags2KHR pipeline_flags = + vk_graphics_pipeline_create_flags(pCreateInfo); + + const VkPipelineCreationFeedbackCreateInfo *feedback_info = + vk_find_struct_const(pCreateInfo->pNext, + PIPELINE_CREATION_FEEDBACK_CREATE_INFO); + + const VkPipelineLibraryCreateInfoKHR *libs_info = + vk_find_struct_const(pCreateInfo->pNext, + PIPELINE_LIBRARY_CREATE_INFO_KHR); + + struct vk_graphics_pipeline *pipeline = + vk_pipeline_zalloc(device, &vk_graphics_pipeline_ops, + VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_flags, pAllocator, sizeof(*pipeline)); + if (pipeline == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + struct vk_pipeline_stage stages[PIPE_SHADER_MESH_TYPES]; + memset(stages, 0, sizeof(stages)); + + VkPipelineCreationFeedback stage_feedbacks[PIPE_SHADER_MESH_TYPES]; + memset(stage_feedbacks, 0, sizeof(stage_feedbacks)); + + struct vk_graphics_pipeline_state state_tmp, *state; + struct vk_graphics_pipeline_all_state all_state_tmp, *all_state; + if (pipeline->base.flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR) { + /* For pipeline libraries, the state is stored in the pipeline */ + state = &pipeline->lib.state; + all_state = &pipeline->lib.all_state; + } else { + /* For linked pipelines, we throw the state away at the end of pipeline + * creation and only keep the dynamic state. + */ + memset(&state_tmp, 0, sizeof(state_tmp)); + state = &state_tmp; + all_state = &all_state_tmp; + } + + /* If we have libraries, import them first. */ + if (libs_info) { + for (uint32_t i = 0; i < libs_info->libraryCount; i++) { + VK_FROM_HANDLE(vk_pipeline, lib_pipeline, libs_info->pLibraries[i]); + assert(lib_pipeline->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS); + assert(lib_pipeline->flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR); + struct vk_graphics_pipeline *lib_gfx_pipeline = + container_of(lib_pipeline, struct vk_graphics_pipeline, base); + + vk_graphics_pipeline_state_merge(state, &lib_gfx_pipeline->lib.state); + + pipeline->set_layout_count = MAX2(pipeline->set_layout_count, + lib_gfx_pipeline->set_layout_count); + for (uint32_t i = 0; i < lib_gfx_pipeline->set_layout_count; i++) { + if (lib_gfx_pipeline->set_layouts[i] == NULL) + continue; + + if (pipeline->set_layouts[i] == NULL) { + pipeline->set_layouts[i] = + vk_descriptor_set_layout_ref(lib_gfx_pipeline->set_layouts[i]); + } + } + + for (uint32_t i = 0; i < lib_gfx_pipeline->stage_count; i++) { + const struct vk_pipeline_stage *lib_stage = + &lib_gfx_pipeline->stages[i]; + + /* We shouldn't have duplicated stages in the imported pipeline + * but it's cheap enough to protect against it so we may as well. 
+ */ + assert(lib_stage->stage < ARRAY_SIZE(stages)); + assert(vk_pipeline_stage_is_null(&stages[lib_stage->stage])); + if (!vk_pipeline_stage_is_null(&stages[lib_stage->stage])) + continue; + + stages[lib_stage->stage] = vk_pipeline_stage_clone(lib_stage); + } + } + } + + result = vk_graphics_pipeline_state_fill(device, state, + pCreateInfo, + NULL /* driver_rp */, + 0 /* driver_rp_flags */, + all_state, + NULL, 0, NULL); + if (result != VK_SUCCESS) + goto fail_stages; + + if (!(pipeline->base.flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)) { + pipeline->linked.dynamic.vi = &pipeline->linked._dynamic_vi; + pipeline->linked.dynamic.ms.sample_locations = + &pipeline->linked._dynamic_sl; + vk_dynamic_graphics_state_fill(&pipeline->linked.dynamic, &state_tmp); + } + + if (pipeline_layout != NULL) { + pipeline->set_layout_count = MAX2(pipeline->set_layout_count, + pipeline_layout->set_count); + for (uint32_t i = 0; i < pipeline_layout->set_count; i++) { + if (pipeline_layout->set_layouts[i] == NULL) + continue; + + if (pipeline->set_layouts[i] == NULL) { + pipeline->set_layouts[i] = + vk_descriptor_set_layout_ref(pipeline_layout->set_layouts[i]); + } + } + } + + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + const VkPipelineShaderStageCreateInfo *stage_info = + &pCreateInfo->pStages[i]; + + const int64_t stage_start = os_time_get_nano(); + + assert(util_bitcount(stage_info->stage) == 1); + if (!(state->shader_stages & stage_info->stage)) + continue; + + gl_shader_stage stage = vk_to_mesa_shader_stage(stage_info->stage); + assert(vk_device_supports_stage(device, stage)); + + stage_feedbacks[stage].flags |= + VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT; + + if (!vk_pipeline_stage_is_null(&stages[stage])) + continue; + + struct vk_pipeline_precomp_shader *precomp; + result = vk_pipeline_precompile_shader(device, cache, pipeline_flags, + pCreateInfo->pNext, + stage_info, + &precomp); + if (result != VK_SUCCESS) + goto fail_stages; + + stages[stage] = (struct vk_pipeline_stage) { + .stage = stage, + .precomp = precomp, + }; + + const int64_t stage_end = os_time_get_nano(); + stage_feedbacks[stage].duration += stage_end - stage_start; + } + + /* Compact the array of stages */ + uint32_t stage_count = 0; + for (uint32_t s = 0; s < ARRAY_SIZE(stages); s++) { + assert(s >= stage_count); + if (!vk_pipeline_stage_is_null(&stages[s])) + stages[stage_count++] = stages[s]; + } + for (uint32_t s = stage_count; s < ARRAY_SIZE(stages); s++) + memset(&stages[s], 0, sizeof(stages[s])); + + /* Sort so we always give the driver shaders in order. + * + * This makes everything easier for everyone. This also helps stabilize + * shader keys so that we get a cache hit even if the client gives us + * the stages in a different order. + */ + qsort(stages, stage_count, sizeof(*stages), cmp_vk_pipeline_stages); + + result = vk_graphics_pipeline_compile_shaders(device, cache, pipeline, + pipeline_layout, state, + stage_count, stages, + stage_feedbacks); + if (result != VK_SUCCESS) + goto fail_stages; + + /* Throw away precompiled shaders unless the client explicitly asks us to + * keep them. 
+ */ + if (!(pipeline_flags & + VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT)) { + for (uint32_t i = 0; i < stage_count; i++) { + if (stages[i].precomp != NULL) { + vk_pipeline_precomp_shader_unref(device, stages[i].precomp); + stages[i].precomp = NULL; + } + } + } + + pipeline->stage_count = stage_count; + for (uint32_t i = 0; i < stage_count; i++) + pipeline->stages[i] = stages[i]; + + const int64_t pipeline_end = os_time_get_nano(); + if (feedback_info != NULL) { + VkPipelineCreationFeedback pipeline_feedback = { + .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT, + .duration = pipeline_end - pipeline_start, + }; + + /* From the Vulkan 1.3.275 spec: + * + * "An implementation should set the + * VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT + * bit if it was able to avoid the large majority of pipeline or + * pipeline stage creation work by using the pipelineCache parameter" + * + * We really shouldn't set this bit unless all the shaders hit the + * cache. + */ + uint32_t cache_hit_count = 0; + for (uint32_t i = 0; i < stage_count; i++) { + const gl_shader_stage stage = stages[i].stage; + if (stage_feedbacks[stage].flags & + VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT) + cache_hit_count++; + } + if (cache_hit_count > 0 && cache_hit_count == stage_count) { + pipeline_feedback.flags |= + VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; + } + + *feedback_info->pPipelineCreationFeedback = pipeline_feedback; + + /* VUID-VkGraphicsPipelineCreateInfo-pipelineStageCreationFeedbackCount-06594 */ + assert(feedback_info->pipelineStageCreationFeedbackCount == 0 || + feedback_info->pipelineStageCreationFeedbackCount == + pCreateInfo->stageCount); + for (uint32_t i = 0; + i < feedback_info->pipelineStageCreationFeedbackCount; i++) { + const gl_shader_stage stage = + vk_to_mesa_shader_stage(pCreateInfo->pStages[i].stage); + + feedback_info->pPipelineStageCreationFeedbacks[i] = + stage_feedbacks[stage]; + } + } + + *pPipeline = vk_pipeline_to_handle(&pipeline->base); + + return VK_SUCCESS; + +fail_stages: + for (uint32_t i = 0; i < ARRAY_SIZE(stages); i++) + vk_pipeline_stage_finish(device, &stages[i]); + + vk_graphics_pipeline_destroy(device, &pipeline->base, pAllocator); + + return result; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_CreateGraphicsPipelines(VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t createInfoCount, + const VkGraphicsPipelineCreateInfo *pCreateInfos, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipelines) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_pipeline_cache, cache, pipelineCache); + VkResult first_error_or_success = VK_SUCCESS; + + /* From the Vulkan 1.3.274 spec: + * + * "When attempting to create many pipelines in a single command, it is + * possible that creation may fail for a subset of them. In this case, + * the corresponding elements of pPipelines will be set to + * VK_NULL_HANDLE. + */ + memset(pPipelines, 0, createInfoCount * sizeof(*pPipelines)); + + unsigned i = 0; + for (; i < createInfoCount; i++) { + VkResult result = vk_create_graphics_pipeline(device, cache, + &pCreateInfos[i], + pAllocator, + &pPipelines[i]); + if (result == VK_SUCCESS) + continue; + + if (first_error_or_success == VK_SUCCESS) + first_error_or_success = result; + + /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED as it + * is not obvious what error should be report upon 2 different failures. 
+ */ + if (result != VK_PIPELINE_COMPILE_REQUIRED) + return result; + + const VkPipelineCreateFlags2KHR flags = + vk_graphics_pipeline_create_flags(&pCreateInfos[i]); + if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR) + return result; + } + + return first_error_or_success; +} + +struct vk_compute_pipeline { + struct vk_pipeline base; + struct vk_shader *shader; +}; + +static void +vk_compute_pipeline_destroy(struct vk_device *device, + struct vk_pipeline *pipeline, + const VkAllocationCallbacks *pAllocator) +{ + struct vk_compute_pipeline *comp_pipeline = + container_of(pipeline, struct vk_compute_pipeline, base); + + vk_shader_unref(device, comp_pipeline->shader); + vk_pipeline_free(device, pAllocator, pipeline); +} + +static void +vk_compute_pipeline_cmd_bind(struct vk_command_buffer *cmd_buffer, + struct vk_pipeline *pipeline) +{ + struct vk_device *device = cmd_buffer->base.device; + const struct vk_device_shader_ops *ops = device->shader_ops; + + struct vk_shader *shader = NULL; + if (pipeline != NULL) { + assert(pipeline->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE); + struct vk_compute_pipeline *comp_pipeline = + container_of(pipeline, struct vk_compute_pipeline, base); + + shader = comp_pipeline->shader; + + cmd_buffer->pipeline_shader_stages |= VK_SHADER_STAGE_COMPUTE_BIT; + } else { + cmd_buffer->pipeline_shader_stages &= ~VK_SHADER_STAGE_COMPUTE_BIT; + } + + gl_shader_stage stage = MESA_SHADER_COMPUTE; + ops->cmd_bind_shaders(cmd_buffer, 1, &stage, &shader); +} + +static VkResult +vk_pipeline_compile_compute_stage(struct vk_device *device, + struct vk_pipeline_cache *cache, + struct vk_compute_pipeline *pipeline, + struct vk_pipeline_layout *pipeline_layout, + struct vk_pipeline_stage *stage, + bool *cache_hit) +{ + const struct vk_device_shader_ops *ops = device->shader_ops; + VkResult result; + + const VkPushConstantRange *push_range = NULL; + if (pipeline_layout != NULL) { + for (uint32_t r = 0; r < pipeline_layout->push_range_count; r++) { + if (pipeline_layout->push_ranges[r].stageFlags & + VK_SHADER_STAGE_COMPUTE_BIT) { + assert(push_range == NULL); + push_range = &pipeline_layout->push_ranges[r]; + } + } + } + + VkShaderCreateFlagsEXT shader_flags = + vk_pipeline_to_shader_flags(pipeline->base.flags, MESA_SHADER_COMPUTE); + + struct mesa_blake3 blake3_ctx; + _mesa_blake3_init(&blake3_ctx); + + _mesa_blake3_update(&blake3_ctx, stage->precomp->blake3, + sizeof(stage->precomp->blake3)); + + _mesa_blake3_update(&blake3_ctx, &shader_flags, sizeof(shader_flags)); + + for (uint32_t i = 0; i < pipeline_layout->set_count; i++) { + if (pipeline_layout->set_layouts[i] != NULL) { + _mesa_blake3_update(&blake3_ctx, + pipeline_layout->set_layouts[i]->blake3, + sizeof(pipeline_layout->set_layouts[i]->blake3)); + } + } + if (push_range != NULL) + _mesa_blake3_update(&blake3_ctx, push_range, sizeof(*push_range)); + + struct vk_shader_pipeline_cache_key shader_key = { + .stage = MESA_SHADER_COMPUTE, + }; + _mesa_blake3_final(&blake3_ctx, shader_key.blake3); + + if (cache != NULL) { + struct vk_pipeline_cache_object *cache_obj = + vk_pipeline_cache_lookup_object(cache, &shader_key, + sizeof(shader_key), + &pipeline_shader_cache_ops, + cache_hit); + if (cache_obj != NULL) { + stage->shader = vk_shader_from_cache_obj(cache_obj); + return VK_SUCCESS; + } + } + + if (pipeline->base.flags & + VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR) + return VK_PIPELINE_COMPILE_REQUIRED; + + const struct nir_shader_compiler_options *nir_options = + 
ops->get_nir_options(device->physical, stage->stage, + &stage->precomp->rs); + + nir_shader *nir = vk_pipeline_precomp_shader_get_nir(stage->precomp, + nir_options); + if (nir == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + /* vk_device_shader_ops::compile() consumes the NIR regardless of whether + * or not it succeeds and only generates shaders on success. Once compile() + * returns, we own the shaders but not the NIR in infos. + */ + struct vk_shader_compile_info compile_info = { + .stage = stage->stage, + .flags = shader_flags, + .next_stage_mask = 0, + .nir = nir, + .robustness = &stage->precomp->rs, + .set_layout_count = pipeline_layout->set_count, + .set_layouts = pipeline_layout->set_layouts, + .push_constant_range_count = push_range != NULL, + .push_constant_ranges = push_range != NULL ? push_range : NULL, + }; + + struct vk_shader *shader; + result = ops->compile(device, 1, &compile_info, NULL, + &device->alloc, &shader); + if (result != VK_SUCCESS) + return result; + + vk_shader_init_cache_obj(device, shader, &shader_key, sizeof(shader_key)); + + struct vk_pipeline_cache_object *cache_obj = &shader->pipeline.cache_obj; + if (cache != NULL) + cache_obj = vk_pipeline_cache_add_object(cache, cache_obj); + + stage->shader = vk_shader_from_cache_obj(cache_obj); + + return VK_SUCCESS; +} + +static VkResult +vk_compute_pipeline_get_executable_properties( + struct vk_device *device, + struct vk_pipeline *pipeline, + uint32_t *executable_count, + VkPipelineExecutablePropertiesKHR *properties) +{ + struct vk_compute_pipeline *comp_pipeline = + container_of(pipeline, struct vk_compute_pipeline, base); + struct vk_shader *shader = comp_pipeline->shader; + + return shader->ops->get_executable_properties(device, shader, + executable_count, + properties); +} + +static VkResult +vk_compute_pipeline_get_executable_statistics( + struct vk_device *device, + struct vk_pipeline *pipeline, + uint32_t executable_index, + uint32_t *statistic_count, + VkPipelineExecutableStatisticKHR *statistics) +{ + struct vk_compute_pipeline *comp_pipeline = + container_of(pipeline, struct vk_compute_pipeline, base); + struct vk_shader *shader = comp_pipeline->shader; + + return shader->ops->get_executable_statistics(device, shader, + executable_index, + statistic_count, + statistics); +} + +static VkResult +vk_compute_pipeline_get_internal_representations( + struct vk_device *device, + struct vk_pipeline *pipeline, + uint32_t executable_index, + uint32_t *internal_representation_count, + VkPipelineExecutableInternalRepresentationKHR* internal_representations) +{ + struct vk_compute_pipeline *comp_pipeline = + container_of(pipeline, struct vk_compute_pipeline, base); + struct vk_shader *shader = comp_pipeline->shader; + + return shader->ops->get_executable_internal_representations( + device, shader, executable_index, + internal_representation_count, internal_representations); +} + +static const struct vk_pipeline_ops vk_compute_pipeline_ops = { + .destroy = vk_compute_pipeline_destroy, + .get_executable_statistics = vk_compute_pipeline_get_executable_statistics, + .get_executable_properties = vk_compute_pipeline_get_executable_properties, + .get_internal_representations = vk_compute_pipeline_get_internal_representations, + .cmd_bind = vk_compute_pipeline_cmd_bind, +}; + +static VkResult +vk_create_compute_pipeline(struct vk_device *device, + struct vk_pipeline_cache *cache, + const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipeline) +{ + 
VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, pCreateInfo->layout); + int64_t pipeline_start = os_time_get_nano(); + VkResult result; + + const VkPipelineCreateFlags2KHR pipeline_flags = + vk_compute_pipeline_create_flags(pCreateInfo); + + const VkPipelineCreationFeedbackCreateInfo *feedback_info = + vk_find_struct_const(pCreateInfo->pNext, + PIPELINE_CREATION_FEEDBACK_CREATE_INFO); + + struct vk_compute_pipeline *pipeline = + vk_pipeline_zalloc(device, &vk_compute_pipeline_ops, + VK_PIPELINE_BIND_POINT_COMPUTE, + pipeline_flags, pAllocator, sizeof(*pipeline)); + if (pipeline == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + struct vk_pipeline_stage stage = { + .stage = MESA_SHADER_COMPUTE, + }; + result = vk_pipeline_precompile_shader(device, cache, pipeline_flags, + pCreateInfo->pNext, + &pCreateInfo->stage, + &stage.precomp); + if (result != VK_SUCCESS) + goto fail_pipeline; + + bool cache_hit; + result = vk_pipeline_compile_compute_stage(device, cache, pipeline, + pipeline_layout, &stage, + &cache_hit); + if (result != VK_SUCCESS) + goto fail_stage; + + if (stage.precomp != NULL) + vk_pipeline_precomp_shader_unref(device, stage.precomp); + pipeline->shader = stage.shader; + + const int64_t pipeline_end = os_time_get_nano(); + if (feedback_info != NULL) { + VkPipelineCreationFeedback pipeline_feedback = { + .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT, + .duration = pipeline_end - pipeline_start, + }; + if (cache_hit) { + pipeline_feedback.flags |= + VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; + } + + *feedback_info->pPipelineCreationFeedback = pipeline_feedback; + if (feedback_info->pipelineStageCreationFeedbackCount > 0) { + feedback_info->pPipelineStageCreationFeedbacks[0] = + pipeline_feedback; + } + } + + *pPipeline = vk_pipeline_to_handle(&pipeline->base); + + return VK_SUCCESS; + +fail_stage: + vk_pipeline_stage_finish(device, &stage); +fail_pipeline: + vk_pipeline_free(device, pAllocator, &pipeline->base); + + return result; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_CreateComputePipelines(VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t createInfoCount, + const VkComputePipelineCreateInfo *pCreateInfos, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipelines) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_pipeline_cache, cache, pipelineCache); + VkResult first_error_or_success = VK_SUCCESS; + + /* From the Vulkan 1.3.274 spec: + * + * "When attempting to create many pipelines in a single command, it is + * possible that creation may fail for a subset of them. In this case, + * the corresponding elements of pPipelines will be set to + * VK_NULL_HANDLE. + */ + memset(pPipelines, 0, createInfoCount * sizeof(*pPipelines)); + + unsigned i = 0; + for (; i < createInfoCount; i++) { + VkResult result = vk_create_compute_pipeline(device, cache, + &pCreateInfos[i], + pAllocator, + &pPipelines[i]); + if (result == VK_SUCCESS) + continue; + + if (first_error_or_success == VK_SUCCESS) + first_error_or_success = result; + + /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED as it + * is not obvious what error should be report upon 2 different failures. 
+ */ + if (result != VK_PIPELINE_COMPILE_REQUIRED) + return result; + + const VkPipelineCreateFlags2KHR flags = + vk_compute_pipeline_create_flags(&pCreateInfos[i]); + if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR) + return result; + } + + return first_error_or_success; +} + +void +vk_cmd_unbind_pipelines_for_stages(struct vk_command_buffer *cmd_buffer, + VkShaderStageFlags stages) +{ + stages &= cmd_buffer->pipeline_shader_stages; + + if (stages & ~VK_SHADER_STAGE_COMPUTE_BIT) + vk_graphics_pipeline_cmd_bind(cmd_buffer, NULL); + + if (stages & VK_SHADER_STAGE_COMPUTE_BIT) + vk_compute_pipeline_cmd_bind(cmd_buffer, NULL); +} diff --git a/src/vulkan/runtime/vk_pipeline.h b/src/vulkan/runtime/vk_pipeline.h index 62ae730e1e4..ed05d567a8f 100644 --- a/src/vulkan/runtime/vk_pipeline.h +++ b/src/vulkan/runtime/vk_pipeline.h @@ -199,6 +199,10 @@ void vk_pipeline_free(struct vk_device *device, const VkAllocationCallbacks *alloc, struct vk_pipeline *pipeline); +void +vk_cmd_unbind_pipelines_for_stages(struct vk_command_buffer *cmd_buffer, + VkShaderStageFlags stages); + #ifdef __cplusplus } #endif diff --git a/src/vulkan/runtime/vk_shader.c b/src/vulkan/runtime/vk_shader.c index 41c41b2f363..6f377fa6f21 100644 --- a/src/vulkan/runtime/vk_shader.c +++ b/src/vulkan/runtime/vk_shader.c @@ -369,7 +369,10 @@ vk_common_GetShaderBinaryDataEXT(VkDevice _device, return result; } -#define VK_MAX_LINKED_SHADER_STAGES 5 +/* The only place where we have "real" linking is graphics shaders and there + * is a limit as to how many of them can be linked together at one time. + */ +#define VK_MAX_LINKED_SHADER_STAGES MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES VKAPI_ATTR VkResult VKAPI_CALL vk_common_CreateShadersEXT(VkDevice _device, @@ -552,10 +555,16 @@ vk_common_CmdBindShadersEXT(VkCommandBuffer commandBuffer, STACK_ARRAY(gl_shader_stage, stages, stageCount); STACK_ARRAY(struct vk_shader *, shaders, stageCount); + VkShaderStageFlags vk_stages = 0; for (uint32_t i = 0; i < stageCount; i++) { + vk_stages |= pStages[i]; stages[i] = vk_to_mesa_shader_stage(pStages[i]); shaders[i] = pShaders != NULL ? 
vk_shader_from_handle(pShaders[i]) : NULL; } + vk_cmd_unbind_pipelines_for_stages(cmd_buffer, vk_stages); + if (vk_stages & ~VK_SHADER_STAGE_COMPUTE_BIT) + vk_cmd_set_rp_attachments(cmd_buffer, ~0); + ops->cmd_bind_shaders(cmd_buffer, stageCount, stages, shaders); } diff --git a/src/vulkan/runtime/vk_shader.h b/src/vulkan/runtime/vk_shader.h index 0ee6e7681c3..8fb5090b129 100644 --- a/src/vulkan/runtime/vk_shader.h +++ b/src/vulkan/runtime/vk_shader.h @@ -28,6 +28,8 @@ #include "vk_limits.h" #include "vk_pipeline_cache.h" +#include "util/mesa-blake3.h" + #ifdef __cplusplus extern "C" { #endif @@ -45,6 +47,8 @@ struct vk_pipeline_robustness_state; int vk_shader_cmp_graphics_stages(gl_shader_stage a, gl_shader_stage b); +#define VK_SHADER_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_MESA 0x1000 + struct vk_shader_compile_info { gl_shader_stage stage; VkShaderCreateFlagsEXT flags; @@ -62,12 +66,30 @@ struct vk_shader_compile_info { struct vk_shader_ops; +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wpadded" +#endif +struct vk_shader_pipeline_cache_key { + gl_shader_stage stage; + blake3_hash blake3; +}; +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + struct vk_shader { struct vk_object_base base; const struct vk_shader_ops *ops; gl_shader_stage stage; + + /* Used for the generic VkPipeline implementation */ + struct { + struct vk_pipeline_cache_object cache_obj; + struct vk_shader_pipeline_cache_key cache_key; + } pipeline; }; VK_DEFINE_NONDISP_HANDLE_CASTS(vk_shader, base, VkShaderEXT, @@ -90,6 +112,39 @@ struct vk_shader_ops { bool (*serialize)(struct vk_device *device, const struct vk_shader *shader, struct blob *blob); + + /** Returns executable properties for this shader + * + * This is equivalent to vkGetPipelineExecutableProperties(), only for a + * single vk_shader. + */ + VkResult (*get_executable_properties)(struct vk_device *device, + const struct vk_shader *shader, + uint32_t *executable_count, + VkPipelineExecutablePropertiesKHR *properties); + + /** Returns executable statistics for this shader + * + * This is equivalent to vkGetPipelineExecutableStatistics(), only for a + * single vk_shader. + */ + VkResult (*get_executable_statistics)(struct vk_device *device, + const struct vk_shader *shader, + uint32_t executable_index, + uint32_t *statistic_count, + VkPipelineExecutableStatisticKHR *statistics); + + /** Returns executable internal representations for this shader + * + * This is equivalent to vkGetPipelineExecutableInternalRepresentations(), + * only for a single vk_shader. + */ + VkResult (*get_executable_internal_representations)( + struct vk_device *device, + const struct vk_shader *shader, + uint32_t executable_index, + uint32_t *internal_representation_count, + VkPipelineExecutableInternalRepresentationKHR *internal_representations); }; void *vk_shader_zalloc(struct vk_device *device, @@ -143,6 +198,23 @@ struct vk_device_shader_ops { */ void (*preprocess_nir)(struct vk_physical_device *device, nir_shader *nir); + /** True if the driver wants geometry stages linked + * + * If set to true, geometry stages will always be compiled with + * VK_SHADER_CREATE_LINK_STAGE_BIT_EXT when pipelines are used. + */ + bool link_geom_stages; + + /** Hash a vk_graphics_state object + * + * This callback hashes whatever bits of vk_graphics_pipeline_state might + * be used to compile a shader in one of the given stages. 
+ */ + void (*hash_graphics_state)(struct vk_physical_device *device, + const struct vk_graphics_pipeline_state *state, + VkShaderStageFlags stages, + blake3_hash blake3_out); + /** Compile (and potentially link) a set of shaders * * Unlike vkCreateShadersEXT, this callback will only ever be called with @@ -175,6 +247,10 @@ struct vk_device_shader_ops { uint32_t stage_count, const gl_shader_stage *stages, struct vk_shader ** const shaders); + + /** Sets dynamic state */ + void (*cmd_set_dynamic_graphics_state)(struct vk_command_buffer *cmd_buffer, + const struct vk_dynamic_graphics_state *state); }; #ifdef __cplusplus |
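For reference, a condensed summary of the two-level shader caching introduced above, reconstructed from vk_pipeline_precompile_shader() and vk_graphics_pipeline_compile_shaders(); this is a sketch of which inputs feed each key, not an exact byte layout.

/* 1. Precompiled NIR (vk_pipeline_precomp_shader)
 *      cache key = SHA1 from vk_pipeline_hash_shader_stage()
 *                  (shader stage create info + robustness state)
 *      blake3    = BLAKE3(robustness state, serialized NIR)
 *                  -> de-duplicates different SPIR-V or specialization
 *                     constants that end up as the same NIR
 *
 * 2. Final backend binary (vk_shader)
 *      key.blake3 = BLAKE3(precomp blake3 + VkShaderCreateFlagsEXT for
 *                          every stage in the linked partition,
 *                          hash_graphics_state() of the relevant state,
 *                          set layout and push range hashes,
 *                          merged tessellation info (TCS/TES only),
 *                          the set of geometry stages used together)
 *      key.stage  = the stage being looked up
 *
 * The compute path mixes in the same precomp hash, shader flags, set
 * layouts, and push range, without graphics state or tessellation info.
 */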