author     Faith Ekstrand <faith.ekstrand@collabora.com>   2024-01-02 17:37:47 -0600
committer  Marge Bot <emma+marge@anholt.net>               2024-02-27 22:17:09 +0000
commit     9308e8d90d26fca678fe72380d899cdae77415b5 (patch)
tree       443d9f8f754537df78e139fa1863092a4969c9a2
parent     c488dc9f5089033d3f630fa5a84d7a7ebd16cf89 (diff)
vulkan: Add generic graphics and compute VkPipeline implementations
These implementations are built on top of vk_shader.  For the most part, the
driver shouldn't notice a difference between draws consuming pipelines vs.
draws consuming shaders.  The only real difference is that, when
vk_device_shader_ops::compile() is called for pipelines, a struct
vk_graphics_pipeline_state is provided.  For shader objects, the state object
will be NULL, indicating that all state is unknown.  Beyond that, all the
remaining differences between Vulkan 1.0 pipelines,
VK_EXT_graphics_pipeline_library, and VK_EXT_shader_object are handled by the
Vulkan runtime code.

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27024>
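For illustration only (not part of this commit), a minimal sketch of the driver side of that contract.  The callback signature matches how the runtime invokes vk_device_shader_ops::compile() in the diff below; the drv_compile name is a hypothetical stand-in and the backend work is elided.

static VkResult
drv_compile(struct vk_device *device, uint32_t shader_count,
            struct vk_shader_compile_info *infos,
            const struct vk_graphics_pipeline_state *state,
            const VkAllocationCallbacks *pAllocator,
            struct vk_shader **shaders_out)
{
   /* state != NULL: compiling for a VkPipeline, so state known at pipeline
    * creation time may be baked into the generated code.
    * state == NULL: compiling for VK_EXT_shader_object, so every piece of
    * graphics state must be treated as dynamic/unknown.
    */
   const bool all_state_dynamic = (state == NULL);

   for (uint32_t i = 0; i < shader_count; i++) {
      /* The runtime transfers ownership of infos[i].nir to compile(),
       * success or failure.  A real driver lowers the NIR, emits backend
       * code, and writes shaders_out[i] here.
       */
   }

   (void)all_state_dynamic;
   return VK_SUCCESS;
}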
-rw-r--r--   src/vulkan/runtime/vk_command_buffer.h |    8
-rw-r--r--   src/vulkan/runtime/vk_limits.h         |    3
-rw-r--r--   src/vulkan/runtime/vk_pipeline.c       | 1730
-rw-r--r--   src/vulkan/runtime/vk_pipeline.h       |    4
-rw-r--r--   src/vulkan/runtime/vk_shader.c         |   11
-rw-r--r--   src/vulkan/runtime/vk_shader.h         |   76
6 files changed, 1830 insertions, 2 deletions
diff --git a/src/vulkan/runtime/vk_command_buffer.h b/src/vulkan/runtime/vk_command_buffer.h
index e49b3077d34..6ae637ca5ab 100644
--- a/src/vulkan/runtime/vk_command_buffer.h
+++ b/src/vulkan/runtime/vk_command_buffer.h
@@ -185,6 +185,14 @@ struct vk_command_buffer {
struct vk_attachment_state _attachments[8];
VkRenderPassSampleLocationsBeginInfoEXT *pass_sample_locations;
+
+ /**
+ * Bitmask of shader stages bound via a vk_pipeline since the last call to
+ * vkCmdBindShadersEXT().
+ *
+ * Used by the common vk_pipeline implementation
+ */
+ VkShaderStageFlags pipeline_shader_stages;
};
VK_DEFINE_HANDLE_CASTS(vk_command_buffer, base, VkCommandBuffer,
diff --git a/src/vulkan/runtime/vk_limits.h b/src/vulkan/runtime/vk_limits.h
index e4756794b08..50bfde0c0eb 100644
--- a/src/vulkan/runtime/vk_limits.h
+++ b/src/vulkan/runtime/vk_limits.h
@@ -24,6 +24,9 @@
#ifndef VK_LIMITS_H
#define VK_LIMITS_H
+/* Maximum number of shader stages in a single graphics pipeline */
+#define MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES 5
+
#define MESA_VK_MAX_DESCRIPTOR_SETS 32
/* From the Vulkan 1.3.274 spec:
diff --git a/src/vulkan/runtime/vk_pipeline.c b/src/vulkan/runtime/vk_pipeline.c
index ac7b66a13f4..c914120b87b 100644
--- a/src/vulkan/runtime/vk_pipeline.c
+++ b/src/vulkan/runtime/vk_pipeline.c
@@ -23,18 +23,23 @@
#include "vk_pipeline.h"
+#include "vk_alloc.h"
#include "vk_common_entrypoints.h"
#include "vk_command_buffer.h"
+#include "vk_descriptor_set_layout.h"
#include "vk_device.h"
+#include "vk_graphics_state.h"
#include "vk_log.h"
#include "vk_nir.h"
+#include "vk_physical_device.h"
+#include "vk_pipeline_layout.h"
+#include "vk_shader.h"
#include "vk_shader_module.h"
#include "vk_util.h"
#include "nir_serialize.h"
#include "util/mesa-sha1.h"
-#include "util/mesa-blake3.h"
bool
vk_pipeline_shader_stage_is_null(const VkPipelineShaderStageCreateInfo *info)
@@ -419,3 +424,1726 @@ vk_common_CmdBindPipeline(VkCommandBuffer commandBuffer,
pipeline->ops->cmd_bind(cmd_buffer, pipeline);
}
+
+static const struct vk_pipeline_cache_object_ops pipeline_shader_cache_ops;
+
+static struct vk_shader *
+vk_shader_from_cache_obj(struct vk_pipeline_cache_object *object)
+{
+ assert(object->ops == &pipeline_shader_cache_ops);
+ return container_of(object, struct vk_shader, pipeline.cache_obj);
+}
+
+static bool
+vk_pipeline_shader_serialize(struct vk_pipeline_cache_object *object,
+ struct blob *blob)
+{
+ struct vk_shader *shader = vk_shader_from_cache_obj(object);
+ struct vk_device *device = shader->base.device;
+
+ return shader->ops->serialize(device, shader, blob);
+}
+
+static void
+vk_shader_init_cache_obj(struct vk_device *device, struct vk_shader *shader,
+ const void *key_data, size_t key_size)
+{
+ assert(key_size == sizeof(shader->pipeline.cache_key));
+ memcpy(&shader->pipeline.cache_key, key_data,
+ sizeof(shader->pipeline.cache_key));
+
+ vk_pipeline_cache_object_init(device, &shader->pipeline.cache_obj,
+ &pipeline_shader_cache_ops,
+ &shader->pipeline.cache_key,
+ sizeof(shader->pipeline.cache_key));
+}
+
+static struct vk_pipeline_cache_object *
+vk_pipeline_shader_deserialize(struct vk_pipeline_cache *cache,
+ const void *key_data, size_t key_size,
+ struct blob_reader *blob)
+{
+ struct vk_device *device = cache->base.device;
+ const struct vk_device_shader_ops *ops = device->shader_ops;
+
+ /* TODO: Do we really want to always use the latest version? */
+ const uint32_t version = device->physical->properties.shaderBinaryVersion;
+
+ struct vk_shader *shader;
+ VkResult result = ops->deserialize(device, blob, version,
+ &device->alloc, &shader);
+ if (result != VK_SUCCESS) {
+ assert(result == VK_ERROR_OUT_OF_HOST_MEMORY);
+ return NULL;
+ }
+
+ vk_shader_init_cache_obj(device, shader, key_data, key_size);
+
+ return &shader->pipeline.cache_obj;
+}
+
+static void
+vk_pipeline_shader_destroy(struct vk_device *device,
+ struct vk_pipeline_cache_object *object)
+{
+ struct vk_shader *shader = vk_shader_from_cache_obj(object);
+ assert(shader->base.device == device);
+
+ vk_shader_destroy(device, shader, &device->alloc);
+}
+
+static const struct vk_pipeline_cache_object_ops pipeline_shader_cache_ops = {
+ .serialize = vk_pipeline_shader_serialize,
+ .deserialize = vk_pipeline_shader_deserialize,
+ .destroy = vk_pipeline_shader_destroy,
+};
+
+static struct vk_shader *
+vk_shader_ref(struct vk_shader *shader)
+{
+ vk_pipeline_cache_object_ref(&shader->pipeline.cache_obj);
+ return shader;
+}
+
+static void
+vk_shader_unref(struct vk_device *device, struct vk_shader *shader)
+{
+ vk_pipeline_cache_object_unref(device, &shader->pipeline.cache_obj);
+}
+
+struct vk_pipeline_tess_info {
+ unsigned tcs_vertices_out : 8;
+ unsigned primitive_mode : 2; /* tess_primitive_mode */
+ unsigned spacing : 2; /* gl_tess_spacing */
+ unsigned ccw : 1;
+ unsigned point_mode : 1;
+ unsigned _pad : 18;
+};
+static_assert(sizeof(struct vk_pipeline_tess_info) == 4,
+ "This struct has no holes");
+
+static void
+vk_pipeline_gather_nir_tess_info(const nir_shader *nir,
+ struct vk_pipeline_tess_info *info)
+{
+ info->tcs_vertices_out = nir->info.tess.tcs_vertices_out;
+ info->primitive_mode = nir->info.tess._primitive_mode;
+ info->spacing = nir->info.tess.spacing;
+ info->ccw = nir->info.tess.ccw;
+ info->point_mode = nir->info.tess.point_mode;
+}
+
+static void
+vk_pipeline_replace_nir_tess_info(nir_shader *nir,
+ const struct vk_pipeline_tess_info *info)
+{
+ nir->info.tess.tcs_vertices_out = info->tcs_vertices_out;
+ nir->info.tess._primitive_mode = info->primitive_mode;
+ nir->info.tess.spacing = info->spacing;
+ nir->info.tess.ccw = info->ccw;
+ nir->info.tess.point_mode = info->point_mode;
+}
+
+static void
+vk_pipeline_tess_info_merge(struct vk_pipeline_tess_info *dst,
+ const struct vk_pipeline_tess_info *src)
+{
+ /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
+ *
+ * "PointMode. Controls generation of points rather than triangles
+ * or lines. This functionality defaults to disabled, and is
+ * enabled if either shader stage includes the execution mode.
+ *
+ * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
+ * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
+ * and OutputVertices, it says:
+ *
+ * "One mode must be set in at least one of the tessellation
+ * shader stages."
+ *
+ * So, the fields can be set in either the TCS or TES, but they must
+ * agree if set in both.
+ */
+ assert(dst->tcs_vertices_out == 0 ||
+ src->tcs_vertices_out == 0 ||
+ dst->tcs_vertices_out == src->tcs_vertices_out);
+ dst->tcs_vertices_out |= src->tcs_vertices_out;
+
+ static_assert(TESS_SPACING_UNSPECIFIED == 0, "");
+ assert(dst->spacing == TESS_SPACING_UNSPECIFIED ||
+ src->spacing == TESS_SPACING_UNSPECIFIED ||
+ dst->spacing == src->spacing);
+ dst->spacing |= src->spacing;
+
+ static_assert(TESS_PRIMITIVE_UNSPECIFIED == 0, "");
+ assert(dst->primitive_mode == TESS_PRIMITIVE_UNSPECIFIED ||
+ src->primitive_mode == TESS_PRIMITIVE_UNSPECIFIED ||
+ dst->primitive_mode == src->primitive_mode);
+ dst->primitive_mode |= src->primitive_mode;
+ dst->ccw |= src->ccw;
+ dst->point_mode |= src->point_mode;
+}
+
+struct vk_pipeline_precomp_shader {
+ struct vk_pipeline_cache_object cache_obj;
+
+ /* Key for this cache_obj in the pipeline cache.
+ *
+ * This is always the output of vk_pipeline_hash_shader_stage() so it must
+ * be a SHA1 hash.
+ */
+ uint8_t cache_key[SHA1_DIGEST_LENGTH];
+
+ gl_shader_stage stage;
+
+ struct vk_pipeline_robustness_state rs;
+
+ /* Tessellation info if the shader is a tessellation shader */
+ struct vk_pipeline_tess_info tess;
+
+ /* Hash of the vk_pipeline_precomp_shader
+ *
+ * This is the hash of the final compiled NIR together with tess info and
+ * robustness state. It's used as a key for final binary lookups. By
+ * having this as a separate key, we can de-duplicate cases where you have
+ * different SPIR-V or specialization constants but end up compiling the
+ * same NIR shader in the end anyway.
+ */
+ blake3_hash blake3;
+
+ struct blob nir_blob;
+};
+
+static struct vk_pipeline_precomp_shader *
+vk_pipeline_precomp_shader_ref(struct vk_pipeline_precomp_shader *shader)
+{
+ vk_pipeline_cache_object_ref(&shader->cache_obj);
+ return shader;
+}
+
+static void
+vk_pipeline_precomp_shader_unref(struct vk_device *device,
+ struct vk_pipeline_precomp_shader *shader)
+{
+ vk_pipeline_cache_object_unref(device, &shader->cache_obj);
+}
+
+static const struct vk_pipeline_cache_object_ops pipeline_precomp_shader_cache_ops;
+
+static struct vk_pipeline_precomp_shader *
+vk_pipeline_precomp_shader_from_cache_obj(struct vk_pipeline_cache_object *obj)
+{
+ assert(obj->ops == &pipeline_precomp_shader_cache_ops);
+ return container_of(obj, struct vk_pipeline_precomp_shader, cache_obj);
+}
+
+static struct vk_pipeline_precomp_shader *
+vk_pipeline_precomp_shader_create(struct vk_device *device,
+ const void *key_data, size_t key_size,
+ const struct vk_pipeline_robustness_state *rs,
+ nir_shader *nir)
+{
+ struct blob blob;
+ blob_init(&blob);
+
+ nir_serialize(&blob, nir, false);
+
+ if (blob.out_of_memory)
+ goto fail_blob;
+
+ struct vk_pipeline_precomp_shader *shader =
+ vk_zalloc(&device->alloc, sizeof(*shader), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+ if (shader == NULL)
+ goto fail_blob;
+
+ assert(sizeof(shader->cache_key) == key_size);
+ memcpy(shader->cache_key, key_data, sizeof(shader->cache_key));
+
+ vk_pipeline_cache_object_init(device, &shader->cache_obj,
+ &pipeline_precomp_shader_cache_ops,
+ shader->cache_key,
+ sizeof(shader->cache_key));
+
+ shader->stage = nir->info.stage;
+ shader->rs = *rs;
+
+ vk_pipeline_gather_nir_tess_info(nir, &shader->tess);
+
+ struct mesa_blake3 blake3_ctx;
+ _mesa_blake3_init(&blake3_ctx);
+ _mesa_blake3_update(&blake3_ctx, rs, sizeof(*rs));
+ _mesa_blake3_update(&blake3_ctx, blob.data, blob.size);
+ _mesa_blake3_final(&blake3_ctx, shader->blake3);
+
+ shader->nir_blob = blob;
+
+ return shader;
+
+fail_blob:
+ blob_finish(&blob);
+
+ return NULL;
+}
+
+static bool
+vk_pipeline_precomp_shader_serialize(struct vk_pipeline_cache_object *obj,
+ struct blob *blob)
+{
+ struct vk_pipeline_precomp_shader *shader =
+ vk_pipeline_precomp_shader_from_cache_obj(obj);
+
+ blob_write_uint32(blob, shader->stage);
+ blob_write_bytes(blob, &shader->rs, sizeof(shader->rs));
+ blob_write_bytes(blob, &shader->tess, sizeof(shader->tess));
+ blob_write_bytes(blob, shader->blake3, sizeof(shader->blake3));
+ blob_write_uint64(blob, shader->nir_blob.size);
+ blob_write_bytes(blob, shader->nir_blob.data, shader->nir_blob.size);
+
+ return !blob->out_of_memory;
+}
+
+static struct vk_pipeline_cache_object *
+vk_pipeline_precomp_shader_deserialize(struct vk_pipeline_cache *cache,
+ const void *key_data, size_t key_size,
+ struct blob_reader *blob)
+{
+ struct vk_device *device = cache->base.device;
+
+ struct vk_pipeline_precomp_shader *shader =
+ vk_zalloc(&device->alloc, sizeof(*shader), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+ if (shader == NULL)
+ return NULL;
+
+ assert(sizeof(shader->cache_key) == key_size);
+ memcpy(shader->cache_key, key_data, sizeof(shader->cache_key));
+
+ vk_pipeline_cache_object_init(device, &shader->cache_obj,
+ &pipeline_precomp_shader_cache_ops,
+ shader->cache_key,
+ sizeof(shader->cache_key));
+
+ shader->stage = blob_read_uint32(blob);
+ blob_copy_bytes(blob, &shader->rs, sizeof(shader->rs));
+ blob_copy_bytes(blob, &shader->tess, sizeof(shader->tess));
+ blob_copy_bytes(blob, shader->blake3, sizeof(shader->blake3));
+
+ uint64_t nir_size = blob_read_uint64(blob);
+ if (blob->overrun || nir_size > SIZE_MAX)
+ goto fail_shader;
+
+ const void *nir_data = blob_read_bytes(blob, nir_size);
+ if (blob->overrun)
+ goto fail_shader;
+
+ blob_init(&shader->nir_blob);
+ blob_write_bytes(&shader->nir_blob, nir_data, nir_size);
+ if (shader->nir_blob.out_of_memory)
+ goto fail_nir_blob;
+
+ return &shader->cache_obj;
+
+fail_nir_blob:
+ blob_finish(&shader->nir_blob);
+fail_shader:
+ vk_pipeline_cache_object_finish(&shader->cache_obj);
+ vk_free(&device->alloc, shader);
+
+ return NULL;
+}
+
+static void
+vk_pipeline_precomp_shader_destroy(struct vk_device *device,
+ struct vk_pipeline_cache_object *obj)
+{
+ struct vk_pipeline_precomp_shader *shader =
+ vk_pipeline_precomp_shader_from_cache_obj(obj);
+
+ blob_finish(&shader->nir_blob);
+ vk_pipeline_cache_object_finish(&shader->cache_obj);
+ vk_free(&device->alloc, shader);
+}
+
+static nir_shader *
+vk_pipeline_precomp_shader_get_nir(const struct vk_pipeline_precomp_shader *shader,
+ const struct nir_shader_compiler_options *nir_options)
+{
+ struct blob_reader blob;
+ blob_reader_init(&blob, shader->nir_blob.data, shader->nir_blob.size);
+
+ nir_shader *nir = nir_deserialize(NULL, nir_options, &blob);
+ if (blob.overrun) {
+ ralloc_free(nir);
+ return NULL;
+ }
+
+ return nir;
+}
+
+static const struct vk_pipeline_cache_object_ops pipeline_precomp_shader_cache_ops = {
+ .serialize = vk_pipeline_precomp_shader_serialize,
+ .deserialize = vk_pipeline_precomp_shader_deserialize,
+ .destroy = vk_pipeline_precomp_shader_destroy,
+};
+
+static VkResult
+vk_pipeline_precompile_shader(struct vk_device *device,
+ struct vk_pipeline_cache *cache,
+ VkPipelineCreateFlags2KHR pipeline_flags,
+ const void *pipeline_info_pNext,
+ const VkPipelineShaderStageCreateInfo *info,
+ struct vk_pipeline_precomp_shader **ps_out)
+{
+ const struct vk_device_shader_ops *ops = device->shader_ops;
+ VkResult result;
+
+ struct vk_pipeline_robustness_state rs;
+ vk_pipeline_robustness_state_fill(device, &rs,
+ pipeline_info_pNext,
+ info->pNext);
+
+ uint8_t stage_sha1[SHA1_DIGEST_LENGTH];
+ vk_pipeline_hash_shader_stage(info, &rs, stage_sha1);
+
+ if (cache != NULL) {
+ struct vk_pipeline_cache_object *cache_obj =
+ vk_pipeline_cache_lookup_object(cache, stage_sha1, sizeof(stage_sha1),
+ &pipeline_precomp_shader_cache_ops,
+ NULL /* cache_hit */);
+ if (cache_obj != NULL) {
+ *ps_out = vk_pipeline_precomp_shader_from_cache_obj(cache_obj);
+ return VK_SUCCESS;
+ }
+ }
+
+ if (pipeline_flags &
+ VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR)
+ return VK_PIPELINE_COMPILE_REQUIRED;
+
+ const gl_shader_stage stage = vk_to_mesa_shader_stage(info->stage);
+ const struct nir_shader_compiler_options *nir_options =
+ ops->get_nir_options(device->physical, stage, &rs);
+ const struct spirv_to_nir_options spirv_options =
+ ops->get_spirv_options(device->physical, stage, &rs);
+
+ nir_shader *nir;
+ result = vk_pipeline_shader_stage_to_nir(device, info, &spirv_options,
+ nir_options, NULL, &nir);
+ if (result != VK_SUCCESS)
+ return result;
+
+ if (ops->preprocess_nir != NULL)
+ ops->preprocess_nir(device->physical, nir);
+
+ struct vk_pipeline_precomp_shader *shader =
+ vk_pipeline_precomp_shader_create(device, stage_sha1,
+ sizeof(stage_sha1),
+ &rs, nir);
+ ralloc_free(nir);
+ if (shader == NULL)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ if (cache != NULL) {
+ struct vk_pipeline_cache_object *cache_obj = &shader->cache_obj;
+ cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
+ shader = vk_pipeline_precomp_shader_from_cache_obj(cache_obj);
+ }
+
+ *ps_out = shader;
+
+ return VK_SUCCESS;
+}
+
+struct vk_pipeline_stage {
+ gl_shader_stage stage;
+
+ struct vk_pipeline_precomp_shader *precomp;
+ struct vk_shader *shader;
+};
+
+static int
+cmp_vk_pipeline_stages(const void *_a, const void *_b)
+{
+ const struct vk_pipeline_stage *a = _a, *b = _b;
+ return vk_shader_cmp_graphics_stages(a->stage, b->stage);
+}
+
+static bool
+vk_pipeline_stage_is_null(const struct vk_pipeline_stage *stage)
+{
+ return stage->precomp == NULL && stage->shader == NULL;
+}
+
+static void
+vk_pipeline_stage_finish(struct vk_device *device,
+ struct vk_pipeline_stage *stage)
+{
+ if (stage->precomp != NULL)
+ vk_pipeline_precomp_shader_unref(device, stage->precomp);
+
+ if (stage->shader)
+ vk_shader_unref(device, stage->shader);
+}
+
+static struct vk_pipeline_stage
+vk_pipeline_stage_clone(const struct vk_pipeline_stage *in)
+{
+ struct vk_pipeline_stage out = {
+ .stage = in->stage,
+ };
+
+ if (in->precomp)
+ out.precomp = vk_pipeline_precomp_shader_ref(in->precomp);
+
+ if (in->shader)
+ out.shader = vk_shader_ref(in->shader);
+
+ return out;
+}
+
+struct vk_graphics_pipeline {
+ struct vk_pipeline base;
+
+ union {
+ struct {
+ struct vk_graphics_pipeline_all_state all_state;
+ struct vk_graphics_pipeline_state state;
+ } lib;
+
+ struct {
+ struct vk_vertex_input_state _dynamic_vi;
+ struct vk_sample_locations_state _dynamic_sl;
+ struct vk_dynamic_graphics_state dynamic;
+ } linked;
+ };
+
+ uint32_t set_layout_count;
+ struct vk_descriptor_set_layout *set_layouts[MESA_VK_MAX_DESCRIPTOR_SETS];
+
+ uint32_t stage_count;
+ struct vk_pipeline_stage stages[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES];
+};
+
+static void
+vk_graphics_pipeline_destroy(struct vk_device *device,
+ struct vk_pipeline *pipeline,
+ const VkAllocationCallbacks *pAllocator)
+{
+ struct vk_graphics_pipeline *gfx_pipeline =
+ container_of(pipeline, struct vk_graphics_pipeline, base);
+
+ for (uint32_t i = 0; i < gfx_pipeline->stage_count; i++)
+ vk_pipeline_stage_finish(device, &gfx_pipeline->stages[i]);
+
+ for (uint32_t i = 0; i < gfx_pipeline->set_layout_count; i++) {
+ if (gfx_pipeline->set_layouts[i] != NULL)
+ vk_descriptor_set_layout_unref(device, gfx_pipeline->set_layouts[i]);
+ }
+
+ vk_pipeline_free(device, pAllocator, pipeline);
+}
+
+static bool
+vk_device_supports_stage(struct vk_device *device,
+ gl_shader_stage stage)
+{
+ const struct vk_features *features = &device->physical->supported_features;
+
+ switch (stage) {
+ case MESA_SHADER_VERTEX:
+ case MESA_SHADER_FRAGMENT:
+ case MESA_SHADER_COMPUTE:
+ return true;
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
+ return features->tessellationShader;
+ case MESA_SHADER_GEOMETRY:
+ return features->geometryShader;
+ case MESA_SHADER_TASK:
+ return features->taskShader;
+ case MESA_SHADER_MESH:
+ return features->meshShader;
+ default:
+ return false;
+ }
+}
+
+static const gl_shader_stage all_gfx_stages[] = {
+ MESA_SHADER_VERTEX,
+ MESA_SHADER_TESS_CTRL,
+ MESA_SHADER_TESS_EVAL,
+ MESA_SHADER_GEOMETRY,
+ MESA_SHADER_TASK,
+ MESA_SHADER_MESH,
+ MESA_SHADER_FRAGMENT,
+};
+
+static void
+vk_graphics_pipeline_cmd_bind(struct vk_command_buffer *cmd_buffer,
+ struct vk_pipeline *pipeline)
+{
+ struct vk_device *device = cmd_buffer->base.device;
+ const struct vk_device_shader_ops *ops = device->shader_ops;
+
+ struct vk_graphics_pipeline *gfx_pipeline = NULL;
+ struct vk_shader *stage_shader[PIPE_SHADER_MESH_TYPES] = { NULL, };
+ if (pipeline != NULL) {
+ assert(pipeline->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS);
+ assert(!(pipeline->flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR));
+ gfx_pipeline = container_of(pipeline, struct vk_graphics_pipeline, base);
+
+ for (uint32_t i = 0; i < gfx_pipeline->stage_count; i++) {
+ struct vk_shader *shader = gfx_pipeline->stages[i].shader;
+ stage_shader[shader->stage] = shader;
+ }
+ }
+
+ uint32_t stage_count = 0;
+ gl_shader_stage stages[ARRAY_SIZE(all_gfx_stages)];
+ struct vk_shader *shaders[ARRAY_SIZE(all_gfx_stages)];
+
+ VkShaderStageFlags vk_stages = 0;
+ for (uint32_t i = 0; i < ARRAY_SIZE(all_gfx_stages); i++) {
+ gl_shader_stage stage = all_gfx_stages[i];
+ if (!vk_device_supports_stage(device, stage)) {
+ assert(stage_shader[stage] == NULL);
+ continue;
+ }
+
+ vk_stages |= mesa_to_vk_shader_stage(stage);
+
+ stages[stage_count] = stage;
+ shaders[stage_count] = stage_shader[stage];
+ stage_count++;
+ }
+ ops->cmd_bind_shaders(cmd_buffer, stage_count, stages, shaders);
+
+ if (gfx_pipeline != NULL) {
+ cmd_buffer->pipeline_shader_stages |= vk_stages;
+ ops->cmd_set_dynamic_graphics_state(cmd_buffer,
+ &gfx_pipeline->linked.dynamic);
+ } else {
+ cmd_buffer->pipeline_shader_stages &= ~vk_stages;
+ }
+}
+
+static VkShaderCreateFlagsEXT
+vk_pipeline_to_shader_flags(VkPipelineCreateFlags2KHR pipeline_flags,
+ gl_shader_stage stage)
+{
+ VkShaderCreateFlagsEXT shader_flags = 0;
+
+ if (pipeline_flags & VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)
+ shader_flags |= VK_SHADER_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_MESA;
+
+ if (stage == MESA_SHADER_FRAGMENT) {
+ if (pipeline_flags & VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)
+ shader_flags |= VK_SHADER_CREATE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_EXT;
+
+ if (pipeline_flags & VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT)
+ shader_flags |= VK_SHADER_CREATE_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT;
+ }
+
+ if (stage == MESA_SHADER_COMPUTE) {
+ if (pipeline_flags & VK_PIPELINE_CREATE_2_DISPATCH_BASE_BIT_KHR)
+ shader_flags |= VK_SHADER_CREATE_DISPATCH_BASE_BIT_EXT;
+ }
+
+ return shader_flags;
+}
+
+static VkResult
+vk_graphics_pipeline_compile_shaders(struct vk_device *device,
+ struct vk_pipeline_cache *cache,
+ struct vk_graphics_pipeline *pipeline,
+ struct vk_pipeline_layout *pipeline_layout,
+ const struct vk_graphics_pipeline_state *state,
+ uint32_t stage_count,
+ struct vk_pipeline_stage *stages,
+ VkPipelineCreationFeedback *stage_feedbacks)
+{
+ const struct vk_device_shader_ops *ops = device->shader_ops;
+ VkResult result;
+
+ if (stage_count == 0)
+ return VK_SUCCESS;
+
+ /* If we're linking, throw away any previously compiled shaders as they
+ * likely haven't been properly linked.  We keep the precompiled shaders
+ * and still look them up in the cache, so it may still be fast.
+ */
+ if (pipeline->base.flags & VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT) {
+ for (uint32_t i = 0; i < stage_count; i++) {
+ if (stages[i].shader != NULL) {
+ vk_shader_unref(device, stages[i].shader);
+ stages[i].shader = NULL;
+ }
+ }
+ }
+
+ bool have_all_shaders = true;
+ VkShaderStageFlags all_stages = 0;
+ struct vk_pipeline_precomp_shader *tcs_precomp = NULL, *tes_precomp = NULL;
+ for (uint32_t i = 0; i < stage_count; i++) {
+ all_stages |= mesa_to_vk_shader_stage(stages[i].stage);
+
+ if (stages[i].shader == NULL)
+ have_all_shaders = false;
+
+ if (stages[i].stage == MESA_SHADER_TESS_CTRL)
+ tcs_precomp = stages[i].precomp;
+
+ if (stages[i].stage == MESA_SHADER_TESS_EVAL)
+ tes_precomp = stages[i].precomp;
+ }
+
+ /* If we already have a shader for each stage, there's nothing to do. */
+ if (have_all_shaders)
+ return VK_SUCCESS;
+
+ struct vk_pipeline_tess_info tess_info = { ._pad = 0 };
+ if (tcs_precomp != NULL && tes_precomp != NULL) {
+ tess_info = tcs_precomp->tess;
+ vk_pipeline_tess_info_merge(&tess_info, &tes_precomp->tess);
+ }
+
+ struct mesa_blake3 blake3_ctx;
+ _mesa_blake3_init(&blake3_ctx);
+ for (uint32_t i = 0; i < pipeline->set_layout_count; i++) {
+ if (pipeline->set_layouts[i] != NULL) {
+ _mesa_blake3_update(&blake3_ctx, pipeline->set_layouts[i]->blake3,
+ sizeof(pipeline->set_layouts[i]->blake3));
+ }
+ }
+ if (pipeline_layout != NULL) {
+ _mesa_blake3_update(&blake3_ctx, &pipeline_layout->push_ranges,
+ sizeof(pipeline_layout->push_ranges[0]) *
+ pipeline_layout->push_range_count);
+ }
+ blake3_hash layout_blake3;
+ _mesa_blake3_final(&blake3_ctx, layout_blake3);
+
+ /* Partition the shaders */
+ uint32_t part_count;
+ uint32_t partition[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES + 1] = { 0 };
+ if (pipeline->base.flags & VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT) {
+ partition[1] = stage_count;
+ part_count = 1;
+ } else if (ops->link_geom_stages) {
+ if (stages[0].stage == MESA_SHADER_FRAGMENT) {
+ assert(stage_count == 1);
+ partition[1] = stage_count;
+ part_count = 1;
+ } else if (stages[stage_count - 1].stage == MESA_SHADER_FRAGMENT) {
+ /* In this case we have both */
+ assert(stage_count > 1);
+ partition[1] = stage_count - 1;
+ partition[2] = stage_count;
+ part_count = 2;
+ } else {
+ /* In this case we only have geometry */
+ partition[1] = stage_count;
+ part_count = 1;
+ }
+ } else {
+ /* Otherwise, we don't want to link anything */
+ part_count = stage_count;
+ for (uint32_t i = 0; i < stage_count; i++)
+ partition[i + 1] = i + 1;
+ }
+
+ for (uint32_t p = 0; p < part_count; p++) {
+ const int64_t part_start = os_time_get_nano();
+
+ struct vk_shader_pipeline_cache_key shader_key = { 0 };
+
+ _mesa_blake3_init(&blake3_ctx);
+
+ VkShaderStageFlags part_stages = 0;
+ for (uint32_t i = partition[p]; i < partition[p + 1]; i++) {
+ const struct vk_pipeline_stage *stage = &stages[i];
+
+ part_stages |= mesa_to_vk_shader_stage(stage->stage);
+ _mesa_blake3_update(&blake3_ctx, stage->precomp->blake3,
+ sizeof(stage->precomp->blake3));
+
+ VkShaderCreateFlagsEXT shader_flags =
+ vk_pipeline_to_shader_flags(pipeline->base.flags, stage->stage);
+ _mesa_blake3_update(&blake3_ctx, &shader_flags, sizeof(shader_flags));
+ }
+
+ blake3_hash state_blake3;
+ ops->hash_graphics_state(device->physical, state,
+ part_stages, state_blake3);
+
+ _mesa_blake3_update(&blake3_ctx, state_blake3, sizeof(state_blake3));
+ _mesa_blake3_update(&blake3_ctx, layout_blake3, sizeof(layout_blake3));
+
+ if (part_stages & (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
+ VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))
+ _mesa_blake3_update(&blake3_ctx, &tess_info, sizeof(tess_info));
+
+ /* The set of geometry stages used together is used to generate the
+ * nextStage mask as well as VK_SHADER_CREATE_NO_TASK_SHADER_BIT_EXT.
+ */
+ const VkShaderStageFlags geom_stages =
+ all_stages & ~VK_SHADER_STAGE_FRAGMENT_BIT;
+ _mesa_blake3_update(&blake3_ctx, &geom_stages, sizeof(geom_stages));
+
+ _mesa_blake3_final(&blake3_ctx, shader_key.blake3);
+
+ if (cache != NULL) {
+ /* From the Vulkan 1.3.278 spec:
+ *
+ * "VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT
+ * indicates that a readily usable pipeline or pipeline stage was
+ * found in the pipelineCache specified by the application in the
+ * pipeline creation command.
+ *
+ * [...]
+ *
+ * Note
+ *
+ * Implementations are encouraged to provide a meaningful signal
+ * to applications using this bit. The intention is to communicate
+ * to the application that the pipeline or pipeline stage was
+ * created “as fast as it gets” using the pipeline cache provided
+ * by the application. If an implementation uses an internal
+ * cache, it is discouraged from setting this bit as the feedback
+ * would be unactionable."
+ *
+ * The cache_hit value returned by vk_pipeline_cache_lookup_object()
+ * is only set to true when the shader is found in the provided
+ * pipeline cache. It is left false if we fail to find it in the
+ * memory cache but find it in the disk cache even though that's
+ * still a cache hit from the perspective of the compile pipeline.
+ */
+ bool all_shaders_found = true;
+ bool all_cache_hits = true;
+ for (uint32_t i = partition[p]; i < partition[p + 1]; i++) {
+ struct vk_pipeline_stage *stage = &stages[i];
+
+ shader_key.stage = stage->stage;
+
+ bool cache_hit = false;
+ struct vk_pipeline_cache_object *cache_obj =
+ vk_pipeline_cache_lookup_object(cache, &shader_key,
+ sizeof(shader_key),
+ &pipeline_shader_cache_ops,
+ &cache_hit);
+ if (cache_obj != NULL) {
+ stage->shader = vk_shader_from_cache_obj(cache_obj);
+ } else {
+ all_shaders_found = false;
+ }
+
+ if (cache_obj == NULL && !cache_hit)
+ all_cache_hits = false;
+ }
+
+ if (all_cache_hits) {
+ /* The pipeline cache only really helps if we hit for everything
+ * in the partition. Otherwise, we have to go re-compile it all
+ * anyway.
+ */
+ for (uint32_t i = partition[p]; i < partition[p + 1]; i++) {
+ struct vk_pipeline_stage *stage = &stages[i];
+
+ stage_feedbacks[stage->stage].flags |=
+ VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
+ }
+ }
+
+ if (all_shaders_found) {
+ /* Update duration to take cache lookups into account */
+ const int64_t part_end = os_time_get_nano();
+ for (uint32_t i = partition[p]; i < partition[p + 1]; i++) {
+ struct vk_pipeline_stage *stage = &stages[i];
+ stage_feedbacks[stage->stage].duration += part_end - part_start;
+ }
+ continue;
+ }
+ }
+
+ if (pipeline->base.flags &
+ VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR)
+ return VK_PIPELINE_COMPILE_REQUIRED;
+
+ struct vk_shader_compile_info infos[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES];
+ for (uint32_t i = partition[p]; i < partition[p + 1]; i++) {
+ struct vk_pipeline_stage *stage = &stages[i];
+
+ VkShaderCreateFlagsEXT shader_flags =
+ vk_pipeline_to_shader_flags(pipeline->base.flags, stage->stage);
+
+ if (partition[p + 1] - partition[p] > 1)
+ shader_flags |= VK_SHADER_CREATE_LINK_STAGE_BIT_EXT;
+
+ if ((part_stages & VK_SHADER_STAGE_MESH_BIT_EXT) &&
+ !(geom_stages & VK_SHADER_STAGE_TASK_BIT_EXT))
+ shader_flags |= VK_SHADER_CREATE_NO_TASK_SHADER_BIT_EXT;
+
+ VkShaderStageFlags next_stage;
+ if (stage->stage == MESA_SHADER_FRAGMENT) {
+ next_stage = 0;
+ } else if (i + 1 < stage_count) {
+ /* We hash geom_stages above so this is safe */
+ next_stage = mesa_to_vk_shader_stage(stages[i + 1].stage);
+ } else {
+ /* We're the last geometry stage */
+ next_stage = VK_SHADER_STAGE_FRAGMENT_BIT;
+ }
+
+ const struct nir_shader_compiler_options *nir_options =
+ ops->get_nir_options(device->physical, stage->stage,
+ &stage->precomp->rs);
+
+ nir_shader *nir =
+ vk_pipeline_precomp_shader_get_nir(stage->precomp, nir_options);
+ if (nir == NULL) {
+ for (uint32_t j = partition[p]; j < i; j++)
+ ralloc_free(infos[j].nir);
+
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ if (stage->stage == MESA_SHADER_TESS_CTRL ||
+ stage->stage == MESA_SHADER_TESS_EVAL)
+ vk_pipeline_replace_nir_tess_info(nir, &tess_info);
+
+ const VkPushConstantRange *push_range = NULL;
+ if (pipeline_layout != NULL) {
+ for (uint32_t r = 0; r < pipeline_layout->push_range_count; r++) {
+ if (pipeline_layout->push_ranges[r].stageFlags &
+ mesa_to_vk_shader_stage(stage->stage)) {
+ assert(push_range == NULL);
+ push_range = &pipeline_layout->push_ranges[r];
+ }
+ }
+ }
+
+ infos[i] = (struct vk_shader_compile_info) {
+ .stage = stage->stage,
+ .flags = shader_flags,
+ .next_stage_mask = next_stage,
+ .nir = nir,
+ .robustness = &stage->precomp->rs,
+ .set_layout_count = pipeline->set_layout_count,
+ .set_layouts = pipeline->set_layouts,
+ .push_constant_range_count = push_range != NULL,
+ .push_constant_ranges = push_range != NULL ? push_range : NULL,
+ };
+ }
+
+ /* vk_shader_ops::compile() consumes the NIR regardless of whether or
+ * not it succeeds and only generates shaders on success. Once this
+ * returns, we own the shaders but not the NIR in infos.
+ */
+ struct vk_shader *shaders[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES];
+ result = ops->compile(device, partition[p + 1] - partition[p],
+ &infos[partition[p]],
+ state,
+ &device->alloc,
+ &shaders[partition[p]]);
+ if (result != VK_SUCCESS)
+ return result;
+
+ const int64_t part_end = os_time_get_nano();
+ for (uint32_t i = partition[p]; i < partition[p + 1]; i++) {
+ struct vk_pipeline_stage *stage = &stages[i];
+
+ if (stage->shader == NULL) {
+ shader_key.stage = stage->stage;
+ vk_shader_init_cache_obj(device, shaders[i], &shader_key,
+ sizeof(shader_key));
+
+ struct vk_pipeline_cache_object *cache_obj =
+ &shaders[i]->pipeline.cache_obj;
+ if (cache != NULL)
+ cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
+
+ stage->shader = vk_shader_from_cache_obj(cache_obj);
+ } else {
+ /* This can happen if only some of the shaders were found
+ * in the pipeline cache. In this case, we just throw away the
+ * shader as vk_pipeline_cache_add_object() would throw it away
+ * for us anyway.
+ */
+ vk_shader_destroy(device, shaders[i], &device->alloc);
+ }
+
+ stage_feedbacks[stage->stage].duration += part_end - part_start;
+ }
+ }
+
+ return VK_SUCCESS;
+}
+
+static VkResult
+vk_graphics_pipeline_get_executable_properties(
+ struct vk_device *device,
+ struct vk_pipeline *pipeline,
+ uint32_t *executable_count,
+ VkPipelineExecutablePropertiesKHR *properties)
+{
+ struct vk_graphics_pipeline *gfx_pipeline =
+ container_of(pipeline, struct vk_graphics_pipeline, base);
+ VkResult result;
+
+ if (properties == NULL) {
+ *executable_count = 0;
+ for (uint32_t i = 0; i < gfx_pipeline->stage_count; i++) {
+ struct vk_shader *shader = gfx_pipeline->stages[i].shader;
+
+ uint32_t shader_exec_count = 0;
+ result = shader->ops->get_executable_properties(device, shader,
+ &shader_exec_count,
+ NULL);
+ assert(result == VK_SUCCESS);
+ *executable_count += shader_exec_count;
+ }
+ } else {
+ uint32_t arr_len = *executable_count;
+ *executable_count = 0;
+ for (uint32_t i = 0; i < gfx_pipeline->stage_count; i++) {
+ struct vk_shader *shader = gfx_pipeline->stages[i].shader;
+
+ uint32_t shader_exec_count = arr_len - *executable_count;
+ result = shader->ops->get_executable_properties(device, shader,
+ &shader_exec_count,
+ &properties[*executable_count]);
+ if (result != VK_SUCCESS)
+ return result;
+
+ *executable_count += shader_exec_count;
+ }
+ }
+
+ return VK_SUCCESS;
+}
+
+static inline struct vk_shader *
+vk_graphics_pipeline_executable_shader(struct vk_device *device,
+ struct vk_graphics_pipeline *gfx_pipeline,
+ uint32_t *executable_index)
+{
+ for (uint32_t i = 0; i < gfx_pipeline->stage_count; i++) {
+ struct vk_shader *shader = gfx_pipeline->stages[i].shader;
+
+ uint32_t shader_exec_count = 0;
+ shader->ops->get_executable_properties(device, shader,
+ &shader_exec_count, NULL);
+
+ if (*executable_index < shader_exec_count)
+ return shader;
+ else
+ *executable_index -= shader_exec_count;
+ }
+
+ return NULL;
+}
+
+static VkResult
+vk_graphics_pipeline_get_executable_statistics(
+ struct vk_device *device,
+ struct vk_pipeline *pipeline,
+ uint32_t executable_index,
+ uint32_t *statistic_count,
+ VkPipelineExecutableStatisticKHR *statistics)
+{
+ struct vk_graphics_pipeline *gfx_pipeline =
+ container_of(pipeline, struct vk_graphics_pipeline, base);
+
+ struct vk_shader *shader =
+ vk_graphics_pipeline_executable_shader(device, gfx_pipeline,
+ &executable_index);
+ if (shader == NULL) {
+ *statistic_count = 0;
+ return VK_SUCCESS;
+ }
+
+ return shader->ops->get_executable_statistics(device, shader,
+ executable_index,
+ statistic_count,
+ statistics);
+}
+
+static VkResult
+vk_graphics_pipeline_get_internal_representations(
+ struct vk_device *device,
+ struct vk_pipeline *pipeline,
+ uint32_t executable_index,
+ uint32_t *internal_representation_count,
+ VkPipelineExecutableInternalRepresentationKHR* internal_representations)
+{
+ struct vk_graphics_pipeline *gfx_pipeline =
+ container_of(pipeline, struct vk_graphics_pipeline, base);
+
+ struct vk_shader *shader =
+ vk_graphics_pipeline_executable_shader(device, gfx_pipeline,
+ &executable_index);
+ if (shader == NULL) {
+ *internal_representation_count = 0;
+ return VK_SUCCESS;
+ }
+
+ return shader->ops->get_executable_internal_representations(
+ device, shader, executable_index,
+ internal_representation_count, internal_representations);
+}
+
+static const struct vk_pipeline_ops vk_graphics_pipeline_ops = {
+ .destroy = vk_graphics_pipeline_destroy,
+ .get_executable_statistics = vk_graphics_pipeline_get_executable_statistics,
+ .get_executable_properties = vk_graphics_pipeline_get_executable_properties,
+ .get_internal_representations = vk_graphics_pipeline_get_internal_representations,
+ .cmd_bind = vk_graphics_pipeline_cmd_bind,
+};
+
+static VkResult
+vk_create_graphics_pipeline(struct vk_device *device,
+ struct vk_pipeline_cache *cache,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkPipeline *pPipeline)
+{
+ VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, pCreateInfo->layout);
+ const int64_t pipeline_start = os_time_get_nano();
+ VkResult result;
+
+ const VkPipelineCreateFlags2KHR pipeline_flags =
+ vk_graphics_pipeline_create_flags(pCreateInfo);
+
+ const VkPipelineCreationFeedbackCreateInfo *feedback_info =
+ vk_find_struct_const(pCreateInfo->pNext,
+ PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
+
+ const VkPipelineLibraryCreateInfoKHR *libs_info =
+ vk_find_struct_const(pCreateInfo->pNext,
+ PIPELINE_LIBRARY_CREATE_INFO_KHR);
+
+ struct vk_graphics_pipeline *pipeline =
+ vk_pipeline_zalloc(device, &vk_graphics_pipeline_ops,
+ VK_PIPELINE_BIND_POINT_GRAPHICS,
+ pipeline_flags, pAllocator, sizeof(*pipeline));
+ if (pipeline == NULL)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ struct vk_pipeline_stage stages[PIPE_SHADER_MESH_TYPES];
+ memset(stages, 0, sizeof(stages));
+
+ VkPipelineCreationFeedback stage_feedbacks[PIPE_SHADER_MESH_TYPES];
+ memset(stage_feedbacks, 0, sizeof(stage_feedbacks));
+
+ struct vk_graphics_pipeline_state state_tmp, *state;
+ struct vk_graphics_pipeline_all_state all_state_tmp, *all_state;
+ if (pipeline->base.flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR) {
+ /* For pipeline libraries, the state is stored in the pipeline */
+ state = &pipeline->lib.state;
+ all_state = &pipeline->lib.all_state;
+ } else {
+ /* For linked pipelines, we throw the state away at the end of pipeline
+ * creation and only keep the dynamic state.
+ */
+ memset(&state_tmp, 0, sizeof(state_tmp));
+ state = &state_tmp;
+ all_state = &all_state_tmp;
+ }
+
+ /* If we have libraries, import them first. */
+ if (libs_info) {
+ for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
+ VK_FROM_HANDLE(vk_pipeline, lib_pipeline, libs_info->pLibraries[i]);
+ assert(lib_pipeline->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS);
+ assert(lib_pipeline->flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR);
+ struct vk_graphics_pipeline *lib_gfx_pipeline =
+ container_of(lib_pipeline, struct vk_graphics_pipeline, base);
+
+ vk_graphics_pipeline_state_merge(state, &lib_gfx_pipeline->lib.state);
+
+ pipeline->set_layout_count = MAX2(pipeline->set_layout_count,
+ lib_gfx_pipeline->set_layout_count);
+ for (uint32_t i = 0; i < lib_gfx_pipeline->set_layout_count; i++) {
+ if (lib_gfx_pipeline->set_layouts[i] == NULL)
+ continue;
+
+ if (pipeline->set_layouts[i] == NULL) {
+ pipeline->set_layouts[i] =
+ vk_descriptor_set_layout_ref(lib_gfx_pipeline->set_layouts[i]);
+ }
+ }
+
+ for (uint32_t i = 0; i < lib_gfx_pipeline->stage_count; i++) {
+ const struct vk_pipeline_stage *lib_stage =
+ &lib_gfx_pipeline->stages[i];
+
+ /* We shouldn't have duplicated stages in the imported pipeline
+ * but it's cheap enough to protect against it so we may as well.
+ */
+ assert(lib_stage->stage < ARRAY_SIZE(stages));
+ assert(vk_pipeline_stage_is_null(&stages[lib_stage->stage]));
+ if (!vk_pipeline_stage_is_null(&stages[lib_stage->stage]))
+ continue;
+
+ stages[lib_stage->stage] = vk_pipeline_stage_clone(lib_stage);
+ }
+ }
+ }
+
+ result = vk_graphics_pipeline_state_fill(device, state,
+ pCreateInfo,
+ NULL /* driver_rp */,
+ 0 /* driver_rp_flags */,
+ all_state,
+ NULL, 0, NULL);
+ if (result != VK_SUCCESS)
+ goto fail_stages;
+
+ if (!(pipeline->base.flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)) {
+ pipeline->linked.dynamic.vi = &pipeline->linked._dynamic_vi;
+ pipeline->linked.dynamic.ms.sample_locations =
+ &pipeline->linked._dynamic_sl;
+ vk_dynamic_graphics_state_fill(&pipeline->linked.dynamic, &state_tmp);
+ }
+
+ if (pipeline_layout != NULL) {
+ pipeline->set_layout_count = MAX2(pipeline->set_layout_count,
+ pipeline_layout->set_count);
+ for (uint32_t i = 0; i < pipeline_layout->set_count; i++) {
+ if (pipeline_layout->set_layouts[i] == NULL)
+ continue;
+
+ if (pipeline->set_layouts[i] == NULL) {
+ pipeline->set_layouts[i] =
+ vk_descriptor_set_layout_ref(pipeline_layout->set_layouts[i]);
+ }
+ }
+ }
+
+ for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
+ const VkPipelineShaderStageCreateInfo *stage_info =
+ &pCreateInfo->pStages[i];
+
+ const int64_t stage_start = os_time_get_nano();
+
+ assert(util_bitcount(stage_info->stage) == 1);
+ if (!(state->shader_stages & stage_info->stage))
+ continue;
+
+ gl_shader_stage stage = vk_to_mesa_shader_stage(stage_info->stage);
+ assert(vk_device_supports_stage(device, stage));
+
+ stage_feedbacks[stage].flags |=
+ VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
+
+ if (!vk_pipeline_stage_is_null(&stages[stage]))
+ continue;
+
+ struct vk_pipeline_precomp_shader *precomp;
+ result = vk_pipeline_precompile_shader(device, cache, pipeline_flags,
+ pCreateInfo->pNext,
+ stage_info,
+ &precomp);
+ if (result != VK_SUCCESS)
+ goto fail_stages;
+
+ stages[stage] = (struct vk_pipeline_stage) {
+ .stage = stage,
+ .precomp = precomp,
+ };
+
+ const int64_t stage_end = os_time_get_nano();
+ stage_feedbacks[stage].duration += stage_end - stage_start;
+ }
+
+ /* Compact the array of stages */
+ uint32_t stage_count = 0;
+ for (uint32_t s = 0; s < ARRAY_SIZE(stages); s++) {
+ assert(s >= stage_count);
+ if (!vk_pipeline_stage_is_null(&stages[s]))
+ stages[stage_count++] = stages[s];
+ }
+ for (uint32_t s = stage_count; s < ARRAY_SIZE(stages); s++)
+ memset(&stages[s], 0, sizeof(stages[s]));
+
+ /* Sort so we always give the driver shaders in order.
+ *
+ * This makes everything easier for everyone. This also helps stabilize
+ * shader keys so that we get a cache hit even if the client gives us
+ * the stages in a different order.
+ */
+ qsort(stages, stage_count, sizeof(*stages), cmp_vk_pipeline_stages);
+
+ result = vk_graphics_pipeline_compile_shaders(device, cache, pipeline,
+ pipeline_layout, state,
+ stage_count, stages,
+ stage_feedbacks);
+ if (result != VK_SUCCESS)
+ goto fail_stages;
+
+ /* Throw away precompiled shaders unless the client explicitly asks us to
+ * keep them.
+ */
+ if (!(pipeline_flags &
+ VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT)) {
+ for (uint32_t i = 0; i < stage_count; i++) {
+ if (stages[i].precomp != NULL) {
+ vk_pipeline_precomp_shader_unref(device, stages[i].precomp);
+ stages[i].precomp = NULL;
+ }
+ }
+ }
+
+ pipeline->stage_count = stage_count;
+ for (uint32_t i = 0; i < stage_count; i++)
+ pipeline->stages[i] = stages[i];
+
+ const int64_t pipeline_end = os_time_get_nano();
+ if (feedback_info != NULL) {
+ VkPipelineCreationFeedback pipeline_feedback = {
+ .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
+ .duration = pipeline_end - pipeline_start,
+ };
+
+ /* From the Vulkan 1.3.275 spec:
+ *
+ * "An implementation should set the
+ * VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT
+ * bit if it was able to avoid the large majority of pipeline or
+ * pipeline stage creation work by using the pipelineCache parameter"
+ *
+ * We really shouldn't set this bit unless all the shaders hit the
+ * cache.
+ */
+ uint32_t cache_hit_count = 0;
+ for (uint32_t i = 0; i < stage_count; i++) {
+ const gl_shader_stage stage = stages[i].stage;
+ if (stage_feedbacks[stage].flags &
+ VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT)
+ cache_hit_count++;
+ }
+ if (cache_hit_count > 0 && cache_hit_count == stage_count) {
+ pipeline_feedback.flags |=
+ VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
+ }
+
+ *feedback_info->pPipelineCreationFeedback = pipeline_feedback;
+
+ /* VUID-VkGraphicsPipelineCreateInfo-pipelineStageCreationFeedbackCount-06594 */
+ assert(feedback_info->pipelineStageCreationFeedbackCount == 0 ||
+ feedback_info->pipelineStageCreationFeedbackCount ==
+ pCreateInfo->stageCount);
+ for (uint32_t i = 0;
+ i < feedback_info->pipelineStageCreationFeedbackCount; i++) {
+ const gl_shader_stage stage =
+ vk_to_mesa_shader_stage(pCreateInfo->pStages[i].stage);
+
+ feedback_info->pPipelineStageCreationFeedbacks[i] =
+ stage_feedbacks[stage];
+ }
+ }
+
+ *pPipeline = vk_pipeline_to_handle(&pipeline->base);
+
+ return VK_SUCCESS;
+
+fail_stages:
+ for (uint32_t i = 0; i < ARRAY_SIZE(stages); i++)
+ vk_pipeline_stage_finish(device, &stages[i]);
+
+ vk_graphics_pipeline_destroy(device, &pipeline->base, pAllocator);
+
+ return result;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+vk_common_CreateGraphicsPipelines(VkDevice _device,
+ VkPipelineCache pipelineCache,
+ uint32_t createInfoCount,
+ const VkGraphicsPipelineCreateInfo *pCreateInfos,
+ const VkAllocationCallbacks *pAllocator,
+ VkPipeline *pPipelines)
+{
+ VK_FROM_HANDLE(vk_device, device, _device);
+ VK_FROM_HANDLE(vk_pipeline_cache, cache, pipelineCache);
+ VkResult first_error_or_success = VK_SUCCESS;
+
+ /* From the Vulkan 1.3.274 spec:
+ *
+ * "When attempting to create many pipelines in a single command, it is
+ * possible that creation may fail for a subset of them. In this case,
+ * the corresponding elements of pPipelines will be set to
+ * VK_NULL_HANDLE."
+ */
+ memset(pPipelines, 0, createInfoCount * sizeof(*pPipelines));
+
+ unsigned i = 0;
+ for (; i < createInfoCount; i++) {
+ VkResult result = vk_create_graphics_pipeline(device, cache,
+ &pCreateInfos[i],
+ pAllocator,
+ &pPipelines[i]);
+ if (result == VK_SUCCESS)
+ continue;
+
+ if (first_error_or_success == VK_SUCCESS)
+ first_error_or_success = result;
+
+ /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED as it
+ * is not obvious what error should be reported upon 2 different failures.
+ */
+ if (result != VK_PIPELINE_COMPILE_REQUIRED)
+ return result;
+
+ const VkPipelineCreateFlags2KHR flags =
+ vk_graphics_pipeline_create_flags(&pCreateInfos[i]);
+ if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
+ return result;
+ }
+
+ return first_error_or_success;
+}
+
+struct vk_compute_pipeline {
+ struct vk_pipeline base;
+ struct vk_shader *shader;
+};
+
+static void
+vk_compute_pipeline_destroy(struct vk_device *device,
+ struct vk_pipeline *pipeline,
+ const VkAllocationCallbacks *pAllocator)
+{
+ struct vk_compute_pipeline *comp_pipeline =
+ container_of(pipeline, struct vk_compute_pipeline, base);
+
+ vk_shader_unref(device, comp_pipeline->shader);
+ vk_pipeline_free(device, pAllocator, pipeline);
+}
+
+static void
+vk_compute_pipeline_cmd_bind(struct vk_command_buffer *cmd_buffer,
+ struct vk_pipeline *pipeline)
+{
+ struct vk_device *device = cmd_buffer->base.device;
+ const struct vk_device_shader_ops *ops = device->shader_ops;
+
+ struct vk_shader *shader = NULL;
+ if (pipeline != NULL) {
+ assert(pipeline->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);
+ struct vk_compute_pipeline *comp_pipeline =
+ container_of(pipeline, struct vk_compute_pipeline, base);
+
+ shader = comp_pipeline->shader;
+
+ cmd_buffer->pipeline_shader_stages |= VK_SHADER_STAGE_COMPUTE_BIT;
+ } else {
+ cmd_buffer->pipeline_shader_stages &= ~VK_SHADER_STAGE_COMPUTE_BIT;
+ }
+
+ gl_shader_stage stage = MESA_SHADER_COMPUTE;
+ ops->cmd_bind_shaders(cmd_buffer, 1, &stage, &shader);
+}
+
+static VkResult
+vk_pipeline_compile_compute_stage(struct vk_device *device,
+ struct vk_pipeline_cache *cache,
+ struct vk_compute_pipeline *pipeline,
+ struct vk_pipeline_layout *pipeline_layout,
+ struct vk_pipeline_stage *stage,
+ bool *cache_hit)
+{
+ const struct vk_device_shader_ops *ops = device->shader_ops;
+ VkResult result;
+
+ const VkPushConstantRange *push_range = NULL;
+ if (pipeline_layout != NULL) {
+ for (uint32_t r = 0; r < pipeline_layout->push_range_count; r++) {
+ if (pipeline_layout->push_ranges[r].stageFlags &
+ VK_SHADER_STAGE_COMPUTE_BIT) {
+ assert(push_range == NULL);
+ push_range = &pipeline_layout->push_ranges[r];
+ }
+ }
+ }
+
+ VkShaderCreateFlagsEXT shader_flags =
+ vk_pipeline_to_shader_flags(pipeline->base.flags, MESA_SHADER_COMPUTE);
+
+ struct mesa_blake3 blake3_ctx;
+ _mesa_blake3_init(&blake3_ctx);
+
+ _mesa_blake3_update(&blake3_ctx, stage->precomp->blake3,
+ sizeof(stage->precomp->blake3));
+
+ _mesa_blake3_update(&blake3_ctx, &shader_flags, sizeof(shader_flags));
+
+ for (uint32_t i = 0; i < pipeline_layout->set_count; i++) {
+ if (pipeline_layout->set_layouts[i] != NULL) {
+ _mesa_blake3_update(&blake3_ctx,
+ pipeline_layout->set_layouts[i]->blake3,
+ sizeof(pipeline_layout->set_layouts[i]->blake3));
+ }
+ }
+ if (push_range != NULL)
+ _mesa_blake3_update(&blake3_ctx, push_range, sizeof(*push_range));
+
+ struct vk_shader_pipeline_cache_key shader_key = {
+ .stage = MESA_SHADER_COMPUTE,
+ };
+ _mesa_blake3_final(&blake3_ctx, shader_key.blake3);
+
+ if (cache != NULL) {
+ struct vk_pipeline_cache_object *cache_obj =
+ vk_pipeline_cache_lookup_object(cache, &shader_key,
+ sizeof(shader_key),
+ &pipeline_shader_cache_ops,
+ cache_hit);
+ if (cache_obj != NULL) {
+ stage->shader = vk_shader_from_cache_obj(cache_obj);
+ return VK_SUCCESS;
+ }
+ }
+
+ if (pipeline->base.flags &
+ VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR)
+ return VK_PIPELINE_COMPILE_REQUIRED;
+
+ const struct nir_shader_compiler_options *nir_options =
+ ops->get_nir_options(device->physical, stage->stage,
+ &stage->precomp->rs);
+
+ nir_shader *nir = vk_pipeline_precomp_shader_get_nir(stage->precomp,
+ nir_options);
+ if (nir == NULL)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ /* vk_device_shader_ops::compile() consumes the NIR regardless of whether
+ * or not it succeeds and only generates shaders on success. Once compile()
+ * returns, we own the shaders but not the NIR in infos.
+ */
+ struct vk_shader_compile_info compile_info = {
+ .stage = stage->stage,
+ .flags = shader_flags,
+ .next_stage_mask = 0,
+ .nir = nir,
+ .robustness = &stage->precomp->rs,
+ .set_layout_count = pipeline_layout->set_count,
+ .set_layouts = pipeline_layout->set_layouts,
+ .push_constant_range_count = push_range != NULL,
+ .push_constant_ranges = push_range != NULL ? push_range : NULL,
+ };
+
+ struct vk_shader *shader;
+ result = ops->compile(device, 1, &compile_info, NULL,
+ &device->alloc, &shader);
+ if (result != VK_SUCCESS)
+ return result;
+
+ vk_shader_init_cache_obj(device, shader, &shader_key, sizeof(shader_key));
+
+ struct vk_pipeline_cache_object *cache_obj = &shader->pipeline.cache_obj;
+ if (cache != NULL)
+ cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
+
+ stage->shader = vk_shader_from_cache_obj(cache_obj);
+
+ return VK_SUCCESS;
+}
+
+static VkResult
+vk_compute_pipeline_get_executable_properties(
+ struct vk_device *device,
+ struct vk_pipeline *pipeline,
+ uint32_t *executable_count,
+ VkPipelineExecutablePropertiesKHR *properties)
+{
+ struct vk_compute_pipeline *comp_pipeline =
+ container_of(pipeline, struct vk_compute_pipeline, base);
+ struct vk_shader *shader = comp_pipeline->shader;
+
+ return shader->ops->get_executable_properties(device, shader,
+ executable_count,
+ properties);
+}
+
+static VkResult
+vk_compute_pipeline_get_executable_statistics(
+ struct vk_device *device,
+ struct vk_pipeline *pipeline,
+ uint32_t executable_index,
+ uint32_t *statistic_count,
+ VkPipelineExecutableStatisticKHR *statistics)
+{
+ struct vk_compute_pipeline *comp_pipeline =
+ container_of(pipeline, struct vk_compute_pipeline, base);
+ struct vk_shader *shader = comp_pipeline->shader;
+
+ return shader->ops->get_executable_statistics(device, shader,
+ executable_index,
+ statistic_count,
+ statistics);
+}
+
+static VkResult
+vk_compute_pipeline_get_internal_representations(
+ struct vk_device *device,
+ struct vk_pipeline *pipeline,
+ uint32_t executable_index,
+ uint32_t *internal_representation_count,
+ VkPipelineExecutableInternalRepresentationKHR* internal_representations)
+{
+ struct vk_compute_pipeline *comp_pipeline =
+ container_of(pipeline, struct vk_compute_pipeline, base);
+ struct vk_shader *shader = comp_pipeline->shader;
+
+ return shader->ops->get_executable_internal_representations(
+ device, shader, executable_index,
+ internal_representation_count, internal_representations);
+}
+
+static const struct vk_pipeline_ops vk_compute_pipeline_ops = {
+ .destroy = vk_compute_pipeline_destroy,
+ .get_executable_statistics = vk_compute_pipeline_get_executable_statistics,
+ .get_executable_properties = vk_compute_pipeline_get_executable_properties,
+ .get_internal_representations = vk_compute_pipeline_get_internal_representations,
+ .cmd_bind = vk_compute_pipeline_cmd_bind,
+};
+
+static VkResult
+vk_create_compute_pipeline(struct vk_device *device,
+ struct vk_pipeline_cache *cache,
+ const VkComputePipelineCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkPipeline *pPipeline)
+{
+ VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, pCreateInfo->layout);
+ int64_t pipeline_start = os_time_get_nano();
+ VkResult result;
+
+ const VkPipelineCreateFlags2KHR pipeline_flags =
+ vk_compute_pipeline_create_flags(pCreateInfo);
+
+ const VkPipelineCreationFeedbackCreateInfo *feedback_info =
+ vk_find_struct_const(pCreateInfo->pNext,
+ PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
+
+ struct vk_compute_pipeline *pipeline =
+ vk_pipeline_zalloc(device, &vk_compute_pipeline_ops,
+ VK_PIPELINE_BIND_POINT_COMPUTE,
+ pipeline_flags, pAllocator, sizeof(*pipeline));
+ if (pipeline == NULL)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ struct vk_pipeline_stage stage = {
+ .stage = MESA_SHADER_COMPUTE,
+ };
+ result = vk_pipeline_precompile_shader(device, cache, pipeline_flags,
+ pCreateInfo->pNext,
+ &pCreateInfo->stage,
+ &stage.precomp);
+ if (result != VK_SUCCESS)
+ goto fail_pipeline;
+
+ bool cache_hit;
+ result = vk_pipeline_compile_compute_stage(device, cache, pipeline,
+ pipeline_layout, &stage,
+ &cache_hit);
+ if (result != VK_SUCCESS)
+ goto fail_stage;
+
+ if (stage.precomp != NULL)
+ vk_pipeline_precomp_shader_unref(device, stage.precomp);
+ pipeline->shader = stage.shader;
+
+ const int64_t pipeline_end = os_time_get_nano();
+ if (feedback_info != NULL) {
+ VkPipelineCreationFeedback pipeline_feedback = {
+ .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
+ .duration = pipeline_end - pipeline_start,
+ };
+ if (cache_hit) {
+ pipeline_feedback.flags |=
+ VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
+ }
+
+ *feedback_info->pPipelineCreationFeedback = pipeline_feedback;
+ if (feedback_info->pipelineStageCreationFeedbackCount > 0) {
+ feedback_info->pPipelineStageCreationFeedbacks[0] =
+ pipeline_feedback;
+ }
+ }
+
+ *pPipeline = vk_pipeline_to_handle(&pipeline->base);
+
+ return VK_SUCCESS;
+
+fail_stage:
+ vk_pipeline_stage_finish(device, &stage);
+fail_pipeline:
+ vk_pipeline_free(device, pAllocator, &pipeline->base);
+
+ return result;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+vk_common_CreateComputePipelines(VkDevice _device,
+ VkPipelineCache pipelineCache,
+ uint32_t createInfoCount,
+ const VkComputePipelineCreateInfo *pCreateInfos,
+ const VkAllocationCallbacks *pAllocator,
+ VkPipeline *pPipelines)
+{
+ VK_FROM_HANDLE(vk_device, device, _device);
+ VK_FROM_HANDLE(vk_pipeline_cache, cache, pipelineCache);
+ VkResult first_error_or_success = VK_SUCCESS;
+
+ /* From the Vulkan 1.3.274 spec:
+ *
+ * "When attempting to create many pipelines in a single command, it is
+ * possible that creation may fail for a subset of them. In this case,
+ * the corresponding elements of pPipelines will be set to
+ * VK_NULL_HANDLE."
+ */
+ memset(pPipelines, 0, createInfoCount * sizeof(*pPipelines));
+
+ unsigned i = 0;
+ for (; i < createInfoCount; i++) {
+ VkResult result = vk_create_compute_pipeline(device, cache,
+ &pCreateInfos[i],
+ pAllocator,
+ &pPipelines[i]);
+ if (result == VK_SUCCESS)
+ continue;
+
+ if (first_error_or_success == VK_SUCCESS)
+ first_error_or_success = result;
+
+ /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED as it
+ * is not obvious what error should be reported upon 2 different failures.
+ */
+ if (result != VK_PIPELINE_COMPILE_REQUIRED)
+ return result;
+
+ const VkPipelineCreateFlags2KHR flags =
+ vk_compute_pipeline_create_flags(&pCreateInfos[i]);
+ if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
+ return result;
+ }
+
+ return first_error_or_success;
+}
+
+void
+vk_cmd_unbind_pipelines_for_stages(struct vk_command_buffer *cmd_buffer,
+ VkShaderStageFlags stages)
+{
+ stages &= cmd_buffer->pipeline_shader_stages;
+
+ if (stages & ~VK_SHADER_STAGE_COMPUTE_BIT)
+ vk_graphics_pipeline_cmd_bind(cmd_buffer, NULL);
+
+ if (stages & VK_SHADER_STAGE_COMPUTE_BIT)
+ vk_compute_pipeline_cmd_bind(cmd_buffer, NULL);
+}
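A note on the hashing scheme above: the per-partition shader key folds in ops->hash_graphics_state(), so a driver only needs to hash the pieces of vk_graphics_pipeline_state that actually affect its code generation for the given stages.  A hedged sketch of such a callback; the drv_ prefix and the choice of which field to hash are illustrative assumptions, not requirements of this patch.

static void
drv_hash_graphics_state(struct vk_physical_device *device,
                        const struct vk_graphics_pipeline_state *state,
                        VkShaderStageFlags stages,
                        blake3_hash blake3_out)
{
   struct mesa_blake3 ctx;
   _mesa_blake3_init(&ctx);

   /* Hypothetical example: assume this driver's fragment shaders depend
    * only on the number of color attachments, so nothing else is hashed.
    */
   if ((stages & VK_SHADER_STAGE_FRAGMENT_BIT) && state->rp != NULL) {
      _mesa_blake3_update(&ctx, &state->rp->color_attachment_count,
                          sizeof(state->rp->color_attachment_count));
   }

   _mesa_blake3_final(&ctx, blake3_out);
}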
diff --git a/src/vulkan/runtime/vk_pipeline.h b/src/vulkan/runtime/vk_pipeline.h
index 62ae730e1e4..ed05d567a8f 100644
--- a/src/vulkan/runtime/vk_pipeline.h
+++ b/src/vulkan/runtime/vk_pipeline.h
@@ -199,6 +199,10 @@ void vk_pipeline_free(struct vk_device *device,
const VkAllocationCallbacks *alloc,
struct vk_pipeline *pipeline);
+void
+vk_cmd_unbind_pipelines_for_stages(struct vk_command_buffer *cmd_buffer,
+ VkShaderStageFlags stages);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/vulkan/runtime/vk_shader.c b/src/vulkan/runtime/vk_shader.c
index 41c41b2f363..6f377fa6f21 100644
--- a/src/vulkan/runtime/vk_shader.c
+++ b/src/vulkan/runtime/vk_shader.c
@@ -369,7 +369,10 @@ vk_common_GetShaderBinaryDataEXT(VkDevice _device,
return result;
}
-#define VK_MAX_LINKED_SHADER_STAGES 5
+/* The only place where we have "real" linking is graphics shaders and there
+ * is a limit as to how many of them can be linked together at one time.
+ */
+#define VK_MAX_LINKED_SHADER_STAGES MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES
VKAPI_ATTR VkResult VKAPI_CALL
vk_common_CreateShadersEXT(VkDevice _device,
@@ -552,10 +555,16 @@ vk_common_CmdBindShadersEXT(VkCommandBuffer commandBuffer,
STACK_ARRAY(gl_shader_stage, stages, stageCount);
STACK_ARRAY(struct vk_shader *, shaders, stageCount);
+ VkShaderStageFlags vk_stages = 0;
for (uint32_t i = 0; i < stageCount; i++) {
+ vk_stages |= pStages[i];
stages[i] = vk_to_mesa_shader_stage(pStages[i]);
shaders[i] = pShaders != NULL ? vk_shader_from_handle(pShaders[i]) : NULL;
}
+ vk_cmd_unbind_pipelines_for_stages(cmd_buffer, vk_stages);
+ if (vk_stages & ~VK_SHADER_STAGE_COMPUTE_BIT)
+ vk_cmd_set_rp_attachments(cmd_buffer, ~0);
+
ops->cmd_bind_shaders(cmd_buffer, stageCount, stages, shaders);
}
diff --git a/src/vulkan/runtime/vk_shader.h b/src/vulkan/runtime/vk_shader.h
index 0ee6e7681c3..8fb5090b129 100644
--- a/src/vulkan/runtime/vk_shader.h
+++ b/src/vulkan/runtime/vk_shader.h
@@ -28,6 +28,8 @@
#include "vk_limits.h"
#include "vk_pipeline_cache.h"
+#include "util/mesa-blake3.h"
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -45,6 +47,8 @@ struct vk_pipeline_robustness_state;
int vk_shader_cmp_graphics_stages(gl_shader_stage a, gl_shader_stage b);
+#define VK_SHADER_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_MESA 0x1000
+
struct vk_shader_compile_info {
gl_shader_stage stage;
VkShaderCreateFlagsEXT flags;
@@ -62,12 +66,30 @@ struct vk_shader_compile_info {
struct vk_shader_ops;
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic error "-Wpadded"
+#endif
+struct vk_shader_pipeline_cache_key {
+ gl_shader_stage stage;
+ blake3_hash blake3;
+};
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
struct vk_shader {
struct vk_object_base base;
const struct vk_shader_ops *ops;
gl_shader_stage stage;
+
+ /* Used for the generic VkPipeline implementation */
+ struct {
+ struct vk_pipeline_cache_object cache_obj;
+ struct vk_shader_pipeline_cache_key cache_key;
+ } pipeline;
};
VK_DEFINE_NONDISP_HANDLE_CASTS(vk_shader, base, VkShaderEXT,
@@ -90,6 +112,39 @@ struct vk_shader_ops {
bool (*serialize)(struct vk_device *device,
const struct vk_shader *shader,
struct blob *blob);
+
+ /** Returns executable properties for this shader
+ *
+ * This is equivalent to vkGetPipelineExecutableProperties(), only for a
+ * single vk_shader.
+ */
+ VkResult (*get_executable_properties)(struct vk_device *device,
+ const struct vk_shader *shader,
+ uint32_t *executable_count,
+ VkPipelineExecutablePropertiesKHR *properties);
+
+ /** Returns executable statistics for this shader
+ *
+ * This is equivalent to vkGetPipelineExecutableStatistics(), only for a
+ * single vk_shader.
+ */
+ VkResult (*get_executable_statistics)(struct vk_device *device,
+ const struct vk_shader *shader,
+ uint32_t executable_index,
+ uint32_t *statistic_count,
+ VkPipelineExecutableStatisticKHR *statistics);
+
+ /** Returns executable internal representations for this shader
+ *
+ * This is equivalent to vkGetPipelineExecutableInternalRepresentations(),
+ * only for a single vk_shader.
+ */
+ VkResult (*get_executable_internal_representations)(
+ struct vk_device *device,
+ const struct vk_shader *shader,
+ uint32_t executable_index,
+ uint32_t *internal_representation_count,
+ VkPipelineExecutableInternalRepresentationKHR *internal_representations);
};
void *vk_shader_zalloc(struct vk_device *device,
@@ -143,6 +198,23 @@ struct vk_device_shader_ops {
*/
void (*preprocess_nir)(struct vk_physical_device *device, nir_shader *nir);
+ /** True if the driver wants geometry stages linked
+ *
+ * If set to true, geometry stages will always be compiled with
+ * VK_SHADER_CREATE_LINK_STAGE_BIT_EXT when pipelines are used.
+ */
+ bool link_geom_stages;
+
+ /** Hash a vk_graphics_state object
+ *
+ * This callback hashes whatever bits of vk_graphics_pipeline_state might
+ * be used to compile a shader in one of the given stages.
+ */
+ void (*hash_graphics_state)(struct vk_physical_device *device,
+ const struct vk_graphics_pipeline_state *state,
+ VkShaderStageFlags stages,
+ blake3_hash blake3_out);
+
/** Compile (and potentially link) a set of shaders
*
* Unlike vkCreateShadersEXT, this callback will only ever be called with
@@ -175,6 +247,10 @@ struct vk_device_shader_ops {
uint32_t stage_count,
const gl_shader_stage *stages,
struct vk_shader ** const shaders);
+
+ /** Sets dynamic state */
+ void (*cmd_set_dynamic_graphics_state)(struct vk_command_buffer *cmd_buffer,
+ const struct vk_dynamic_graphics_state *state);
};
#ifdef __cplusplus
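Putting the new hooks together, a driver opting into these generic pipelines would populate its vk_device_shader_ops roughly as follows.  This is a sketch: the drv_* callbacks are hypothetical stand-ins, and only the fields referenced by the runtime code in this patch are shown.

static const struct vk_device_shader_ops drv_shader_ops = {
   /* Hooks that already existed for vk_shader / VK_EXT_shader_object */
   .get_nir_options   = drv_get_nir_options,
   .get_spirv_options = drv_get_spirv_options,
   .preprocess_nir    = drv_preprocess_nir,
   .compile           = drv_compile,
   .deserialize       = drv_deserialize_shader,
   .cmd_bind_shaders  = drv_cmd_bind_shaders,

   /* Hooks added or consumed by the generic VkPipeline code */
   .link_geom_stages               = true,
   .hash_graphics_state            = drv_hash_graphics_state,
   .cmd_set_dynamic_graphics_state = drv_cmd_set_dynamic_graphics_state,
};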