diff options
author | Samuel Pitoiset <samuel.pitoiset@gmail.com> | 2018-04-06 10:23:56 +0200 |
---|---|---|
committer | Samuel Pitoiset <samuel.pitoiset@gmail.com> | 2018-04-06 10:23:56 +0200 |
commit | acee7b888285c635f17b36cd13f8a6b6065b88e2 (patch) | |
tree | 88104e0ffc430b031acbe9fcf7b28ddf4b828488 |
initial commit; import existing files
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
-rw-r--r-- | CMakeLists.txt | 19 | ||||
-rw-r--r-- | README | 44 | ||||
-rw-r--r-- | VkLayer_vkpipeline_db.json | 21 | ||||
-rw-r--r-- | blob.c | 404 | ||||
-rw-r--r-- | blob.h | 355 | ||||
-rwxr-xr-x | radv-report.py | 700 | ||||
-rw-r--r-- | run.c | 539 | ||||
-rw-r--r-- | serialize.c | 959 | ||||
-rw-r--r-- | serialize.h | 75 | ||||
-rw-r--r-- | vkpipeline_db.cpp | 1074 |
10 files changed, 4190 insertions, 0 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..3e1546c --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,19 @@ +cmake_minimum_required(VERSION 3.7) +cmake_policy(VERSION 3.7) +project(vkpipeline-db) + +find_package(Vulkan REQUIRED) + +set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall") +set(CMAKE_C_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall") +if(CMAKE_COMPILER_IS_GNUCXX) + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wextra -Wundef") + set(CMAKE_C_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wextra -Wundef") +endif(CMAKE_COMPILER_IS_GNUCXX) + +file(GLOB SOURCES blob.c serialize.c) + +add_library(VkLayer_vkpipeline_db SHARED vkpipeline_db.cpp ${SOURCES}) + +add_executable(run run.c ${SOURCES}) +target_link_libraries(run ${Vulkan_LIBRARY}) @@ -0,0 +1,44 @@ +=== What === + +vkpipeline-db is quite similar to shader-db, but for Vulkan. It allows to capture +graphics and compute pipelines (including shaders and states) and to replay +them in order to get shader stats. + +Currently vkpipeline-db can only report shader stats if VK_AMD_shader_info is +exposed by the underlying Vulkan driver. + +=== Compiling === + +$ mkdir build +$ cd build +$ cmake .. +$ make + +=== Capturing shaders === + +One component of vkpipeline-db is a Vulkan layer that allows to capture +pipelines. + +# Set up the layer +$ export VK_LAYER_PATH=$VK_LAYER_PATH:path_to_VkLayer_vkpipeline_db.json +$ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:path_to_libVkLayer_vkpipeline_db.so +$ export VK_INSTANCE_LAYERS=VK_LAYER_vkpipeline_db + +# Create directory for pipeline files +$ mkdir dirpath + +# Set up the pipeline capture directory. +$ export VKPIPELINE_DB_CAPTURE_PATH=dirpath + +# Run your application +$ yourapp + +All captured pipelines should be in 'dirpath'. 
+ +=== Running shaders === + +$ ./run dirpath + +=== Analysis === + +$ ./radv-report.py old-run new-run diff --git a/VkLayer_vkpipeline_db.json b/VkLayer_vkpipeline_db.json new file mode 100644 index 0000000..2cddcea --- /dev/null +++ b/VkLayer_vkpipeline_db.json @@ -0,0 +1,21 @@ +{ + "file_format_version" : "1.0.0", + "layer" : { + "name": "VK_LAYER_vkpipeline_db", + "type": "GLOBAL", + "library_path": "libVkLayer_vkpipeline_db.so", + "api_version": "1.0.61", + "implementation_version": "1", + "description": "vkpipeline-db capture layer", + "functions": { + "vkGetInstanceProcAddr": "vkpipeline_db_GetInstanceProcAddr", + "vkGetDeviceProcAddr": "vkpipeline_db_GetDeviceProcAddr" + }, + "enable_environment": { + "ENABLE_VKPIPELINE_DB": "1" + }, + "disable_environment": { + "DISABLE_VKPIPELINE_DB": "1" + } + } +} @@ -0,0 +1,404 @@ +/* + * File imported from mesa which is: + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <string.h> +#include <assert.h> +#include <stdlib.h> + +#include "blob.h" + +#ifdef HAVE_VALGRIND +#include <valgrind.h> +#include <memcheck.h> +#define VG(x) x +#else +#define VG(x) +#endif + +#define BLOB_INITIAL_SIZE 4096 + +/** + * Align a value up to an alignment value + * + * If \c value is not already aligned to the requested alignment value, it + * will be rounded up. + * + * \param value Value to be rounded + * \param alignment Alignment value to be used. This must be a power of two. + * + * \sa ROUND_DOWN_TO() + */ +static uintptr_t +ALIGN(uintptr_t value, int32_t alignment) +{ + assert((alignment > 0) && !(alignment & (alignment - 1))); + return (((value) + (alignment) - 1) & ~((alignment) - 1)); +} + +/* Ensure that \blob will be able to fit an additional object of size + * \additional. The growing (if any) will occur by doubling the existing + * allocation. 
+ */ +static bool +grow_to_fit(struct blob *blob, size_t additional) +{ + size_t to_allocate; + uint8_t *new_data; + + if (blob->out_of_memory) + return false; + + if (blob->size + additional <= blob->allocated) + return true; + + if (blob->fixed_allocation) { + blob->out_of_memory = true; + return false; + } + + if (blob->allocated == 0) + to_allocate = BLOB_INITIAL_SIZE; + else + to_allocate = blob->allocated * 2; + + to_allocate = MAX2(to_allocate, blob->allocated + additional); + + new_data = realloc(blob->data, to_allocate); + if (new_data == NULL) { + blob->out_of_memory = true; + return false; + } + + blob->data = new_data; + blob->allocated = to_allocate; + + return true; +} + +/* Align the blob->size so that reading or writing a value at (blob->data + + * blob->size) will result in an access aligned to a granularity of \alignment + * bytes. + * + * \return True unless allocation fails + */ +static bool +align_blob(struct blob *blob, size_t alignment) +{ + const size_t new_size = ALIGN(blob->size, alignment); + + if (blob->size < new_size) { + if (!grow_to_fit(blob, new_size - blob->size)) + return false; + + if (blob->data) + memset(blob->data + blob->size, 0, new_size - blob->size); + blob->size = new_size; + } + + return true; +} + +static void +align_blob_reader(struct blob_reader *blob, size_t alignment) +{ + blob->current = blob->data + ALIGN(blob->current - blob->data, alignment); +} + +void +blob_init(struct blob *blob) +{ + blob->data = NULL; + blob->allocated = 0; + blob->size = 0; + blob->fixed_allocation = false; + blob->out_of_memory = false; +} + +void +blob_init_fixed(struct blob *blob, void *data, size_t size) +{ + blob->data = data; + blob->allocated = size; + blob->size = 0; + blob->fixed_allocation = true; + blob->out_of_memory = false; +} + +bool +blob_overwrite_bytes(struct blob *blob, + size_t offset, + const void *bytes, + size_t to_write) +{ + /* Detect an attempt to overwrite data out of bounds. 
*/ + if (offset + to_write < offset || blob->size < offset + to_write) + return false; + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(bytes, to_write)); + + if (blob->data) + memcpy(blob->data + offset, bytes, to_write); + + return true; +} + +bool +blob_write_bytes(struct blob *blob, const void *bytes, size_t to_write) +{ + if (! grow_to_fit(blob, to_write)) + return false; + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(bytes, to_write)); + + if (blob->data) + memcpy(blob->data + blob->size, bytes, to_write); + blob->size += to_write; + + return true; +} + +intptr_t +blob_reserve_bytes(struct blob *blob, size_t to_write) +{ + intptr_t ret; + + if (! grow_to_fit (blob, to_write)) + return -1; + + ret = blob->size; + blob->size += to_write; + + return ret; +} + +intptr_t +blob_reserve_uint32(struct blob *blob) +{ + align_blob(blob, sizeof(uint32_t)); + return blob_reserve_bytes(blob, sizeof(uint32_t)); +} + +intptr_t +blob_reserve_intptr(struct blob *blob) +{ + align_blob(blob, sizeof(intptr_t)); + return blob_reserve_bytes(blob, sizeof(intptr_t)); +} + +bool +blob_write_uint32(struct blob *blob, uint32_t value) +{ + align_blob(blob, sizeof(value)); + + return blob_write_bytes(blob, &value, sizeof(value)); +} + +#define ASSERT_ALIGNED(_offset, _align) \ + assert(ALIGN((_offset), (_align)) == (_offset)) + +bool +blob_overwrite_uint32 (struct blob *blob, + size_t offset, + uint32_t value) +{ + ASSERT_ALIGNED(offset, sizeof(value)); + return blob_overwrite_bytes(blob, offset, &value, sizeof(value)); +} + +bool +blob_write_uint64(struct blob *blob, uint64_t value) +{ + align_blob(blob, sizeof(value)); + + return blob_write_bytes(blob, &value, sizeof(value)); +} + +bool +blob_write_intptr(struct blob *blob, intptr_t value) +{ + align_blob(blob, sizeof(value)); + + return blob_write_bytes(blob, &value, sizeof(value)); +} + +bool +blob_overwrite_intptr (struct blob *blob, + size_t offset, + intptr_t value) +{ + ASSERT_ALIGNED(offset, sizeof(value)); + return blob_overwrite_bytes(blob, offset, 
&value, sizeof(value)); +} + +bool +blob_write_string(struct blob *blob, const char *str) +{ + return blob_write_bytes(blob, str, strlen(str) + 1); +} + +void +blob_reader_init(struct blob_reader *blob, const void *data, size_t size) +{ + blob->data = data; + blob->end = blob->data + size; + blob->current = data; + blob->overrun = false; +} + +/* Check that an object of size \size can be read from this blob. + * + * If not, set blob->overrun to indicate that we attempted to read too far. + */ +static bool +ensure_can_read(struct blob_reader *blob, size_t size) +{ + if (blob->overrun) + return false; + + if (blob->current <= blob->end && blob->end - blob->current >= size) + return true; + + blob->overrun = true; + + return false; +} + +const void * +blob_read_bytes(struct blob_reader *blob, size_t size) +{ + const void *ret; + + if (! ensure_can_read (blob, size)) + return NULL; + + ret = blob->current; + + blob->current += size; + + return ret; +} + +void +blob_copy_bytes(struct blob_reader *blob, void *dest, size_t size) +{ + const void *bytes; + + bytes = blob_read_bytes(blob, size); + if (bytes == NULL) + return; + + memcpy(dest, bytes, size); +} + +/* These next three read functions have identical form. If we add any beyond + * these first three we should probably switch to generating these with a + * preprocessor macro. +*/ +uint32_t +blob_read_uint32(struct blob_reader *blob) +{ + uint32_t ret; + int size = sizeof(ret); + + align_blob_reader(blob, size); + + if (! ensure_can_read(blob, size)) + return 0; + + ret = *((uint32_t*) blob->current); + + blob->current += size; + + return ret; +} + +uint64_t +blob_read_uint64(struct blob_reader *blob) +{ + uint64_t ret; + int size = sizeof(ret); + + align_blob_reader(blob, size); + + if (! 
ensure_can_read(blob, size)) + return 0; + + ret = *((uint64_t*) blob->current); + + blob->current += size; + + return ret; +} + +intptr_t +blob_read_intptr(struct blob_reader *blob) +{ + intptr_t ret; + int size = sizeof(ret); + + align_blob_reader(blob, size); + + if (! ensure_can_read(blob, size)) + return 0; + + ret = *((intptr_t *) blob->current); + + blob->current += size; + + return ret; +} + +char * +blob_read_string(struct blob_reader *blob) +{ + int size; + char *ret; + uint8_t *nul; + + /* If we're already at the end, then this is an overrun. */ + if (blob->current >= blob->end) { + blob->overrun = true; + return NULL; + } + + /* Similarly, if there is no zero byte in the data remaining in this blob, + * we also consider that an overrun. + */ + nul = memchr(blob->current, 0, blob->end - blob->current); + + if (nul == NULL) { + blob->overrun = true; + return NULL; + } + + size = nul - blob->current + 1; + + assert(ensure_can_read(blob, size)); + + ret = (char *) blob->current; + + blob->current += size; + + return ret; +} @@ -0,0 +1,355 @@ +/* + * File imported from mesa which is: + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef BLOB_H +#define BLOB_H + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> + +#define MAX2(a, b) ((a) > (b) ? (a) : (b)) + +#ifdef __cplusplus +extern "C" { +#endif + +/* The blob functions implement a simple, low-level API for serializing and + * deserializing. + * + * All objects written to a blob will be serialized directly, (without any + * additional meta-data to describe the data written). Therefore, it is the + * caller's responsibility to ensure that any data can be read later, (either + * by knowing exactly what data is expected, or by writing to the blob + * sufficient meta-data to describe what has been written). + * + * A blob is efficient in that it dynamically grows by doubling in size, so + * allocation costs are logarithmic. + */ + +struct blob { + /* The data actually written to the blob. */ + uint8_t *data; + + /** Number of bytes that have been allocated for \c data. */ + size_t allocated; + + /** The number of bytes that have actual data written to them. */ + size_t size; + + /** True if \c data a fixed allocation that we cannot resize + * + * \see blob_init_fixed + */ + bool fixed_allocation; + + /** + * True if we've ever failed to realloc or if we go pas the end of a fixed + * allocation blob. + */ + bool out_of_memory; +}; + +/* When done reading, the caller can ensure that everything was consumed by + * checking the following: + * + * 1. blob->current should be equal to blob->end, (if not, too little was + * read). + * + * 2. blob->overrun should be false, (otherwise, too much was read). 
+ */ +struct blob_reader { + const uint8_t *data; + const uint8_t *end; + const uint8_t *current; + bool overrun; +}; + +/** + * Init a new, empty blob. + */ +void +blob_init(struct blob *blob); + +/** + * Init a new, fixed-size blob. + * + * A fixed-size blob has a fixed block of data that will not be freed on + * blob_finish and will never be grown. If we hit the end, we simply start + * returning false from the write functions. + * + * If a fixed-size blob has a NULL data pointer then the data is written but + * it otherwise operates normally. This can be used to determine the size + * that will be required to write a given data structure. + */ +void +blob_init_fixed(struct blob *blob, void *data, size_t size); + +/** + * Finish a blob and free its memory. + * + * If \blob was initialized with blob_init_fixed, the data pointer is + * considered to be owned by the user and will not be freed. + */ +static inline void +blob_finish(struct blob *blob) +{ + if (!blob->fixed_allocation) + free(blob->data); +} + +/** + * Add some unstructured, fixed-size data to a blob. + * + * \return True unless allocation failed. + */ +bool +blob_write_bytes(struct blob *blob, const void *bytes, size_t to_write); + +/** + * Reserve space in \blob for a number of bytes. + * + * Space will be allocated within the blob for these byes, but the bytes will + * be left uninitialized. The caller is expected to use \sa + * blob_overwrite_bytes to write to these bytes. + * + * \return An offset to space allocated within \blob to which \to_write bytes + * can be written, (or -1 in case of any allocation error). + */ +intptr_t +blob_reserve_bytes(struct blob *blob, size_t to_write); + +/** + * Similar to \sa blob_reserve_bytes, but only reserves an uint32_t worth of + * space. Note that this must be used if later reading with \sa + * blob_read_uint32, since it aligns the offset correctly. 
+ */ +intptr_t +blob_reserve_uint32(struct blob *blob); + +/** + * Similar to \sa blob_reserve_bytes, but only reserves an intptr_t worth of + * space. Note that this must be used if later reading with \sa + * blob_read_intptr, since it aligns the offset correctly. + */ +intptr_t +blob_reserve_intptr(struct blob *blob); + +/** + * Overwrite some data previously written to the blob. + * + * Writes data to an existing portion of the blob at an offset of \offset. + * This data range must have previously been written to the blob by one of the + * blob_write_* calls. + * + * For example usage, see blob_overwrite_uint32 + * + * \return True unless the requested offset or offset+to_write lie outside + * the current blob's size. + */ +bool +blob_overwrite_bytes(struct blob *blob, + size_t offset, + const void *bytes, + size_t to_write); + +/** + * Add a uint32_t to a blob. + * + * \note This function will only write to a uint32_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be added to the + * blob if this write follows some unaligned write (such as + * blob_write_string). + * + * \return True unless allocation failed. + */ +bool +blob_write_uint32(struct blob *blob, uint32_t value); + +/** + * Overwrite a uint32_t previously written to the blob. + * + * Writes a uint32_t value to an existing portion of the blob at an offset of + * \offset. This data range must have previously been written to the blob by + * one of the blob_write_* calls. + * + * + * The expected usage is something like the following pattern: + * + * size_t offset; + * + * offset = blob_reserve_uint32(blob); + * ... various blob write calls, writing N items ... + * blob_overwrite_uint32 (blob, offset, N); + * + * \return True unless the requested position or position+to_write lie outside + * the current blob's size. + */ +bool +blob_overwrite_uint32(struct blob *blob, + size_t offset, + uint32_t value); + +/** + * Add a uint64_t to a blob. 
+ * + * \note This function will only write to a uint64_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be added to the + * blob if this write follows some unaligned write (such as + * blob_write_string). + * + * \return True unless allocation failed. + */ +bool +blob_write_uint64(struct blob *blob, uint64_t value); + +/** + * Add an intptr_t to a blob. + * + * \note This function will only write to an intptr_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be added to the + * blob if this write follows some unaligned write (such as + * blob_write_string). + * + * \return True unless allocation failed. + */ +bool +blob_write_intptr(struct blob *blob, intptr_t value); + +/** + * Overwrite an intptr_t previously written to the blob. + * + * Writes a intptr_t value to an existing portion of the blob at an offset of + * \offset. This data range must have previously been written to the blob by + * one of the blob_write_* calls. + * + * For example usage, see blob_overwrite_uint32 + * + * \return True unless the requested position or position+to_write lie outside + * the current blob's size. + */ +bool +blob_overwrite_intptr(struct blob *blob, + size_t offset, + intptr_t value); + +/** + * Add a NULL-terminated string to a blob, (including the NULL terminator). + * + * \return True unless allocation failed. + */ +bool +blob_write_string(struct blob *blob, const char *str); + +/** + * Start reading a blob, (initializing the contents of \blob for reading). + * + * After this call, the caller can use the various blob_read_* functions to + * read elements from the data array. + * + * For all of the blob_read_* functions, if there is insufficient data + * remaining, the functions will do nothing, (perhaps returning default values + * such as 0). The caller can detect this by noting that the blob_reader's + * current value is unchanged before and after the call. 
+ */ +void +blob_reader_init(struct blob_reader *blob, const void *data, size_t size); + +/** + * Read some unstructured, fixed-size data from the current location, (and + * update the current location to just past this data). + * + * \note The memory returned belongs to the data underlying the blob reader. The + * caller must copy the data in order to use it after the lifetime of the data + * underlying the blob reader. + * + * \return The bytes read (see note above about memory lifetime). + */ +const void * +blob_read_bytes(struct blob_reader *blob, size_t size); + +/** + * Read some unstructured, fixed-size data from the current location, copying + * it to \dest (and update the current location to just past this data) + */ +void +blob_copy_bytes(struct blob_reader *blob, void *dest, size_t size); + +/** + * Read a uint32_t from the current location, (and update the current location + * to just past this uint32_t). + * + * \note This function will only read from a uint32_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be skipped. + * + * \return The uint32_t read + */ +uint32_t +blob_read_uint32(struct blob_reader *blob); + +/** + * Read a uint64_t from the current location, (and update the current location + * to just past this uint64_t). + * + * \note This function will only read from a uint64_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be skipped. + * + * \return The uint64_t read + */ +uint64_t +blob_read_uint64(struct blob_reader *blob); + +/** + * Read an intptr_t value from the current location, (and update the + * current location to just past this intptr_t). + * + * \note This function will only read from an intptr_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be skipped. 
+ * + * \return The intptr_t read + */ +intptr_t +blob_read_intptr(struct blob_reader *blob); + +/** + * Read a NULL-terminated string from the current location, (and update the + * current location to just past this string). + * + * \note The memory returned belongs to the data underlying the blob reader. The + * caller must copy the string in order to use the string after the lifetime + * of the data underlying the blob reader. + * + * \return The string read (see note above about memory lifetime). However, if + * there is no NULL byte remaining within the blob, this function returns + * NULL. + */ +char * +blob_read_string(struct blob_reader *blob); + +#ifdef __cplusplus +} +#endif + +#endif /* BLOB_H */ diff --git a/radv-report.py b/radv-report.py new file mode 100755 index 0000000..881e9cf --- /dev/null +++ b/radv-report.py @@ -0,0 +1,700 @@ +#!/usr/bin/env python +# +# Copyright 2018 Valve Corporation +# +# Based in part on shader-db which is: +# Copyright 2015 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +# + +from collections import defaultdict +import itertools +import re +import sys + +set_red = "\033[31m" +set_green = "\033[1;32m" +set_yellow = "\033[1;33m" +set_normal = "\033[0m" + +def format_float(f, suffix = ' %'): + return "{0:0.2f}{1}".format(f, suffix) + +def get_str(value, suffix = ' %'): + if type(value) == float: + return format_float(value, suffix) + else: + return value + +def calculate_percent_change(b, a): + if b == 0: + return 0 if a == 0 else float("inf") + return 100 * float(a - b) / float(b) + +def format_table_cell(n, more_is_better = False, colored = True, is_percent = False): + if is_percent and abs(n) < 0.01: + return " . " + + str = ("{:>8.2f} %" if is_percent else "{:>10}").format(n) + if colored: + if n > 0.5: + str = (set_green if more_is_better else set_red) + str + set_normal + elif n < -0.5: + str = (set_red if more_is_better else set_green) + str + set_normal + return str + + +def format_percent_change(b, a, more_is_better = False, colored = True): + percent = calculate_percent_change(b, a) + return format_table_cell(percent, more_is_better, colored, is_percent = True) + +def cmp_max_unit(current, comp): + return comp[0] > current[0] + +def cmp_min_unit(current, comp): + return comp[0] < current[0] + +def cmp_max_per(current, comp): + return calculate_percent_change(comp[1], comp[2]) > calculate_percent_change(current[1], current[2]) + +def cmp_min_per(current, comp): + return calculate_percent_change(comp[1], comp[2]) < calculate_percent_change(current[1], current[2]) + +class si_stats: + metrics = [ + ('sgprs', 'SGPRS', ''), + ('vgprs', 'VGPRS', ''), + ('spilled_sgprs', 'Spilled SGPRs', ''), + ('spilled_vgprs', 'Spilled VGPRs', ''), + ('privmem_vgprs', 'Private 
memory VGPRs', ''), + ('scratch_size', 'Scratch size', 'dwords per thread'), + ('code_size', 'Code Size', 'bytes'), + ('lds', 'LDS', 'blocks'), + ('maxwaves', 'Max Waves', ''), + ('waitstates', 'Wait states', ''), + ] + + def __init__(self): + self.error = False + + for name in self.get_metrics(): + self.__dict__[name] = 0 + + self._minmax_testname = {} + + def copy(self): + copy = si_stats() + copy.error = self.error + + for name in self.get_metrics(): + copy.__dict__[name] = self.__dict__[name] + + copy._minmax_testname = self._minmax_testname.copy() + + return copy + + def to_string(self, suffixes = True): + strings = [] + for name, printname, suffix in si_stats.metrics: + string = "{}: {}".format(printname, get_str(self.__dict__[name])) + + if suffixes and len(suffix) > 0: + string += ' ' + suffix + + minmax_testname = self._minmax_testname.get(name) + if minmax_testname is not None: + string += ' (in {})'.format(minmax_testname) + + strings.append(string + '\n') + return ''.join(strings) + + def get_metrics(self): + return [m[0] for m in si_stats.metrics] + + def __str__(self): + return self.to_string() + + def add(self, other): + for name in self.get_metrics(): + self.__dict__[name] += other.__dict__[name] + + def update(self, comp, cmp_fn, testname): + for name in self.get_metrics(): + current = self.__dict__[name] + if type(current) != tuple: + current = (0, 0, 0) + if cmp_fn(current, comp.__dict__[name]): + self.__dict__[name] = comp.__dict__[name] + self._minmax_testname[name] = testname + + def update_max(self, comp): + for name in self.get_metrics(): + current = self.__dict__[name] + if type(current) == tuple: + current = self.__dict__[name][0] + if comp.__dict__[name][0] > current: + self.__dict__[name] = comp.__dict__[name] + + def update_min(self, comp): + for name in self.get_metrics(): + current = self.__dict__[name] + if type(current) == tuple: + current = self.__dict__[name][0] + if comp.__dict__[name][0] < current: + self.__dict__[name] = 
comp.__dict__[name] + + def update_increase(self, comp): + for name in self.get_metrics(): + if comp.__dict__[name][0] > 0: + self.__dict__[name] += 1 + + def update_decrease(self, comp): + for name in self.get_metrics(): + if comp.__dict__[name][0] < 0: + self.__dict__[name] += 1 + + def is_empty(self): + for name in self.get_metrics(): + x = self.__dict__[name] + if type(x) == tuple and x[0] is not 0: + return False + if type(x) != tuple and x is not 0: + return False + return True + + +class si_parser(object): + re_stats = [ + re.compile( + r"^Shader Stats: SGPRS: ([0-9]+) VGPRS: ([0-9]+) Code Size: ([0-9]+) "+ + r"LDS: ([0-9]+) Scratch: ([0-9]+) Max Waves: ([0-9]+) Spilled SGPRs: "+ + r"([0-9]+) Spilled VGPRs: ([0-9]+) PrivMem VGPRs: ([0-9]+)"), + re.compile( + r"^Shader Stats: SGPRS: ([0-9]+) VGPRS: ([0-9]+) Code Size: ([0-9]+) "+ + r"LDS: ([0-9]+) Scratch: ([0-9]+) Max Waves: ([0-9]+) Spilled SGPRs: "+ + r"([0-9]+) Spilled VGPRs: ([0-9]+)"), + ] + + re_nop = re.compile("^\ts_nop ([0-9]+)") + + def __init__(self): + self._stats = None + self._in_disasm = False + + def finish(self): + return self._stats + + def parse(self, msg): + if not self._in_disasm: + if msg == "Shader Disassembly Begin": + old_stats = self._stats + self._stats = si_stats() + self._in_disasm = True + return old_stats + + for re in si_parser.re_stats: + match = re.match(msg) + if match is not None: + break + + if match is not None: + if self._stats == None: + self._stats = si_stats() + self._stats.sgprs = int(match.group(1)) + self._stats.vgprs = int(match.group(2)) + self._stats.spilled_sgprs = int(match.group(7)) + self._stats.spilled_vgprs = int(match.group(8)) + self._stats.privmem_vgprs = int(match.group(9)) if match.lastindex >= 9 else 0 + self._stats.code_size = int(match.group(3)) + self._stats.lds = int(match.group(4)) + self._stats.scratch_size = int(match.group(5)) / (64 * 4) + self._stats.maxwaves = int(match.group(6)) + old_stats = self._stats + self._stats = None + return 
old_stats + + if msg == "LLVM compile failed": + old_stats = self._stats + self._stats = None + + if old_stats is None: + old_stats = si_stats() + old_stats.error = True + return old_stats + else: + if msg == "Shader Disassembly End": + self._in_disasm = False + return None + + match = si_parser.re_nop.match(msg) + if match: + self._stats.waitstates += 1 + int(match.groups()[0]) + return None + +def get_results(filename): + """ + Returns a dictionary that maps pipeline_test names to lists of si_stats + (corresponding to the different shaders within the test's programs). + """ + results = defaultdict(list) + parsers = defaultdict(si_parser) + + with open(filename, "r") as file: + re_line = re.compile(r"^(.+\.pipeline_test) - (.*)$") + + for line in file: + match = re_line.match(line) + if match is None: + continue + + name = match.group(1) + message = match.group(2) + + stats = parsers[name].parse(message) + if stats is not None: + results[name].append(stats) + + for name, parser in parsers.items(): + stats = parser.finish() + if stats is not None: + print "Results for", name, "not fully parsed!" 
+ results[name].append(stats) + + return results + + +def compare_stats(before, after): + result = si_stats() + for name in result.get_metrics(): + b = before.__dict__[name] + a = after.__dict__[name] + result.__dict__[name] = (a - b, b, a) + return result + +def subtract_stats(x, y): + result = si_stats() + for name in result.get_metrics(): + result.__dict__[name] = x.__dict__[name] - y.__dict__[name] + return result + +def is_regression(before, after): + for field in before.get_metrics(): + if field == 'maxwaves': + if before.__dict__[field] > after.__dict__[field]: + return True + else: + if before.__dict__[field] < after.__dict__[field]: + return True + return False + +def divide_stats(num, div): + result = si_stats() + for name in result.get_metrics(): + if div.__dict__[name] == 0: + result.__dict__[name] = num.__dict__[name] + else: + result.__dict__[name] = 100.0 * float(num.__dict__[name]) / float(div.__dict__[name]) + return result + +def print_before_after_stats(before, after, divisor = 1): + result = si_stats() + for name in result.get_metrics(): + b = before.__dict__[name] / divisor + a = after.__dict__[name] / divisor + if b == 0: + percent = format_float(0.0) + else: + percent = format_float(100 * float(a - b) / float(b)) + result.__dict__[name] = '{} -> {} ({})'.format(get_str(b,''), get_str(a,''), percent) + + print result + +def print_cmp_stats(comp): + result = comp.copy() + for name in result.get_metrics(): + if type(result.__dict__[name]) != tuple: + a = 0 + b = 0 + else: + b = result.__dict__[name][1] + a = result.__dict__[name][2] + if b == 0: + percent = format_float(0.0) + else: + percent = format_float(100 * float(a - b) / float(b)) + result.__dict__[name] = '{} -> {} ({})'.format(get_str(b,''), get_str(a,''), percent) + + print result + + +def print_count(stats, divisor): + result = si_stats() + for name in result.get_metrics(): + count = stats.__dict__[name] + percent = float(count) / float(divisor) + result.__dict__[name] = '{} 
({})'.format(get_str(count,''), get_str(percent)) + print result.to_string(False) + +def compare_results(before_all_results, after_all_results): + total_before = si_stats() + total_after = si_stats() + total_affected_before = si_stats() + total_affected_after = si_stats() + increases = si_stats() + decreases = si_stats() + max_increase_per = si_stats() + max_decrease_per = si_stats() + max_increase_unit = si_stats() + max_decrease_unit = si_stats() + + num_affected = 0 + num_tests = 0 + num_shaders = 0 + num_after_errors = 0 + num_before_errors = 0 + + all_names = set(itertools.chain(before_all_results.keys(), after_all_results.keys())) + + only_after_names = [] + only_before_names = [] + count_mismatch_names = [] + errors_names = [] + + for name in all_names: + before_test_results = before_all_results.get(name) + after_test_results = after_all_results.get(name) + + if before_test_results is None: + only_after_names.append(name) + continue + if after_test_results is None: + only_before_names.append(name) + continue + + if len(before_test_results) != len(after_test_results): + count_mismatch_names.append(name) + + num_tests += 1 + have_error = False + + for before, after in zip(before_test_results, after_test_results): + if before.error: + num_before_errors += 1 + if after.error: + num_after_errors += 1 + if after.error or before.error: + have_error = True + continue + + total_before.add(before) + total_after.add(after) + num_shaders += 1 + + comp = compare_stats(before, after) + if not comp.is_empty(): + num_affected += 1 + total_affected_before.add(before) + total_affected_after.add(after) + increases.update_increase(comp) + decreases.update_decrease(comp) + max_increase_per.update(comp, cmp_max_per, name) + max_decrease_per.update(comp, cmp_min_per, name) + max_increase_unit.update(comp, cmp_max_unit, name) + max_decrease_unit.update(comp, cmp_min_unit, name) + + if have_error: + errors_names.append(name) + + print '{} shaders in {} tests'.format(num_shaders, 
num_tests) + if num_shaders == 0: + return + + print "Totals:" + print_before_after_stats(total_before, total_after) + print "Totals from affected shaders:" + print_before_after_stats(total_affected_before, total_affected_after) + print "Increases:" + print_count(increases, num_shaders) + print "Decreases:" + print_count(decreases, num_shaders) + + print "*** BY PERCENTAGE ***\n" + print "Max Increase:\n" + print_cmp_stats(max_increase_per) + print "Max Decrease:\n" + print_cmp_stats(max_decrease_per) + + print "*** BY UNIT ***\n" + print "Max Increase:\n" + print_cmp_stats(max_increase_unit) + print "Max Decrease:\n" + print_cmp_stats(max_decrease_unit) + + def report_ignored(names, what): + if names: + print "*** {} are ignored:".format(what) + s = ', '.join(names[:5]) + if len(names) > 5: + s += ', and {} more'.format(len(names) - 5) + print s + + report_ignored(only_after_names, "Tests only in 'after' results") + report_ignored(only_before_names, "Tests only in 'before' results") + report_ignored(count_mismatch_names, "Tests with different number of shaders") + report_ignored(errors_names, "Shaders with compilation errors") + if num_after_errors > 0 or num_before_errors > 0: + print "*** Compile errors encountered! 
(before: {}, after: {})".format( + num_before_errors, num_after_errors) + +class grouped_stats: + def __init__(self): + self.num_shaders = 0 + self.before = si_stats() + self.after = si_stats() + self.diff = si_stats() + + def add(self, before, after): + self.num_shaders += 1 + self.before.add(before) + self.after.add(after) + + def set_one_shader(self, before, after): + self.before = before + self.after = after + self.diff = subtract_stats(after, before) + + def print_vgpr_spilling_app(self, name): + if (self.after.spilled_vgprs > 0 or + self.after.privmem_vgprs > 0): + print " {:22}{:6}{:10}{:10}{:10}".format( + name, + self.num_shaders, + self.after.spilled_vgprs, + self.after.privmem_vgprs, + self.after.scratch_size) + + def print_one_shader_vgpr_spill(self, name): + if (self.after.spilled_vgprs > 0 or + self.after.privmem_vgprs > 0): + print " {:65}{:10}{:10}{:10}{:10}".format( + name, + self.after.vgprs, + self.after.spilled_vgprs, + self.after.privmem_vgprs, + self.after.scratch_size) + + def print_sgpr_spilling_app(self, name): + if self.after.spilled_sgprs > 0: + print " {:22}{:6}{:10}{:>9.1f}".format( + name, + self.num_shaders, + self.after.spilled_sgprs, + float(self.after.spilled_sgprs) / float(self.num_shaders)) + + def print_one_shader_sgpr_spill(self, name): + if self.after.spilled_sgprs > 0: + print " {:65}{:10}{:10}".format( + name, + self.after.sgprs, + self.after.spilled_sgprs) + + def print_percentages(self, name): + print " {:22}{:6}{}{}{}{}{}{}{}{}{}".format( + name, + self.num_shaders, + format_percent_change(self.before.sgprs, self.after.sgprs), + format_percent_change(self.before.vgprs, self.after.vgprs), + format_percent_change(self.before.spilled_sgprs, self.after.spilled_sgprs), + format_percent_change(self.before.spilled_vgprs, self.after.spilled_vgprs), + format_percent_change(self.before.privmem_vgprs, self.after.privmem_vgprs), + format_percent_change(self.before.scratch_size, self.after.scratch_size), + 
format_percent_change(self.before.code_size, self.after.code_size), + format_percent_change(self.before.maxwaves, self.after.maxwaves, more_is_better = True), + format_percent_change(self.before.waitstates, self.after.waitstates)) + + def print_regression(self, name, field): + more_is_better = field == "maxwaves" + print " {:65}{:10}{:10}{}{}".format( + name, + self.before.__dict__[field], + self.after.__dict__[field], + format_table_cell(self.after.__dict__[field] - self.before.__dict__[field], + more_is_better = more_is_better), + format_percent_change(self.before.__dict__[field], self.after.__dict__[field], + more_is_better = more_is_better)) + +""" +Return "filename [index]", because files can contain multiple shaders. +""" +def get_shader_name(list, orig): + for i in range(10): + # add the index to the name + name = orig + " [{}]".format(i) + if name not in list: + return name + assert False + return "(error)" + + +def print_yellow(str): + print set_yellow + str + set_normal + +def print_tables(before_all_results, after_all_results): + re_app = re.compile(r"^.*/([^/]+)/[^/]+$") + + num_listed = 40 + apps = defaultdict(grouped_stats) + shaders = defaultdict(grouped_stats) + total = grouped_stats() + total_affected = grouped_stats() + + all_files = set(itertools.chain(before_all_results.keys(), + after_all_results.keys())) + + for file in all_files: + # get the application name (inner-most directory) + match_app = re_app.match(file) + if match_app is None: + app = "(unknown)" + else: + app = match_app.group(1) + if len(app) > 22: + app = app[0:19] + ".." 
+ + before_test_results = before_all_results.get(file) + after_test_results = after_all_results.get(file) + + if before_test_results is None or after_test_results is None: + continue + + for before, after in zip(before_test_results, after_test_results): + if after.error or before.error: + continue + + apps[app].add(before, after) + total.add(before, after) + + if not subtract_stats(before, after).is_empty(): + total_affected.add(before, after) + + # we don't have to add all shaders, just those that we may need + # to display + if (is_regression(before, after) or + after.scratch_size > 0 or + after.spilled_vgprs > 0 or + after.privmem_vgprs > 0 or + after.spilled_sgprs > 0): + name = get_shader_name(shaders, file) + shaders[name].set_one_shader(before, after) + + # worst VGPR spills + num = 0 + sort_key = lambda v: -v[1].after.scratch_size + for name, stats in sorted(shaders.items(), key = sort_key): + if num == 0: + print_yellow(" WORST VGPR SPILLS (not deltas)" + (" " * 40) + + "VGPRs SpillVGPR PrivVGPR ScratchSize") + stats.print_one_shader_vgpr_spill(name) + num += 1 + if num == num_listed: + break + if num > 0: + print + + # VGPR spilling apps + print_yellow(" VGPR SPILLING APPS Shaders SpillVGPR PrivVGPR ScratchSize") + for name, stats in sorted(apps.items()): + stats.print_vgpr_spilling_app(name) + print + + # worst SGPR spills + num = 0 + sort_key = lambda v: -v[1].after.spilled_sgprs + for name, stats in sorted(shaders.items(), key = sort_key): + if num == 0: + print_yellow(" WORST SGPR SPILLS (not deltas)" + (" " * 40) + + "SGPRs SpillSGPR") + stats.print_one_shader_sgpr_spill(name) + num += 1 + if num == num_listed: + break + if num > 0: + print + + # SGPR spilling apps + print_yellow(" SGPR SPILLING APPS Shaders SpillSGPR AvgPerSh") + for name, stats in sorted(apps.items()): + stats.print_sgpr_spilling_app(name) + print + + # worst regressions + metrics = si_stats().metrics + for i in range(len(metrics)): + field = metrics[i][0] + num = 0 + 
more_is_better = metrics[i][0] == 'maxwaves' + + if more_is_better: + sort_key = lambda v: v[1].diff.__dict__[field] + else: + sort_key = lambda v: -v[1].diff.__dict__[field] + + for name, stats in sorted(shaders.items(), key = sort_key): + if more_is_better: + if stats.diff.__dict__[field] >= 0: + continue + else: + if stats.diff.__dict__[field] <= 0: + continue + + if num == 0: + print_yellow(" WORST REGRESSIONS - {:49}".format(metrics[i][1]) + + "Before After Delta Percentage") + stats.print_regression(name, field) + num += 1 + if num == num_listed: + break + if num > 0: + print + + # percentages + legend = "Shaders SGPRs VGPRs SpillSGPR SpillVGPR PrivVGPR Scratch CodeSize MaxWaves Waits" + print_yellow(" PERCENTAGE DELTAS " + legend) + for name, stats in sorted(apps.items()): + stats.print_percentages(name) + print " " + ("-" * (21 + len(legend))) + total_affected.print_percentages("All affected") + print " " + ("-" * (21 + len(legend))) + total.print_percentages("Total") + print + +def main(): + before = sys.argv[1] + after = sys.argv[2] + + results_before = get_results(before) + results_after = get_results(after) + + compare_results(results_before, results_after) + print_tables(results_before, results_after) + +if __name__ == "__main__": + main() @@ -0,0 +1,539 @@ +/* + * Copyright © 2018 Valve Corporation + * + * Based in part on shader-db which is: + * Copyright © 2014 Intel Corporation + * Copyright © 2015 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <ftw.h> +#include <time.h> + +#include "serialize.h" + +#define unlikely(x) __builtin_expect(!!(x), 0) + +/* Pipeline tests. 
*/ +static unsigned pipeline_test_size = 1 << 15; /* next-pow-2(num pipelines in db) */ +static unsigned pipeline_test_length; +static struct pipeline_test { + char *filename; + off_t filesize; +} *pipeline_test; + +static int +gather_pipeline_test(const char *fpath, const struct stat *sb, int typeflag) +{ + static const char *ext = ".pipeline_test"; + + if (strlen(fpath) >= strlen(ext) && + memcmp(fpath + strlen(fpath) - strlen(ext), ext, strlen(ext)) == 0) { + if (unlikely(!S_ISREG(sb->st_mode))) { + fprintf(stderr, "ERROR: %s is not a regular file\n", fpath); + return -1; + } + + if (unlikely(pipeline_test_size <= pipeline_test_length)) { + pipeline_test_size *= 2; + pipeline_test = + realloc(pipeline_test, + pipeline_test_size * sizeof(struct pipeline_test)); + } + + pipeline_test[pipeline_test_length].filename = malloc(strlen(fpath) + 1); + memcpy(pipeline_test[pipeline_test_length].filename, fpath, strlen(fpath) + 1); + pipeline_test[pipeline_test_length].filesize = sb->st_size; + pipeline_test_length++; + } + + return 0; +} + +/* Shader stats */ +static PFN_vkGetShaderInfoAMD vkGetShaderInfo = VK_NULL_HANDLE; + +struct shader_stats +{ + unsigned num_sgprs; + unsigned num_vgprs; + unsigned num_spilled_sgprs; + unsigned num_spilled_vgprs; + unsigned priv_mem_vgprs; + unsigned code_size; + unsigned lds; + unsigned scratch; + unsigned max_waves; +}; + +#define PARSE_STAT(key, value) \ + line = strtok(NULL, "\n"); \ + if (sscanf(line, key, value) != 1) \ + return -1; + +static int +parse_shader_stats(char *buf, struct shader_stats *stats) +{ + char *line; + + line = strtok(buf, "\n"); + while(line) { + if (!strcmp(line, "*** SHADER STATS ***")) + break; + line = strtok(NULL, "\n"); + } + + if (unlikely(!line)) + return -1; + + PARSE_STAT("SGPRS: %d\n", &stats->num_sgprs); + PARSE_STAT("VGPRS: %d\n", &stats->num_vgprs); + PARSE_STAT("Spilled SGPRs: %d\n", &stats->num_spilled_sgprs); + PARSE_STAT("Spilled VGPRs: %d\n", &stats->num_spilled_vgprs); + 
PARSE_STAT("PrivMem VGPRS: %d\n", &stats->priv_mem_vgprs); + PARSE_STAT("Code Size: %d bytes\n", &stats->code_size); + PARSE_STAT("LDS: %d blocks\n", &stats->lds); + PARSE_STAT("Scratch: %d bytes per wave\n", &stats->scratch); + PARSE_STAT("Max Waves: %d\n", &stats->max_waves); + + return 0; +} + +static bool +is_shader_stage_valid(VkDevice device, VkPipeline pipeline, + VkShaderStageFlagBits stage) +{ + VkResult result; + size_t size; + + result = vkGetShaderInfo(device, pipeline, stage, + VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD, &size, NULL); + if (result == VK_ERROR_FEATURE_NOT_PRESENT){ + /* The spec doesn't state what to do when the stage is invalid, and RADV + * returns VK_ERROR_FEATURE_NOT_PRESENT in this situation, mostly for + * merged shaders on GFX9. + */ + return false; + } + + return true; +} + +static int +get_shader_stats(VkDevice device, VkPipeline pipeline, + VkShaderStageFlagBits stage, + struct shader_stats *stats) +{ + VkResult result; + size_t size; + char *buf; + int ret = 0; + + result = vkGetShaderInfo(device, pipeline, stage, + VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD, &size, NULL); + if (unlikely(result != VK_SUCCESS)) + return -1; + + buf = malloc(size); + if (unlikely(!buf)) + return -1; + + result = vkGetShaderInfo(device, pipeline, stage, + VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD, &size, buf); + if (unlikely(result != VK_SUCCESS)) { + ret = -1; + goto fail; + } + + if (unlikely(parse_shader_stats(buf, stats) < 0)) { + ret = -1; + goto fail; + } + +fail: + free(buf); + return ret; +} + +static void +print_shader_stats(const char *pipeline_name, VkShaderStageFlagBits stage, + const struct shader_stats *stats) +{ + printf("%s - ", pipeline_name); + printf("Shader Stats: "); + printf("SGPRS: %d ", stats->num_sgprs); + printf("VGPRS: %d ", stats->num_vgprs); + printf("Code Size: %d ", stats->code_size); + printf("LDS: %d ", stats->lds); + printf("Scratch: %d ", stats->scratch); + printf("Max Waves: %d ", stats->max_waves); + printf("Spilled SGPRs: %d ", 
stats->num_spilled_sgprs); + printf("Spilled VGPRs: %d ", stats->num_spilled_vgprs); + printf("PrivMem VGPRs: %d ", stats->priv_mem_vgprs); + printf("\n"); +} + +static VkResult +create_graphics_pipeline(VkDevice device, struct pipeline_info *info, + VkPipelineLayout layout, VkPipeline *pipeline) + +{ + VkGraphicsPipelineCreateInfo pipelineInfo = {}; + VkRenderPass renderPass = VK_NULL_HANDLE; + VkResult result; + + /* Render pass. */ + result = vkCreateRenderPass(device, &info->renderPassInfo, + NULL, &renderPass); + if (unlikely(result != VK_SUCCESS)) + return result; + + /* Graphics pipeline. */ + pipelineInfo.stageCount = info->stageCount; + pipelineInfo.pStages = info->pShaderStagesInfo; + pipelineInfo.pVertexInputState = &info->vertexInputState; + pipelineInfo.pInputAssemblyState = &info->inputAssemblyState; + pipelineInfo.pTessellationState = &info->tessellationState; + pipelineInfo.pViewportState = &info->viewportState; + pipelineInfo.pRasterizationState = &info->rasterizationState; + pipelineInfo.pMultisampleState = &info->multisampleState; + pipelineInfo.pDepthStencilState = &info->depthStencilState; + pipelineInfo.pColorBlendState = &info->colorBlendState; + pipelineInfo.pDynamicState = &info->dynamicState; + pipelineInfo.layout = layout; + pipelineInfo.renderPass = renderPass; + + result = vkCreateGraphicsPipelines(device, NULL, 1, &pipelineInfo, + NULL, pipeline); + + vkDestroyRenderPass(device, renderPass, NULL); + return result; +} + +static int +create_compute_pipeline(VkDevice device, struct pipeline_info *info, + VkPipelineLayout layout, VkPipeline *pipeline) +{ + VkComputePipelineCreateInfo pipelineInfo = {}; + + /* Compute pipeline. 
*/ + pipelineInfo.stage = *info->pShaderStagesInfo; + pipelineInfo.layout = layout; + + return vkCreateComputePipelines(device, NULL, 1, &pipelineInfo, + NULL, pipeline); +} +static int +create_pipeline(VkDevice device, const char *pipeline_name, + struct pipeline_info *info) +{ + VkPipelineLayout layout = VK_NULL_HANDLE; + VkPipeline pipeline = VK_NULL_HANDLE; + VkResult result; + int ret = 0; + + /* Shader modules. */ + for (uint32_t i = 0; i < info->stageCount; i++) { + result = vkCreateShaderModule(device, &info->pShaderModulesInfo[i], + NULL, &info->pShaderStagesInfo[i].module); + if (unlikely(result != VK_SUCCESS)) { + ret = -1; + goto fail; + } + } + + /* Descriptor set layouts. */ + VkDescriptorSetLayout *pSetLayouts = + calloc(info->pipelineLayoutInfo.setLayoutCount, sizeof(*pSetLayouts)); + if (unlikely(!pSetLayouts)) { + ret = -1; + goto fail; + } + + for (uint32_t i = 0; i < info->pipelineLayoutInfo.setLayoutCount; i++) { + result = vkCreateDescriptorSetLayout(device, &info->pSetLayoutsInfo[i], + NULL, &pSetLayouts[i]); + if (unlikely(result != VK_SUCCESS)) { + ret = -1; + goto fail; + } + } + + /* Attach descriptor set layouts to the pipeline. */ + info->pipelineLayoutInfo.pSetLayouts = pSetLayouts; + + /* Pipeline layout. */ + result = vkCreatePipelineLayout(device, &info->pipelineLayoutInfo, + NULL, &layout); + if (unlikely(result != VK_SUCCESS)) { + ret = -1; + goto fail; + } + + /* Graphics/Compute pipeline. */ + if (info->bindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { + result = create_graphics_pipeline(device, info, layout, &pipeline); + } else { + assert(info->bindPoint == VK_PIPELINE_BIND_POINT_COMPUTE); + result = create_compute_pipeline(device, info, layout, &pipeline); + } + + if (unlikely(result != VK_SUCCESS)) { + ret = -1; + goto fail; + } + + /* Shader stats. 
*/ + for (uint32_t i = 0; i < info->stageCount; i++) { + VkPipelineShaderStageCreateInfo *pCreateInfo = &info->pShaderStagesInfo[i]; + VkShaderStageFlagBits stage = pCreateInfo->stage; + struct shader_stats stats = {}; + + if (!is_shader_stage_valid(device, pipeline, stage)) + continue; + + ret = get_shader_stats(device, pipeline, stage, &stats); + if (unlikely(ret < 0)) { + fprintf(stderr, "Failed to get shader stats!\n"); + goto fail; + } + + print_shader_stats(pipeline_name, stage, &stats); + } + +fail: + for (uint32_t i = 0; i < info->stageCount; i++) + vkDestroyShaderModule(device, info->pShaderStagesInfo[i].module, NULL); + for (uint32_t i = 0; i < info->pipelineLayoutInfo.setLayoutCount; i++) + vkDestroyDescriptorSetLayout(device, pSetLayouts[i], NULL); + vkDestroyPipelineLayout(device, layout, NULL); + vkDestroyPipeline(device, pipeline, NULL); + free(pSetLayouts); + + return ret; +} + +static void +free_pipeline(struct pipeline_info *pipeline) +{ + for (uint32_t i = 0; i < pipeline->pipelineLayoutInfo.setLayoutCount; i++) { + VkDescriptorSetLayoutCreateInfo *pInfo = &pipeline->pSetLayoutsInfo[i]; + free((void *)pInfo->pBindings); + } + free(pipeline->pSetLayoutsInfo); + free(pipeline->pShaderStagesInfo); + free(pipeline->pShaderModulesInfo); + free((void *)pipeline->renderPassInfo.pSubpasses); /* XXX*/ + free(pipeline); +} + +static int +run(VkDevice device, const char *pipeline_name, const char *data, off_t size) +{ + struct pipeline_info *pipeline; + struct blob_reader metadata; + int ret = 0; + + blob_reader_init(&metadata, data, size); + + pipeline = calloc(1, sizeof(*pipeline)); + if (unlikely(!pipeline)) + return -1; + + if (unlikely(!deserialize_pipeline(pipeline, &metadata))) { + fprintf(stderr, "Failed to deserialize pipeline, corrupted data?\n"); + return -1; + } + + ret = create_pipeline(device, pipeline_name, pipeline); + if (unlikely(ret < 0)) { + fprintf(stderr, "Failed to create pipeline!\n"); + goto fail; + } + +fail: + 
free_pipeline(pipeline); + return ret; +} + +static void +print_usage(const char *prog_name) +{ + fprintf(stderr, + "Usage: %s <directories and *.pipeline_test files>\n", + prog_name); +} + +int main(int argc, char **argv) +{ + const char *extensionNames[] = { "VK_AMD_shader_info" }; + VkQueueFamilyProperties queue_family; + VkPhysicalDevice *physical_devices; + struct timespec start, end; + uint32_t device_count; + uint32_t queue_count = 1; + VkInstance instance; + VkDevice device; + VkResult result; + int ret = 0; + + if (argc < 2) { + fprintf(stderr, "No directories specified!\n"); + print_usage(argv[0]); + return -1; + } + + /** + * Instance creation. + */ + VkInstanceCreateInfo instanceCreateInfo = {}; + instanceCreateInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + + result = vkCreateInstance(&instanceCreateInfo, NULL, &instance); + if (unlikely(result != VK_SUCCESS)) { + fprintf(stderr, "Failed to create instance (%d).\n", result); + return -1; + } + + /** + * Device creation. + */ + /* Get number of devices. */ + result = vkEnumeratePhysicalDevices(instance, &device_count, NULL); + fprintf(stderr, "Number of devices: %d\n", device_count); + + physical_devices = malloc(sizeof(*physical_devices) * device_count); + + /* Get physical devices. */ + result = vkEnumeratePhysicalDevices(instance, &device_count, + physical_devices); + if (unlikely(result != VK_SUCCESS)) { + fprintf(stderr, "Failed to enumerate physical devices (%d).\n", result); + ret = -1; + goto fail_device; + } + + VkPhysicalDeviceProperties device_properties; + vkGetPhysicalDeviceProperties(physical_devices[0], &device_properties); + fprintf(stderr, "GPU: %s\n", device_properties.deviceName); + + /* Get queue properties. */ + vkGetPhysicalDeviceQueueFamilyProperties(physical_devices[0], &queue_count, + &queue_family); + assert(queue_family.queueFlags & VK_QUEUE_GRAPHICS_BIT); + + /* Create logical device. 
*/ + VkDeviceQueueCreateInfo queueCreateInfo = {}; + queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + queueCreateInfo.queueFamilyIndex = 0; + queueCreateInfo.queueCount = 1; + VkDeviceCreateInfo deviceCreateInfo = {}; + deviceCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; + deviceCreateInfo.queueCreateInfoCount = 1; + deviceCreateInfo.pQueueCreateInfos = &queueCreateInfo; + deviceCreateInfo.enabledExtensionCount = 1; + deviceCreateInfo.ppEnabledExtensionNames = extensionNames; + + result = vkCreateDevice(physical_devices[0], &deviceCreateInfo, + NULL, &device); + if (unlikely(result != VK_SUCCESS)) { + if (result == VK_ERROR_EXTENSION_NOT_PRESENT) + fprintf(stderr, "VK_AMD_shader_info is required!\n"); + fprintf(stderr, "Failed to create device (%d).\n", result); + ret = -1; + goto fail_device; + } + + vkGetShaderInfo = + (PFN_vkGetShaderInfoAMD)vkGetDeviceProcAddr(device, + "vkGetShaderInfoAMD"); + + /** + * Runner. + */ + /* Gather all pipeline tests. */ + pipeline_test = malloc(pipeline_test_size * sizeof(struct pipeline_test)); + for (int i = 1; i < argc; i++) { + ftw(argv[i], gather_pipeline_test, 100); + } + + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start); + + /* Process each pipeline tests. 
*/ + for (unsigned i = 0; i < pipeline_test_length; i++) { + const char *current_pipeline_name = pipeline_test[i].filename; + off_t filesize = pipeline_test[i].filesize; + char *data; + int fd; + + fprintf(stderr, "--> %s\n", current_pipeline_name); + + fd = open(current_pipeline_name, O_RDONLY); + if (unlikely(fd == -1)) { + perror("open"); + continue; + } + + data = mmap(NULL, filesize, PROT_READ, MAP_PRIVATE, fd, 0); + if (unlikely(data == MAP_FAILED)) { + perror("mmap"); + continue; + } + + if (unlikely(close(fd) == -1)) { + perror("close"); + continue; + } + + if (unlikely(run(device, current_pipeline_name, data, filesize) < 0)) + continue; + + if (unlikely(munmap(data, filesize) == -1)) { + perror("munmap"); + continue; + } + + free(pipeline_test[i].filename); + } + + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end); + printf("Process took %.2lf seconds and compiled %u pipelines\n", + (end.tv_sec - start.tv_sec) + 10e-9 * (end.tv_nsec - start.tv_nsec), + pipeline_test_length); + + free(pipeline_test); + + vkDestroyDevice(device, NULL); +fail_device: + free(physical_devices); + vkDestroyInstance(instance, NULL); + + return ret; +} diff --git a/serialize.c b/serialize.c new file mode 100644 index 0000000..14a296f --- /dev/null +++ b/serialize.c @@ -0,0 +1,959 @@ +/* + * Copyright © 2018 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> + +#include "serialize.h" + +/** + * Sampler. + */ +static void +serialize_sampler(const VkSamplerCreateInfo *pInfo, + struct blob *metadata) +{ + assert(pInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); + blob_write_uint32(metadata, pInfo->sType); + + blob_write_uint32(metadata, pInfo->magFilter); + blob_write_uint32(metadata, pInfo->minFilter); + blob_write_uint32(metadata, pInfo->mipmapMode); + blob_write_uint32(metadata, pInfo->addressModeU); + blob_write_uint32(metadata, pInfo->addressModeV); + blob_write_uint32(metadata, pInfo->addressModeW); + blob_write_uint32(metadata, pInfo->mipLodBias); + blob_write_uint32(metadata, pInfo->anisotropyEnable); + blob_write_uint32(metadata, pInfo->maxAnisotropy); + blob_write_uint32(metadata, pInfo->compareEnable); + blob_write_uint32(metadata, pInfo->compareOp); + blob_write_uint32(metadata, pInfo->minLod); + blob_write_uint32(metadata, pInfo->maxLod); + blob_write_uint32(metadata, pInfo->borderColor); + blob_write_uint32(metadata, pInfo->unnormalizedCoordinates); +} + +static void +deserialize_sampler(VkSamplerCreateInfo *pInfo, + struct blob_reader *metadata) +{ + pInfo->sType = blob_read_uint32(metadata); + assert(pInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); + + pInfo->magFilter = blob_read_uint32(metadata); + pInfo->minFilter = blob_read_uint32(metadata); + pInfo->mipmapMode = blob_read_uint32(metadata); + pInfo->addressModeU = blob_read_uint32(metadata); + pInfo->addressModeV = 
blob_read_uint32(metadata); + pInfo->addressModeW = blob_read_uint32(metadata); + pInfo->mipLodBias = blob_read_uint32(metadata); + pInfo->anisotropyEnable = blob_read_uint32(metadata); + pInfo->maxAnisotropy = blob_read_uint32(metadata); + pInfo->compareEnable = blob_read_uint32(metadata); + pInfo->compareOp = blob_read_uint32(metadata); + pInfo->minLod = blob_read_uint32(metadata); + pInfo->maxLod = blob_read_uint32(metadata); + pInfo->borderColor = blob_read_uint32(metadata); + pInfo->unnormalizedCoordinates = blob_read_uint32(metadata); +} + +/** + * Render pass. + */ +static void +serialize_render_pass(struct pipeline_info *pipeline, + struct blob *metadata) +{ + const VkRenderPassCreateInfo *pInfo = &pipeline->renderPassInfo; + + assert(pInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); + blob_write_uint32(metadata, pInfo->sType); + + blob_write_uint32(metadata, pInfo->attachmentCount); + if (pInfo->attachmentCount) { + blob_write_bytes(metadata, pInfo->pAttachments, + sizeof(*pInfo->pAttachments) * + pInfo->attachmentCount); + } + + blob_write_uint32(metadata, pInfo->subpassCount); + for (uint32_t i = 0; i < pInfo->subpassCount; i++) { + const VkSubpassDescription *subpass = &pInfo->pSubpasses[i]; + bool has_depth_stencil = subpass->pDepthStencilAttachment ? true : false; + + blob_write_uint32(metadata, subpass->flags); + blob_write_uint32(metadata, subpass->pipelineBindPoint); + + blob_write_uint32(metadata, subpass->inputAttachmentCount); + if (subpass->inputAttachmentCount) { + blob_write_bytes(metadata, subpass->pInputAttachments, + sizeof(*subpass->pInputAttachments) * + subpass->inputAttachmentCount); + } + + blob_write_uint32(metadata, subpass->colorAttachmentCount); + if (subpass->colorAttachmentCount) { + bool has_resolve = subpass->pResolveAttachments ? 
true : false; + + blob_write_bytes(metadata, subpass->pColorAttachments, + sizeof(*subpass->pColorAttachments) * + subpass->colorAttachmentCount); + + blob_write_uint32(metadata, has_resolve); + if (has_resolve) { + blob_write_bytes(metadata, subpass->pResolveAttachments, + sizeof(*subpass->pResolveAttachments) * + subpass->colorAttachmentCount); + } + } + + blob_write_uint32(metadata, has_depth_stencil); + if (has_depth_stencil) { + blob_write_bytes(metadata, subpass->pDepthStencilAttachment, + sizeof(*subpass->pDepthStencilAttachment)); + } + + blob_write_uint32(metadata, subpass->preserveAttachmentCount); + if (subpass->preserveAttachmentCount) { + blob_write_bytes(metadata, subpass->pPreserveAttachments, + sizeof(*subpass->pPreserveAttachments) * + subpass->preserveAttachmentCount); + } + } + + blob_write_uint32(metadata, pInfo->dependencyCount); + if (pInfo->dependencyCount) { + blob_write_bytes(metadata, pInfo->pDependencies, + sizeof(*pInfo->pDependencies) * + pInfo->dependencyCount); + } +} + +static bool +deserialize_render_pass(struct pipeline_info *pipeline, + struct blob_reader *metadata) +{ + VkRenderPassCreateInfo *pInfo = &pipeline->renderPassInfo; + + pInfo->sType = blob_read_uint32(metadata); + assert(pInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); + + pInfo->attachmentCount = blob_read_uint32(metadata); + if (pInfo->attachmentCount) { + pInfo->pAttachments = + blob_read_bytes(metadata, sizeof(*pInfo->pAttachments) * + pInfo->attachmentCount); + } + + pInfo->subpassCount = blob_read_uint32(metadata); + if (pInfo->subpassCount) { + VkSubpassDescription *pSubpasses = + calloc(pInfo->subpassCount, sizeof(*pSubpasses)); /* TODO: remove me */ + + for (uint32_t i = 0; i < pInfo->subpassCount; i++) { + VkSubpassDescription *subpass = &pSubpasses[i]; + + subpass->flags = blob_read_uint32(metadata); + subpass->pipelineBindPoint = blob_read_uint32(metadata); + + subpass->inputAttachmentCount = blob_read_uint32(metadata); + if 
(subpass->inputAttachmentCount) { + subpass->pInputAttachments = + blob_read_bytes(metadata, sizeof(*subpass->pInputAttachments) * + subpass->inputAttachmentCount); + } + + subpass->colorAttachmentCount = blob_read_uint32(metadata); + if (subpass->colorAttachmentCount) { + subpass->pColorAttachments = + blob_read_bytes(metadata, sizeof(*subpass->pColorAttachments) * + subpass->colorAttachmentCount); + + if (blob_read_uint32(metadata)) { + subpass->pResolveAttachments = + blob_read_bytes(metadata, sizeof(*subpass->pResolveAttachments) * + subpass->colorAttachmentCount); + } + } + + if (blob_read_uint32(metadata)) { + subpass->pDepthStencilAttachment = + blob_read_bytes(metadata, sizeof(*subpass->pDepthStencilAttachment)); + } + + subpass->preserveAttachmentCount = blob_read_uint32(metadata); + if (subpass->preserveAttachmentCount) { + subpass->pPreserveAttachments = + blob_read_bytes(metadata, sizeof(*subpass->pPreserveAttachments) * + subpass->preserveAttachmentCount); + } + } + + pInfo->pSubpasses = pSubpasses; + } + + pInfo->dependencyCount = blob_read_uint32(metadata); + if (pInfo->dependencyCount) { + pInfo->pDependencies = + blob_read_bytes(metadata, sizeof(*pInfo->pDependencies) * + pInfo->dependencyCount); + } + + return true; +} + +/** + * Shader module. 
 */
/* Write a shader module: sType, SPIR-V byte size, then the raw words.
 * NOTE(review): codeSize is a size_t but is stored as a uint32 — fine for
 * realistic modules, but >4GiB would truncate.
 */
static void
serialize_shader_module(const VkShaderModuleCreateInfo *pInfo,
                        struct blob *metadata)
{
   assert(pInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
   blob_write_uint32(metadata, pInfo->sType);

   blob_write_uint32(metadata, pInfo->codeSize);
   blob_write_bytes(metadata, pInfo->pCode, pInfo->codeSize);
}

/* Mirror of serialize_shader_module(); pCode aliases the reader's buffer. */
static bool
deserialize_shader_module(VkShaderModuleCreateInfo *pInfo,
                          struct blob_reader *metadata)
{
   pInfo->sType = blob_read_uint32(metadata);
   assert(pInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);

   pInfo->codeSize = blob_read_uint32(metadata);
   pInfo->pCode = blob_read_bytes(metadata, pInfo->codeSize);

   return true;
}

/**
 * Descriptor set layout.
 */
/* Bindings are written field-by-field (not as raw bytes) because
 * pImmutableSamplers is a pointer and is not captured yet.
 */
static void
serialize_descriptor_set_layout(const VkDescriptorSetLayoutCreateInfo *pInfo,
                                struct blob *metadata)
{
   assert(pInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO);
   blob_write_uint32(metadata, pInfo->sType);

   blob_write_uint32(metadata, pInfo->bindingCount);
   for (uint32_t i = 0; i < pInfo->bindingCount; i++) {
      const VkDescriptorSetLayoutBinding *binding =
         &pInfo->pBindings[i];

      blob_write_uint32(metadata, binding->binding);
      blob_write_uint32(metadata, binding->descriptorType);
      blob_write_uint32(metadata, binding->descriptorCount);
      blob_write_uint32(metadata, binding->stageFlags);

      /* TODO: pImmutableSamplers */
   }
}

/* Rebuild a descriptor set layout; pBindings is heap-allocated.
 * Returns false on allocation failure.
 */
static bool
deserialize_descriptor_set_layout(VkDescriptorSetLayoutCreateInfo *pInfo,
                                  struct blob_reader *metadata)
{
   pInfo->sType = blob_read_uint32(metadata);
   assert(pInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO);

   pInfo->bindingCount = blob_read_uint32(metadata);
   if (pInfo->bindingCount) {
      VkDescriptorSetLayoutBinding *pBindings =
         calloc(pInfo->bindingCount, sizeof(*pBindings)); /* TODO: remove me */

      if (!pBindings)
         return false;

      for (uint32_t i = 0; i < pInfo->bindingCount; i++) {
         VkDescriptorSetLayoutBinding *binding = &pBindings[i];

         binding->binding = blob_read_uint32(metadata);
         binding->descriptorType = blob_read_uint32(metadata);
         binding->descriptorCount = blob_read_uint32(metadata);
         binding->stageFlags = blob_read_uint32(metadata);
         binding->pImmutableSamplers = NULL;
         /* TODO: pImmutableSamplers */
      }

      pInfo->pBindings = pBindings;
   }

   return true;
}

/**
 * Pipeline layout.
 */
/* The set layouts live in pipeline->pSetLayoutsInfo (captured separately
 * from pInfo->pSetLayouts, which only holds driver handles), so they are
 * serialized inline here.
 */
static void
serialize_pipeline_layout(struct pipeline_info *pipeline,
                          struct blob *metadata)
{
   VkPipelineLayoutCreateInfo *pInfo = &pipeline->pipelineLayoutInfo;

   assert(pInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO);
   blob_write_uint32(metadata, pInfo->sType);

   blob_write_uint32(metadata, pInfo->setLayoutCount);
   for (uint32_t i = 0; i < pInfo->setLayoutCount; i++) {
      serialize_descriptor_set_layout(&pipeline->pSetLayoutsInfo[i],
                                      metadata);
   }

   blob_write_uint32(metadata, pInfo->pushConstantRangeCount);
   if (pInfo->pushConstantRangeCount) {
      blob_write_bytes(metadata, pInfo->pPushConstantRanges,
                       sizeof(*pInfo->pPushConstantRanges) *
                       pInfo->pushConstantRangeCount);
   }
}

/* Mirror of serialize_pipeline_layout(). Returns false on allocation
 * failure or if a nested set layout fails to deserialize.
 */
static bool
deserialize_pipeline_layout(struct pipeline_info *pipeline,
                            struct blob_reader *metadata)
{
   VkPipelineLayoutCreateInfo *pInfo = &pipeline->pipelineLayoutInfo;

   pInfo->sType = blob_read_uint32(metadata);
   assert(pInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO);

   pInfo->setLayoutCount = blob_read_uint32(metadata);
   if (pInfo->setLayoutCount) {
      pipeline->pSetLayoutsInfo =
         calloc(pInfo->setLayoutCount, sizeof(*pipeline->pSetLayoutsInfo));

      if (!pipeline->pSetLayoutsInfo)
         return false;

      for (uint32_t i = 0; i < pInfo->setLayoutCount; i++) {
         if (!deserialize_descriptor_set_layout(&pipeline->pSetLayoutsInfo[i],
                                                metadata))
            return false;
      }
   }

   pInfo->pushConstantRangeCount = blob_read_uint32(metadata);
   if (pInfo->pushConstantRangeCount) {
      pInfo->pPushConstantRanges =
         blob_read_bytes(metadata, sizeof(*pInfo->pPushConstantRanges) *
                                   pInfo->pushConstantRangeCount);
   }

   return true;
}

/**
 * Graphics pipeline.
 */
/* Shader stage. */
/* Only the stage flag and entry-point name are stored; the SPIR-V itself is
 * written by serialize_shader_module() right after each stage record.
 * NOTE(review): this one is not 'static' — presumably used by the capture
 * layer as well; keep the external linkage.
 */
void
serialize_shader_stage_state(const VkPipelineShaderStageCreateInfo *pInfo,
                             struct blob *metadata)
{
   assert(pInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO);
   blob_write_uint32(metadata, pInfo->sType);

   blob_write_uint32(metadata, pInfo->stage);
   blob_write_string(metadata, pInfo->pName);
}

/* pName aliases the reader's buffer (blob_read_string). */
static bool
deserialize_shader_stage_state(VkPipelineShaderStageCreateInfo *pInfo,
                               struct blob_reader *metadata)
{
   pInfo->sType = blob_read_uint32(metadata);
   assert(pInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO);

   pInfo->stage = blob_read_uint32(metadata);
   pInfo->pName = blob_read_string(metadata);

   return true;
}

/* Interleave stage records with their shader modules: caller must have
 * written/read pipeline->stageCount beforehand.
 */
static void
serialize_shader_stage_states(struct pipeline_info *pipeline,
                              struct blob *metadata)
{
   for (uint32_t i = 0; i < pipeline->stageCount; i++) {
      serialize_shader_stage_state(&pipeline->pShaderStagesInfo[i],
                                   metadata);

      serialize_shader_module(&pipeline->pShaderModulesInfo[i], metadata);
   }
}

/* Allocates the stage/module arrays sized by pipeline->stageCount.
 * NOTE(review): pShaderStagesInfo is not freed if the second calloc fails;
 * callers appear to treat any 'false' as fatal, so this leak is benign but
 * worth confirming.
 */
static bool
deserialize_shader_stage_states(struct pipeline_info *pipeline,
                                struct blob_reader *metadata)
{
   bool valid = true;

   pipeline->pShaderStagesInfo =
      calloc(pipeline->stageCount, sizeof(*pipeline->pShaderStagesInfo));
   if (!pipeline->pShaderStagesInfo)
      return false;

   pipeline->pShaderModulesInfo =
      calloc(pipeline->stageCount, sizeof (*pipeline->pShaderModulesInfo));
   if (!pipeline->pShaderModulesInfo)
      return false;

   for (uint32_t i = 0; i < pipeline->stageCount; i++) {
      valid &= deserialize_shader_stage_state(&pipeline->pShaderStagesInfo[i],
                                              metadata);

      valid &= deserialize_shader_module(&pipeline->pShaderModulesInfo[i],
                                         metadata);
   }

   return valid;
}

/* Vertex input state.
*/ +static void +serialize_vertex_input_state(const VkPipelineVertexInputStateCreateInfo *pInfo, + struct blob *metadata) +{ + assert(pInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO); + blob_write_uint32(metadata, pInfo->sType); + + blob_write_uint32(metadata, pInfo->vertexBindingDescriptionCount); + if (pInfo->vertexBindingDescriptionCount) { + blob_write_bytes(metadata, pInfo->pVertexBindingDescriptions, + sizeof(*pInfo->pVertexBindingDescriptions) * + pInfo->vertexBindingDescriptionCount); + } + + blob_write_uint32(metadata, pInfo->vertexAttributeDescriptionCount); + if (pInfo->vertexAttributeDescriptionCount) { + blob_write_bytes(metadata, pInfo->pVertexAttributeDescriptions, + sizeof(*pInfo->pVertexAttributeDescriptions) * + pInfo->vertexAttributeDescriptionCount); + } +} + +static bool +deserialize_vertex_input_state(struct pipeline_info *pipeline, + struct blob_reader *metadata) +{ + VkPipelineVertexInputStateCreateInfo *pInfo = &pipeline->vertexInputState; + + pInfo->sType = blob_read_uint32(metadata); + assert(pInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO); + + pInfo->vertexBindingDescriptionCount = blob_read_uint32(metadata); + if (pInfo->vertexBindingDescriptionCount) { + pInfo->pVertexBindingDescriptions = + blob_read_bytes(metadata, sizeof(*pInfo->pVertexBindingDescriptions) * + pInfo->vertexBindingDescriptionCount); + } + + pInfo->vertexAttributeDescriptionCount = blob_read_uint32(metadata); + if (pInfo->vertexAttributeDescriptionCount) { + pInfo->pVertexAttributeDescriptions = + blob_read_bytes(metadata, sizeof(*pInfo->pVertexAttributeDescriptions) * + pInfo->vertexAttributeDescriptionCount); + } + + return true; +} + +/* Input assembly state. 
*/ +static void +serialize_input_assembly_state(const VkPipelineInputAssemblyStateCreateInfo *pInfo, + struct blob *metadata) +{ + assert(pInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO); + blob_write_uint32(metadata, pInfo->sType); + + blob_write_uint32(metadata, pInfo->topology); + blob_write_uint32(metadata, pInfo->primitiveRestartEnable); +} + +static bool +deserialize_input_assembly_state(struct pipeline_info *pipeline, + struct blob_reader *metadata) +{ + VkPipelineInputAssemblyStateCreateInfo *pInfo = &pipeline->inputAssemblyState; + + pInfo->sType = blob_read_uint32(metadata); + assert(pInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO); + + pInfo->topology = blob_read_uint32(metadata); + pInfo->primitiveRestartEnable = blob_read_uint32(metadata); + + return true; +} + +/* Tessellation state. */ +static void +serialize_tessellation_state(const VkPipelineTessellationStateCreateInfo *pInfo, + struct blob *metadata) +{ + bool has_state = + pInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO; + + blob_write_uint32(metadata, has_state); + if (!has_state) + return; + + blob_write_uint32(metadata, pInfo->sType); + + blob_write_uint32(metadata, pInfo->patchControlPoints); +} + +static bool +deserialize_tessellation_state(struct pipeline_info *pipeline, + struct blob_reader *metadata) +{ + VkPipelineTessellationStateCreateInfo *pInfo = &pipeline->tessellationState; + bool has_state; + + has_state = blob_read_uint32(metadata); + if (!has_state) + return true; + + pInfo->sType = blob_read_uint32(metadata); + assert(pInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO); + + pInfo->patchControlPoints = blob_read_uint32(metadata); + + return true; +} + +/* Viewport state. */ +static void +serialize_viewport_state(const VkPipelineViewportStateCreateInfo *pInfo, + struct blob *metadata) +{ + bool has_viewports = pInfo->pViewports ? 
true : false; + bool has_scissors = pInfo->pScissors ? true : false; + + assert(pInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO); + blob_write_uint32(metadata, pInfo->sType); + + blob_write_uint32(metadata, pInfo->viewportCount); + blob_write_uint32(metadata, has_viewports); + if (has_viewports) { + blob_write_bytes(metadata, pInfo->pViewports, + sizeof(*pInfo->pViewports) * + pInfo->viewportCount); + } + + blob_write_uint32(metadata, pInfo->scissorCount); + blob_write_uint32(metadata, has_scissors); + if (has_scissors) { + blob_write_bytes(metadata, pInfo->pScissors, + sizeof(*pInfo->pScissors) * + pInfo->scissorCount); + } +} + +static bool +deserialize_viewport_state(struct pipeline_info *pipeline, + struct blob_reader *metadata) +{ + VkPipelineViewportStateCreateInfo *pInfo = &pipeline->viewportState; + + pInfo->sType = blob_read_uint32(metadata); + assert(pInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO); + + pInfo->viewportCount = blob_read_uint32(metadata); + if (blob_read_uint32(metadata)) { + pInfo->pViewports = + blob_read_bytes(metadata, sizeof(*pInfo->pViewports) * + pInfo->viewportCount); + } + + pInfo->scissorCount = blob_read_uint32(metadata); + if (blob_read_uint32(metadata)) { + pInfo->pScissors = + blob_read_bytes(metadata, sizeof(*pInfo->pScissors) * + pInfo->scissorCount); + } + + return true; +} + +/* Rasterization state. 
*/ +static void +serialize_rasterization_state(const VkPipelineRasterizationStateCreateInfo *pInfo, + struct blob *metadata) +{ + assert(pInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO); + blob_write_uint32(metadata, pInfo->sType); + + blob_write_uint32(metadata, pInfo->depthClampEnable); + blob_write_uint32(metadata, pInfo->rasterizerDiscardEnable); + blob_write_uint32(metadata, pInfo->polygonMode); + blob_write_uint32(metadata, pInfo->cullMode); + blob_write_uint32(metadata, pInfo->frontFace); + blob_write_uint32(metadata, pInfo->depthBiasEnable); + blob_write_uint32(metadata, pInfo->depthBiasConstantFactor); + blob_write_uint32(metadata, pInfo->depthBiasClamp); + blob_write_uint32(metadata, pInfo->depthBiasSlopeFactor); + blob_write_uint32(metadata, pInfo->lineWidth); +} + +static bool +deserialize_rasterization_state(struct pipeline_info *pipeline, + struct blob_reader *metadata) +{ + VkPipelineRasterizationStateCreateInfo *pInfo = &pipeline->rasterizationState; + + pInfo->sType = blob_read_uint32(metadata); + assert(pInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO); + + pInfo->depthClampEnable = blob_read_uint32(metadata); + pInfo->rasterizerDiscardEnable = blob_read_uint32(metadata); + pInfo->polygonMode = blob_read_uint32(metadata); + pInfo->cullMode = blob_read_uint32(metadata); + pInfo->frontFace = blob_read_uint32(metadata); + pInfo->depthBiasEnable = blob_read_uint32(metadata); + pInfo->depthBiasConstantFactor = blob_read_uint32(metadata); + pInfo->depthBiasClamp = blob_read_uint32(metadata); + pInfo->depthBiasSlopeFactor = blob_read_uint32(metadata); + pInfo->lineWidth = blob_read_uint32(metadata); + + return true; +} + +/* Multisample state. */ +static void +serialize_multisample_state(const VkPipelineMultisampleStateCreateInfo *pInfo, + struct blob *metadata) +{ + bool has_sample_mask = pInfo->pSampleMask ? 
true : false; + + assert(pInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO); + blob_write_uint32(metadata, pInfo->sType); + + blob_write_uint32(metadata, pInfo->rasterizationSamples); + blob_write_uint32(metadata, pInfo->sampleShadingEnable); + blob_write_uint32(metadata, pInfo->minSampleShading); + + blob_write_uint32(metadata, has_sample_mask); + if (has_sample_mask) { + unsigned count = MAX2(pInfo->rasterizationSamples / 32, 1); + + blob_write_bytes(metadata, pInfo->pSampleMask, + sizeof(*pInfo->pSampleMask) * count); + } + + blob_write_uint32(metadata, pInfo->alphaToCoverageEnable); + blob_write_uint32(metadata, pInfo->alphaToOneEnable); +} + +static bool +deserialize_multisample_state(struct pipeline_info *pipeline, + struct blob_reader *metadata) +{ + VkPipelineMultisampleStateCreateInfo *pInfo = &pipeline->multisampleState; + + pInfo->sType = blob_read_uint32(metadata); + assert(pInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO); + + pInfo->rasterizationSamples = blob_read_uint32(metadata); + pInfo->sampleShadingEnable = blob_read_uint32(metadata); + pInfo->minSampleShading = blob_read_uint32(metadata); + + if (blob_read_uint32(metadata)) { + unsigned count = MAX2(pInfo->rasterizationSamples / 32, 1); + + pInfo->pSampleMask = + blob_read_bytes(metadata, sizeof(*pInfo->pSampleMask) * count); + } + + pInfo->alphaToCoverageEnable = blob_read_uint32(metadata); + pInfo->alphaToOneEnable = blob_read_uint32(metadata); + + return true; +} + +/* Depth stencil state. 
*/ +static void +serialize_depth_stencil_state(const VkPipelineDepthStencilStateCreateInfo *pInfo, + struct blob *metadata) +{ + assert(pInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO); + blob_write_uint32(metadata, pInfo->sType); + + blob_write_uint32(metadata, pInfo->depthTestEnable); + blob_write_uint32(metadata, pInfo->depthWriteEnable); + blob_write_uint32(metadata, pInfo->depthCompareOp); + blob_write_uint32(metadata, pInfo->depthBoundsTestEnable); + blob_write_uint32(metadata, pInfo->stencilTestEnable); + + blob_write_bytes(metadata, &pInfo->front, sizeof(pInfo->front)); + blob_write_bytes(metadata, &pInfo->back, sizeof(pInfo->back)); + + blob_write_uint32(metadata, pInfo->minDepthBounds); + blob_write_uint32(metadata, pInfo->maxDepthBounds); +} + +static bool +deserialize_depth_stencil_state(struct pipeline_info *pipeline, + struct blob_reader *metadata) +{ + VkPipelineDepthStencilStateCreateInfo *pInfo = &pipeline->depthStencilState; + + pInfo->sType = blob_read_uint32(metadata); + assert(pInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO); + + pInfo->depthTestEnable = blob_read_uint32(metadata); + pInfo->depthWriteEnable = blob_read_uint32(metadata); + pInfo->depthCompareOp = blob_read_uint32(metadata); + pInfo->depthBoundsTestEnable = blob_read_uint32(metadata); + pInfo->stencilTestEnable = blob_read_uint32(metadata); + + blob_copy_bytes(metadata, &pInfo->front, sizeof(pInfo->front)); + blob_copy_bytes(metadata, &pInfo->back, sizeof(pInfo->back)); + + pInfo->minDepthBounds = blob_read_uint32(metadata); + pInfo->maxDepthBounds = blob_read_uint32(metadata); + + return true; +} + +/* Color blend state. 
*/ +static void +serialize_color_blend_state(const VkPipelineColorBlendStateCreateInfo *pInfo, + struct blob *metadata) +{ + bool has_state = + pInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + + blob_write_uint32(metadata, has_state); + if (!has_state) + return; + + blob_write_uint32(metadata, pInfo->sType); + + blob_write_uint32(metadata, pInfo->logicOpEnable); + blob_write_uint32(metadata, pInfo->logicOp); + + blob_write_uint32(metadata, pInfo->attachmentCount); + if (pInfo->attachmentCount) { + blob_write_bytes(metadata, pInfo->pAttachments, + sizeof(*pInfo->pAttachments) * + pInfo->attachmentCount); + } + + blob_write_bytes(metadata, pInfo->blendConstants, + sizeof(pInfo->blendConstants)); +} + +static bool +deserialize_color_blend_state(struct pipeline_info *pipeline, + struct blob_reader *metadata) +{ + VkPipelineColorBlendStateCreateInfo *pInfo = &pipeline->colorBlendState; + bool has_state; + + has_state = blob_read_uint32(metadata); + if (!has_state) + return true; + + pInfo->sType = blob_read_uint32(metadata); + assert(pInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO); + + pInfo->logicOpEnable = blob_read_uint32(metadata); + pInfo->logicOp = blob_read_uint32(metadata); + + pInfo->attachmentCount = blob_read_uint32(metadata); + if (pInfo->attachmentCount) { + pInfo->pAttachments = + blob_read_bytes(metadata, sizeof(*pInfo->pAttachments) * + pInfo->attachmentCount); + } + + blob_copy_bytes(metadata, pInfo->blendConstants, + sizeof(pInfo->blendConstants)); + + return true; +} + +/* Dynamic state. 
*/ +static void +serialize_dynamic_state(const VkPipelineDynamicStateCreateInfo *pInfo, + struct blob *metadata) +{ + bool has_state = + pInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + + blob_write_uint32(metadata, has_state); + if (!has_state) + return; + + blob_write_uint32(metadata, pInfo->sType); + + blob_write_uint32(metadata, pInfo->dynamicStateCount); + blob_write_bytes(metadata, pInfo->pDynamicStates, + sizeof(*pInfo->pDynamicStates) * + pInfo->dynamicStateCount); +} + +static bool +deserialize_dynamic_state(struct pipeline_info *pipeline, + struct blob_reader *metadata) +{ + VkPipelineDynamicStateCreateInfo *pInfo = &pipeline->dynamicState; + bool has_state; + + has_state = blob_read_uint32(metadata); + if (!has_state) + return true; + + pInfo->sType = blob_read_uint32(metadata); + assert(pInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO); + + pInfo->dynamicStateCount = blob_read_uint32(metadata); + pInfo->pDynamicStates = + blob_read_bytes(metadata, sizeof(*pInfo->pDynamicStates) * + pInfo->dynamicStateCount); + + return true; +} + +static void +serialize_graphics_pipeline(struct pipeline_info *pipeline, + struct blob *metadata) +{ + + pipeline->bindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + blob_write_uint32(metadata, pipeline->stageCount); + + /* Shader stages and modules. */ + serialize_shader_stage_states(pipeline, metadata); + + /* Graphics states. 
*/ + serialize_vertex_input_state(&pipeline->vertexInputState, metadata); + serialize_input_assembly_state(&pipeline->inputAssemblyState, metadata); + serialize_tessellation_state(&pipeline->tessellationState, metadata); + serialize_viewport_state(&pipeline->viewportState, metadata); + serialize_rasterization_state(&pipeline->rasterizationState, metadata); + serialize_multisample_state(&pipeline->multisampleState, metadata); + serialize_depth_stencil_state(&pipeline->depthStencilState, metadata); + serialize_color_blend_state(&pipeline->colorBlendState, metadata); + serialize_dynamic_state(&pipeline->dynamicState, metadata); + + /* Pipeline layout and render pass. */ + serialize_pipeline_layout(pipeline, metadata); + serialize_render_pass(pipeline, metadata); +} + +static bool +deserialize_graphics_pipeline(struct pipeline_info *pipeline, + struct blob_reader *metadata) +{ + bool valid = true; + + pipeline->bindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + pipeline->stageCount = blob_read_uint32(metadata); + + /* Shader stages and modules. */ + valid &= deserialize_shader_stage_states(pipeline, metadata); + + /* Graphics states. */ + valid &= deserialize_vertex_input_state(pipeline, metadata); + valid &= deserialize_input_assembly_state(pipeline, metadata); + valid &= deserialize_tessellation_state(pipeline, metadata); + valid &= deserialize_viewport_state(pipeline, metadata); + valid &= deserialize_rasterization_state(pipeline, metadata); + valid &= deserialize_multisample_state(pipeline, metadata); + valid &= deserialize_depth_stencil_state(pipeline, metadata); + valid &= deserialize_color_blend_state(pipeline, metadata); + valid &= deserialize_dynamic_state(pipeline, metadata); + + /* Pipeline layout and render pass. 
*/ + valid &= deserialize_pipeline_layout(pipeline, metadata); + valid &= deserialize_render_pass(pipeline, metadata); + + return valid; +} + +static void +serialize_compute_pipeline(struct pipeline_info *pipeline, + struct blob *metadata) +{ + serialize_shader_stage_states(pipeline, metadata); + serialize_pipeline_layout(pipeline, metadata); +} + +static bool +deserialize_compute_pipeline(struct pipeline_info *pipeline, + struct blob_reader *metadata) +{ + bool valid = true; + + pipeline->bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE; + pipeline->stageCount = 1; + + valid &= deserialize_shader_stage_states(pipeline, metadata); + valid &= deserialize_pipeline_layout(pipeline, metadata); + + return valid; +} + +void +serialize_pipeline(struct pipeline_info *pipeline, struct blob *metadata) +{ + VkStructureType sType = + pipeline->bindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS ? + VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO : + VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + + blob_write_uint32(metadata, sType); + + switch (pipeline->bindPoint) { + case VK_PIPELINE_BIND_POINT_GRAPHICS: + serialize_graphics_pipeline(pipeline, metadata); + break; + case VK_PIPELINE_BIND_POINT_COMPUTE: + serialize_compute_pipeline(pipeline, metadata); + break; + default: + assert(!"invalid pipeline tyoe"); + break; + } +} + +bool +deserialize_pipeline(struct pipeline_info *pipeline, + struct blob_reader *metadata) +{ + VkStructureType sType; + + sType = blob_read_uint32(metadata); + + switch (sType) { + case VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO: + return deserialize_graphics_pipeline(pipeline, metadata); + case VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO: + return deserialize_compute_pipeline(pipeline, metadata); + default: + return false; + } +} diff --git a/serialize.h b/serialize.h new file mode 100644 index 0000000..abada2a --- /dev/null +++ b/serialize.h @@ -0,0 +1,75 @@ +/* + * Copyright © 2018 Valve Corporation + * + * Permission is hereby granted, free of charge, 
to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef SERIALIZE_H
#define SERIALIZE_H

#include <vulkan/vulkan.h>

#include "blob.h"

#ifdef __cplusplus
extern "C" {
#endif

/* In-memory description of one captured Vulkan pipeline (graphics or
 * compute): everything needed to re-create it for replay.
 */
struct pipeline_info {
   /* Graphics or compute pipeline. */
   VkPipelineBindPoint bindPoint;

   /* Shader stages and modules. */
   uint32_t stageCount;                              /* entries in both arrays below */
   VkPipelineShaderStageCreateInfo *pShaderStagesInfo;
   VkShaderModuleCreateInfo *pShaderModulesInfo;     /* SPIR-V, one per stage */

   /* Descriptor set layouts. */
   /* One entry per pipelineLayoutInfo.setLayoutCount. */
   VkDescriptorSetLayoutCreateInfo *pSetLayoutsInfo;

   /* Graphics states. */
   /* Unused for compute pipelines. Optional states (tessellation, color
    * blend, dynamic) are zeroed when absent; their sType acts as the
    * presence marker during serialization.
    */
   VkPipelineVertexInputStateCreateInfo vertexInputState;
   VkPipelineInputAssemblyStateCreateInfo inputAssemblyState;
   VkPipelineTessellationStateCreateInfo tessellationState;
   VkPipelineViewportStateCreateInfo viewportState;
   VkPipelineRasterizationStateCreateInfo rasterizationState;
   VkPipelineMultisampleStateCreateInfo multisampleState;
   VkPipelineDepthStencilStateCreateInfo depthStencilState;
   VkPipelineColorBlendStateCreateInfo colorBlendState;
   VkPipelineDynamicStateCreateInfo dynamicState;

   /* Pipeline layout and render pass. */
   VkPipelineLayoutCreateInfo pipelineLayoutInfo;
   VkRenderPassCreateInfo renderPassInfo;
};

void
serialize_pipeline(struct pipeline_info *pipeline,
                   struct blob *metadata);

bool
deserialize_pipeline(struct pipeline_info *pipeline,
                     struct blob_reader *metadata);

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif
diff --git a/vkpipeline_db.cpp b/vkpipeline_db.cpp
new file mode 100644
index 0000000..d6d103b
--- /dev/null
+++ b/vkpipeline_db.cpp
@@ -0,0 +1,1074 @@
+/*
+ * Copyright © 2018 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <vulkan/vulkan.h> +#include <vulkan/vk_layer.h> + +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> + +#include <mutex> +#include <unordered_map> +#include <atomic> + +#include "serialize.h" + +#undef VK_LAYER_EXPORT +#define VK_LAYER_EXPORT extern "C" + +#include "blob.h" + +// global lock +std::mutex global_lock; +typedef std::lock_guard<std::mutex> lock_guard_t; +typedef std::unique_lock<std::mutex> unique_lock_t; + +// atomic counter for pipeline tests +std::atomic<unsigned> pipeline_count; + +// use the loader's dispatch table pointer as a key for dispatch map lookups +template<typename DispatchableType> +void *GetKey(DispatchableType inst) +{ + return *(void **)inst; +} + +// instance and device dispatch tables +std::unordered_map<void *, VkLayerInstanceDispatchTable> instance_dispatch; +std::unordered_map<void *, VkLayerDispatchTable> device_dispatch; + +// create info objects +std::unordered_map<VkSampler, VkSamplerCreateInfo *> samplers; +std::unordered_map<VkShaderModule, VkShaderModuleCreateInfo *> shader_modules; +std::unordered_map<VkDescriptorSetLayout, VkDescriptorSetLayoutCreateInfo *> descriptor_set_layouts; +std::unordered_map<VkPipelineLayout, VkPipelineLayoutCreateInfo *> pipeline_layouts; +std::unordered_map<VkRenderPass, VkRenderPassCreateInfo *> render_passes; + +// instance chain +VK_LAYER_EXPORT VkResult VKAPI_CALL +vkpipeline_db_CreateInstance( + const VkInstanceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkInstance* pInstance) +{ + VkResult result; + + VkLayerInstanceCreateInfo *layerCreateInfo = + (VkLayerInstanceCreateInfo *)pCreateInfo->pNext; + + while (layerCreateInfo && + 
(layerCreateInfo->sType != VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO || + layerCreateInfo->function != VK_LAYER_LINK_INFO)) { + layerCreateInfo = (VkLayerInstanceCreateInfo *)layerCreateInfo->pNext; + } + + if (!layerCreateInfo) + return VK_ERROR_INITIALIZATION_FAILED; + + PFN_vkGetInstanceProcAddr getInstanceProcAddr = + layerCreateInfo->u.pLayerInfo->pfnNextGetInstanceProcAddr; + + layerCreateInfo->u.pLayerInfo = layerCreateInfo->u.pLayerInfo->pNext; + + PFN_vkCreateInstance createInstance = (PFN_vkCreateInstance) + getInstanceProcAddr(VK_NULL_HANDLE, "vkCreateInstance"); + + result = createInstance(pCreateInfo, pAllocator, pInstance); + if (result != VK_SUCCESS) + return result; + + // Create the dispatch instance table. + VkLayerInstanceDispatchTable dispatchTable; + dispatchTable.GetInstanceProcAddr = (PFN_vkGetInstanceProcAddr) + getInstanceProcAddr(*pInstance, "vkGetInstanceProcAddr"); + dispatchTable.DestroyInstance = (PFN_vkDestroyInstance) + getInstanceProcAddr(*pInstance, "vkDestroyInstance"); + dispatchTable.EnumerateDeviceExtensionProperties = (PFN_vkEnumerateDeviceExtensionProperties) + getInstanceProcAddr(*pInstance, "vkEnumerateDeviceExtensionProperties"); + + lock_guard_t l(global_lock); + instance_dispatch[GetKey(*pInstance)] = dispatchTable; + + return VK_SUCCESS; +} + +VK_LAYER_EXPORT void VKAPI_CALL +vkpipeline_db_DestroyInstance( + VkInstance instance, + const VkAllocationCallbacks *pAllocator) +{ + lock_guard_t l(global_lock); + instance_dispatch.erase(GetKey(instance)); +} + +VK_LAYER_EXPORT VkResult VKAPI_CALL +vkpipeline_db_EnumerateInstanceLayerProperties( + uint32_t *pPropertyCount, + VkLayerProperties *pProperties) +{ + if (pPropertyCount) + *pPropertyCount = 1; + + if (pProperties) { + strcpy(pProperties->layerName, "VK_LAYER_vkpipeline_db"); + strcpy(pProperties->description, "vkpipeline-db capture layer"); + pProperties->implementationVersion = 1; + pProperties->specVersion = VK_API_VERSION_1_0; + } + + return VK_SUCCESS; +} + 
+VK_LAYER_EXPORT VkResult VKAPI_CALL +vkpipeline_db_EnumerateInstanceExtensionProperties( + const char *pLayerName, + uint32_t *pPropertyCount, + VkExtensionProperties *pProperties) +{ + if (pLayerName == NULL || strcmp(pLayerName, "VK_LAYER_vkpipeline_db")) + return VK_ERROR_LAYER_NOT_PRESENT; + + if (pPropertyCount) + *pPropertyCount = 0; + + return VK_SUCCESS; +} + +// device chain +VK_LAYER_EXPORT VkResult VKAPI_CALL +vkpipeline_db_CreateDevice( + VkPhysicalDevice physicalDevice, + const VkDeviceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDevice* pDevice) +{ + VkResult result; + + VkLayerDeviceCreateInfo *layerCreateInfo = + (VkLayerDeviceCreateInfo *)pCreateInfo->pNext; + + while (layerCreateInfo && + (layerCreateInfo->sType != VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO || + layerCreateInfo->function != VK_LAYER_LINK_INFO)) { + layerCreateInfo = (VkLayerDeviceCreateInfo *)layerCreateInfo->pNext; + } + + if (!layerCreateInfo) + return VK_ERROR_INITIALIZATION_FAILED; + + PFN_vkGetInstanceProcAddr getInstanceProcAddr = + layerCreateInfo->u.pLayerInfo->pfnNextGetInstanceProcAddr; + PFN_vkGetDeviceProcAddr getDeviceProcAddr = + layerCreateInfo->u.pLayerInfo->pfnNextGetDeviceProcAddr; + + layerCreateInfo->u.pLayerInfo = layerCreateInfo->u.pLayerInfo->pNext; + + PFN_vkCreateDevice createDevice = (PFN_vkCreateDevice) + getInstanceProcAddr(VK_NULL_HANDLE, "vkCreateDevice"); + + result = createDevice(physicalDevice, pCreateInfo, pAllocator, pDevice); + if (result != VK_SUCCESS) + return result; + + // create the device dispatch table + VkLayerDispatchTable dispatchTable; + dispatchTable.GetDeviceProcAddr = (PFN_vkGetDeviceProcAddr) + getDeviceProcAddr(*pDevice, "vkGetDeviceProcAddr"); + dispatchTable.DestroyDevice = (PFN_vkDestroyDevice) + getDeviceProcAddr(*pDevice, "vkDestroyDevice"); + dispatchTable.CreateSampler = + (PFN_vkCreateSampler)getDeviceProcAddr(*pDevice, "vkCreateSampler"); + dispatchTable.DestroySampler = + 
(PFN_vkDestroySampler)getDeviceProcAddr(*pDevice, "vkDestroySampler"); + dispatchTable.CreateDescriptorSetLayout = + (PFN_vkCreateDescriptorSetLayout)getDeviceProcAddr(*pDevice, "vkCreateDescriptorSetLayout"); + dispatchTable.DestroyDescriptorSetLayout = + (PFN_vkDestroyDescriptorSetLayout)getDeviceProcAddr(*pDevice, "vkDestroyDescriptorSetLayout"); + dispatchTable.CreatePipelineLayout = + (PFN_vkCreatePipelineLayout)getDeviceProcAddr(*pDevice, "vkCreatePipelineLayout"); + dispatchTable.DestroyPipelineLayout = + (PFN_vkDestroyPipelineLayout)getDeviceProcAddr(*pDevice, "vkDestroyPipelineLayout"); + dispatchTable.CreateShaderModule = + (PFN_vkCreateShaderModule)getDeviceProcAddr(*pDevice, "vkCreateShaderModule"); + dispatchTable.DestroyShaderModule = + (PFN_vkDestroyShaderModule)getDeviceProcAddr(*pDevice, "vkDestroyShaderModule"); + dispatchTable.CreateRenderPass = + (PFN_vkCreateRenderPass)getDeviceProcAddr(*pDevice, "vkCreateRenderPass"); + dispatchTable.DestroyRenderPass = + (PFN_vkDestroyRenderPass)getDeviceProcAddr(*pDevice, "vkDestroyRenderPass"); + dispatchTable.CreateGraphicsPipelines = + (PFN_vkCreateGraphicsPipelines)getDeviceProcAddr(*pDevice, "vkCreateGraphicsPipelines"); + dispatchTable.CreateComputePipelines = + (PFN_vkCreateComputePipelines)getDeviceProcAddr(*pDevice, "vkCreateComputePipelines"); + + lock_guard_t l(global_lock); + device_dispatch[GetKey(*pDevice)] = dispatchTable; + + return VK_SUCCESS; +} + +VK_LAYER_EXPORT void VKAPI_CALL +vkpipeline_db_DestroyDevice( + VkDevice device, + const VkAllocationCallbacks *pAllocator) +{ + lock_guard_t l(global_lock); + device_dispatch.erase(GetKey(device)); +} + +VK_LAYER_EXPORT VkResult VKAPI_CALL +vkpipeline_db_EnumerateDeviceLayerProperties( + VkPhysicalDevice physicalDevice, + uint32_t *pPropertyCount, + VkLayerProperties *pProperties) +{ + return vkpipeline_db_EnumerateInstanceLayerProperties(pPropertyCount, + pProperties); +} + +VK_LAYER_EXPORT VkResult VKAPI_CALL 
+vkpipeline_db_EnumerateDeviceExtensionProperties( + VkPhysicalDevice physicalDevice, + const char *pLayerName, + uint32_t *pPropertyCount, + VkExtensionProperties *pProperties) +{ + if (pLayerName == NULL || strcmp(pLayerName, "VK_LAYER_vkpipeline_db")) { + if (physicalDevice == VK_NULL_HANDLE) + return VK_SUCCESS; + + lock_guard_t l(global_lock); + return instance_dispatch[GetKey(physicalDevice)].EnumerateDeviceExtensionProperties(physicalDevice, pLayerName, pPropertyCount, pProperties); + } + + if (pPropertyCount) + *pPropertyCount = 0; + + return VK_SUCCESS; +} + +// sampler +static const VkSamplerCreateInfo * +get_sampler_info(VkSampler sampler) +{ + unique_lock_t lock(global_lock); + auto it = samplers.find(sampler); + lock.unlock(); + + if (it == samplers.end()) + return nullptr; + return it->second; +} + +static void +destroy_sampler_info(VkSampler sampler) +{ + unique_lock_t lock(global_lock); + auto it = samplers.find(sampler); + if (it != samplers.end()) + free(it->second); + samplers.erase(sampler); + lock.unlock(); +} + +static void +record_sampler_info(VkSampler *pSampler, const VkSamplerCreateInfo *pCreateInfo) +{ + VkSamplerCreateInfo *pSamplerInfo; + + pSamplerInfo = (VkSamplerCreateInfo *)malloc(sizeof(*pSamplerInfo)); + + memcpy(pSamplerInfo, pCreateInfo, sizeof(*pSamplerInfo)); + + lock_guard_t lock(global_lock); + samplers[*pSampler] = pSamplerInfo; +} + +VK_LAYER_EXPORT VkResult VKAPI_CALL +vkpipeline_db_CreateSampler( + VkDevice device, + const VkSamplerCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSampler* pSampler) +{ + VkResult result; + + result = device_dispatch[GetKey(device)].CreateSampler(device, + pCreateInfo, + pAllocator, + pSampler); + if (result == VK_SUCCESS) + record_sampler_info(pSampler, pCreateInfo); + return result; +} + +VK_LAYER_EXPORT void VKAPI_CALL +vkpipeline_db_DestroySampler( + VkDevice device, + VkSampler sampler, + const VkAllocationCallbacks* pAllocator) +{ + 
destroy_sampler_info(sampler); + device_dispatch[GetKey(device)].DestroySampler(device, sampler, pAllocator); +} + +// descriptor set layout +static const VkDescriptorSetLayoutCreateInfo * +get_descriptor_set_layout_info(VkDescriptorSetLayout descriptorSetLayout) +{ + unique_lock_t lock(global_lock); + auto it = descriptor_set_layouts.find(descriptorSetLayout); + lock.unlock(); + + if (it == descriptor_set_layouts.end()) + return nullptr; + return it->second; +} + +static void +destroy_descriptor_set_layout_info(VkDescriptorSetLayout descriptorSetLayout) +{ + unique_lock_t lock(global_lock); + auto it = descriptor_set_layouts.find(descriptorSetLayout); + if (it != descriptor_set_layouts.end()) { + free((void *)it->second->pBindings); + free(it->second); + } + descriptor_set_layouts.erase(descriptorSetLayout); + lock.unlock(); +} + +static void +record_descriptor_set_layout_info(VkDescriptorSetLayout *pSetLayout, + const VkDescriptorSetLayoutCreateInfo *pCreateInfo) +{ + VkDescriptorSetLayoutCreateInfo *pSetLayoutInfo; + + pSetLayoutInfo = (VkDescriptorSetLayoutCreateInfo *) + malloc(sizeof(*pSetLayoutInfo)); + + memcpy(pSetLayoutInfo, pCreateInfo, sizeof(*pSetLayoutInfo)); + + if (pCreateInfo->bindingCount) { + VkDescriptorSetLayoutBinding *pBindings; + + pBindings = (VkDescriptorSetLayoutBinding *) + malloc(sizeof(*pBindings) * pCreateInfo->bindingCount); + + memcpy(pBindings, pCreateInfo->pBindings, + sizeof(*pBindings) * pCreateInfo->bindingCount); + pSetLayoutInfo->pBindings = pBindings; + } + + lock_guard_t lock(global_lock); + descriptor_set_layouts[*pSetLayout] = pSetLayoutInfo; +} + +VK_LAYER_EXPORT VkResult VKAPI_CALL +vkpipeline_db_CreateDescriptorSetLayout( + VkDevice device, + const VkDescriptorSetLayoutCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDescriptorSetLayout* pSetLayout) +{ + VkResult result; + + result = device_dispatch[GetKey(device)].CreateDescriptorSetLayout(device, + pCreateInfo, + pAllocator, + pSetLayout); + if 
(result == VK_SUCCESS) + record_descriptor_set_layout_info(pSetLayout, pCreateInfo); + return result; +} + +VK_LAYER_EXPORT void VKAPI_CALL +vkpipeline_db_DestroyDescriptorSetLayout( + VkDevice device, + VkDescriptorSetLayout descriptorSetLayout, + const VkAllocationCallbacks* pAllocator) +{ + destroy_descriptor_set_layout_info(descriptorSetLayout); + device_dispatch[GetKey(device)].DestroyDescriptorSetLayout(device, + descriptorSetLayout, + pAllocator); +} + +// pipeline layout +static const VkPipelineLayoutCreateInfo * +get_pipeline_layout_info(VkPipelineLayout pipelineLayout) +{ + unique_lock_t lock(global_lock); + auto it = pipeline_layouts.find(pipelineLayout); + lock.unlock(); + + if (it == pipeline_layouts.end()) + return nullptr; + return it->second; +} + +static void +destroy_pipeline_layout_info(VkPipelineLayout pipelineLayout) +{ + unique_lock_t lock(global_lock); + auto it = pipeline_layouts.find(pipelineLayout); + if (it != pipeline_layouts.end()) { + free((void *)it->second->pSetLayouts); + free((void *)it->second->pPushConstantRanges); + free(it->second); + } + pipeline_layouts.erase(pipelineLayout); + lock.unlock(); +} + +static void +record_pipeline_layout_info(VkPipelineLayout *pPipelineLayout, + const VkPipelineLayoutCreateInfo *pCreateInfo) +{ + VkPipelineLayoutCreateInfo *pPipelineLayoutInfo; + + pPipelineLayoutInfo = (VkPipelineLayoutCreateInfo *) + malloc(sizeof(*pPipelineLayoutInfo)); + + memcpy(pPipelineLayoutInfo, pCreateInfo, sizeof(*pPipelineLayoutInfo)); + + if (pCreateInfo->setLayoutCount) { + VkDescriptorSetLayout *pSetLayouts; + + pSetLayouts = (VkDescriptorSetLayout *) + malloc(sizeof(*pSetLayouts) * + pCreateInfo->setLayoutCount); + + memcpy(pSetLayouts, pCreateInfo->pSetLayouts, + sizeof(*pSetLayouts) * + pCreateInfo->setLayoutCount); + pPipelineLayoutInfo->pSetLayouts = pSetLayouts; + } + + if (pCreateInfo->pushConstantRangeCount) { + VkPushConstantRange *pPushConstantRanges; + + pPushConstantRanges = (VkPushConstantRange *) + 
malloc(sizeof(*pPushConstantRanges) * + pCreateInfo->pushConstantRangeCount); + + memcpy(pPushConstantRanges, pCreateInfo->pPushConstantRanges, + sizeof(*pPushConstantRanges) * + pCreateInfo->pushConstantRangeCount); + pPipelineLayoutInfo->pPushConstantRanges = pPushConstantRanges; + } + + lock_guard_t lock(global_lock); + pipeline_layouts[*pPipelineLayout] = pPipelineLayoutInfo; +} + +VK_LAYER_EXPORT VkResult VKAPI_CALL +vkpipeline_db_CreatePipelineLayout( + VkDevice device, + const VkPipelineLayoutCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipelineLayout* pPipelineLayout) +{ + VkResult result; + + result = device_dispatch[GetKey(device)].CreatePipelineLayout(device, + pCreateInfo, + pAllocator, + pPipelineLayout); + if (result == VK_SUCCESS) + record_pipeline_layout_info(pPipelineLayout, pCreateInfo); + return result; +} + +VK_LAYER_EXPORT void VKAPI_CALL +vkpipeline_db_DestroyPipelineLayout( + VkDevice device, + VkPipelineLayout pipelineLayout, + const VkAllocationCallbacks* pAllocator) +{ + destroy_pipeline_layout_info(pipelineLayout); + device_dispatch[GetKey(device)].DestroyPipelineLayout(device, + pipelineLayout, + pAllocator); +} + +// render pass +static const VkRenderPassCreateInfo * +get_render_pass_info(VkRenderPass renderPass) +{ + unique_lock_t lock(global_lock); + auto it = render_passes.find(renderPass); + lock.unlock(); + + if (it == render_passes.end()) + return nullptr; + return it->second; +} + +static void +destroy_render_pass_info(VkRenderPass renderPass) +{ + unique_lock_t lock(global_lock); + auto it = render_passes.find(renderPass); + if (it != render_passes.end()) { + for (uint32_t i = 0; i < it->second->subpassCount; i++) { + const VkSubpassDescription *subpass = &it->second->pSubpasses[i]; + + free((void *)subpass->pInputAttachments); + free((void *)subpass->pColorAttachments); + free((void *)subpass->pDepthStencilAttachment); + free((void *)subpass->pPreserveAttachments); + } + free((void 
*)it->second->pAttachments); + free((void *)it->second->pSubpasses); + free((void *)it->second->pDependencies); + free(it->second); + } + render_passes.erase(renderPass); + lock.unlock(); +} + +static void +record_render_pass_info(VkRenderPass *pRenderPass, + const VkRenderPassCreateInfo *pCreateInfo) +{ + VkRenderPassCreateInfo *pRenderPassInfo; + + pRenderPassInfo = + (VkRenderPassCreateInfo *)malloc(sizeof(*pRenderPassInfo)); + + memcpy(pRenderPassInfo, pCreateInfo, sizeof(*pRenderPassInfo)); + + if (pCreateInfo->attachmentCount) { + VkAttachmentDescription *pAttachments = (VkAttachmentDescription *) + malloc(sizeof(*pAttachments) * pCreateInfo->attachmentCount); + + memcpy(pAttachments, pCreateInfo->pAttachments, + sizeof(*pAttachments) * pCreateInfo->attachmentCount); + pRenderPassInfo->pAttachments = pAttachments; + } + + if (pCreateInfo->subpassCount) { + VkSubpassDescription *pSubpasses = (VkSubpassDescription *) + calloc(pCreateInfo->subpassCount, sizeof(*pSubpasses)); + + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + const VkSubpassDescription *subpass = &pCreateInfo->pSubpasses[i]; + + pSubpasses[i].flags = subpass->flags; + pSubpasses[i].pipelineBindPoint = subpass->pipelineBindPoint; + pSubpasses[i].inputAttachmentCount = subpass->inputAttachmentCount; + pSubpasses[i].colorAttachmentCount = subpass->colorAttachmentCount; + pSubpasses[i].preserveAttachmentCount = subpass->preserveAttachmentCount; + + if (subpass->inputAttachmentCount) { + VkAttachmentReference *pInputAttachments; + + pInputAttachments = (VkAttachmentReference *) + malloc(sizeof(*pInputAttachments) * subpass->inputAttachmentCount); + + memcpy(pInputAttachments, subpass->pInputAttachments, + sizeof(*pInputAttachments) * subpass->inputAttachmentCount); + + pSubpasses[i].pInputAttachments = pInputAttachments; + } + + if (subpass->colorAttachmentCount) { + VkAttachmentReference *pColorAttachments; + + pColorAttachments = (VkAttachmentReference *) + 
malloc(sizeof(*pColorAttachments) * subpass->colorAttachmentCount); + + memcpy(pColorAttachments, subpass->pColorAttachments, + sizeof(*pColorAttachments) * subpass->colorAttachmentCount); + + pSubpasses[i].pColorAttachments = pColorAttachments; + } + + if (subpass->colorAttachmentCount && subpass->pResolveAttachments) { + VkAttachmentReference *pResolveAttachments; + + pResolveAttachments = (VkAttachmentReference *) + malloc(sizeof(*pResolveAttachments) * subpass->colorAttachmentCount); + + memcpy(pResolveAttachments, subpass->pResolveAttachments, + sizeof(*pResolveAttachments) * subpass->colorAttachmentCount); + + pSubpasses[i].pResolveAttachments = pResolveAttachments; + } + + if (subpass->pDepthStencilAttachment) { + VkAttachmentReference *pDepthStencilAttachment; + + pDepthStencilAttachment = (VkAttachmentReference *) + malloc(sizeof(*pDepthStencilAttachment)); + + memcpy(pDepthStencilAttachment, subpass->pDepthStencilAttachment, + sizeof(*pDepthStencilAttachment)); + + pSubpasses[i].pDepthStencilAttachment = pDepthStencilAttachment; + } + if (subpass->preserveAttachmentCount) { + uint32_t *pPreserveAttachments; + + pPreserveAttachments = (uint32_t *) + malloc(sizeof(*pPreserveAttachments) * subpass->preserveAttachmentCount); + + memcpy(pPreserveAttachments, subpass->pPreserveAttachments, + sizeof(*pPreserveAttachments) * subpass->preserveAttachmentCount); + + pSubpasses[i].pPreserveAttachments = pPreserveAttachments; + } + } + + pRenderPassInfo->pSubpasses = pSubpasses; + } + + if (pCreateInfo->dependencyCount) { + VkSubpassDependency *pDependencies = (VkSubpassDependency *) + malloc(sizeof(*pDependencies) * pCreateInfo->dependencyCount); + + memcpy(pDependencies, pCreateInfo->pDependencies, + sizeof(*pDependencies) * pCreateInfo->dependencyCount); + pRenderPassInfo->pDependencies = pDependencies; + } + + lock_guard_t lock(global_lock); + render_passes[*pRenderPass] = pRenderPassInfo; +} + +VK_LAYER_EXPORT VkResult VKAPI_CALL +vkpipeline_db_CreateRenderPass( 
+ VkDevice device, + const VkRenderPassCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass) +{ + VkResult result; + + result = device_dispatch[GetKey(device)].CreateRenderPass(device, + pCreateInfo, + pAllocator, + pRenderPass); + if (result == VK_SUCCESS) + record_render_pass_info(pRenderPass, pCreateInfo); + return result; +} + +VK_LAYER_EXPORT void VKAPI_CALL +vkpipeline_db_DestroyRenderPass( + VkDevice device, + VkRenderPass renderPass, + const VkAllocationCallbacks* pAllocator) +{ + destroy_render_pass_info(renderPass); + device_dispatch[GetKey(device)].DestroyRenderPass(device, + renderPass, + pAllocator); +} + +// shader module +static const VkShaderModuleCreateInfo * +get_shader_module_info(VkShaderModule shaderModule) +{ + unique_lock_t lock(global_lock); + auto it = shader_modules.find(shaderModule); + lock.unlock(); + + if (it == shader_modules.end()) + return nullptr; + return it->second; +} + +static void +destroy_shader_module_info(VkShaderModule shaderModule) +{ + unique_lock_t lock(global_lock); + auto it = shader_modules.find(shaderModule); + if (it != shader_modules.end()) { + free((void *)it->second->pCode); + free(it->second); + } + shader_modules.erase(shaderModule); + lock.unlock(); +} + +static void +record_shader_module_info(VkShaderModule *pShaderModule, + const VkShaderModuleCreateInfo *pCreateInfo) +{ + VkShaderModuleCreateInfo *pShaderModuleInfo; + uint32_t *pCode; + + pShaderModuleInfo = + (VkShaderModuleCreateInfo *)malloc(sizeof(*pShaderModuleInfo)); + + memcpy(pShaderModuleInfo, pCreateInfo, sizeof(*pShaderModuleInfo)); + + pCode = (uint32_t *)malloc(pCreateInfo->codeSize); + + memcpy(pCode, pCreateInfo->pCode, pCreateInfo->codeSize); + pShaderModuleInfo->pCode = pCode; + + lock_guard_t lock(global_lock); + shader_modules[*pShaderModule] = pShaderModuleInfo; +} + +VK_LAYER_EXPORT VkResult VKAPI_CALL +vkpipeline_db_CreateShaderModule( + VkDevice device, + const VkShaderModuleCreateInfo* 
pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkShaderModule* pShaderModule) +{ + VkResult result; + + result = device_dispatch[GetKey(device)].CreateShaderModule(device, + pCreateInfo, + pAllocator, + pShaderModule); + if (result == VK_SUCCESS) + record_shader_module_info(pShaderModule, pCreateInfo); + return result; +} + +VK_LAYER_EXPORT void VKAPI_CALL +vkpipeline_db_DestroyShaderModule( + VkDevice device, + VkShaderModule shaderModule, + const VkAllocationCallbacks* pAllocator) +{ + destroy_shader_module_info(shaderModule); + device_dispatch[GetKey(device)].DestroyShaderModule(device, + shaderModule, + pAllocator); +} + +// graphics/compute pipeline +static const char * +get_capture_path() +{ + static const char *capture_path = NULL; + static bool read_env_var = false; + static bool print_error = false; + + if (!read_env_var) { + capture_path = getenv("VKPIPELINE_DB_CAPTURE_PATH"); + read_env_var = true; + } + + if (!capture_path) { + if (!print_error) { + fprintf(stderr, "Failed to capture because " + "VKPIPELINE_DB_CAPTURE_PATH is not set, aborted!\n"); + print_error = 1; + } + } + + return capture_path; +} + +static void +free_pipeline(struct pipeline_info *pipeline) +{ + free(pipeline->pShaderStagesInfo); + free(pipeline->pShaderModulesInfo); + free(pipeline->pSetLayoutsInfo); + free(pipeline); +} + +static int +capture_pipeline(struct pipeline_info *pipeline) +{ + const char *capture_path; + struct blob metadata; + char filename[1024]; + int ret = 0; + int fd; + + if (!pipeline) + return -1; + + capture_path = get_capture_path(); + if (!capture_path) + return -1; + + blob_init(&metadata); + serialize_pipeline(pipeline, &metadata); + free_pipeline(pipeline); + + snprintf(filename, sizeof(filename), + "%s/%d.pipeline_test", capture_path, pipeline_count++); + + fd = open(filename, O_WRONLY | O_CREAT, 0644); + if (fd == -1) { + perror("open"); + ret = -1; + goto fail_open; + } + + if (write(fd, metadata.data, metadata.size) == -1) { + 
perror("write"); + ret = -1; + goto fail_write; + } + +fail_write: + close(fd); +fail_open: + free(metadata.data); + + return ret; +} + +static struct pipeline_info * +get_graphics_pipeline_info(const VkGraphicsPipelineCreateInfo *pCreateInfo) +{ + struct pipeline_info *pipeline; + + pipeline = (struct pipeline_info *)calloc(1, sizeof(*pipeline)); + + pipeline->bindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + pipeline->stageCount = pCreateInfo->stageCount; + + // shader stages and modules + pipeline->pShaderStagesInfo = (VkPipelineShaderStageCreateInfo *) + calloc(pipeline->stageCount, sizeof(*pipeline->pShaderStagesInfo)); + pipeline->pShaderModulesInfo = (VkShaderModuleCreateInfo *) + calloc(pipeline->stageCount, sizeof (*pipeline->pShaderModulesInfo)); + + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[i]; + const VkShaderModuleCreateInfo *pShaderModuleInfo = + get_shader_module_info(stage->module); + + pipeline->pShaderStagesInfo[i] = *stage; + pipeline->pShaderModulesInfo[i] = *pShaderModuleInfo; + } + + // graphics states + pipeline->vertexInputState = *pCreateInfo->pVertexInputState; + pipeline->inputAssemblyState = *pCreateInfo->pInputAssemblyState; + if (pCreateInfo->pTessellationState) + pipeline->tessellationState = *pCreateInfo->pTessellationState; + pipeline->viewportState = *pCreateInfo->pViewportState; + pipeline->rasterizationState = *pCreateInfo->pRasterizationState; + pipeline->multisampleState = *pCreateInfo->pMultisampleState; + pipeline->depthStencilState = *pCreateInfo->pDepthStencilState; + if (pCreateInfo->pColorBlendState) + pipeline->colorBlendState = *pCreateInfo->pColorBlendState; + if (pCreateInfo->pDynamicState) + pipeline->dynamicState = *pCreateInfo->pDynamicState; + + // pipeline layout + const VkPipelineLayoutCreateInfo *pPipelineLayoutInfo = + get_pipeline_layout_info(pCreateInfo->layout); + pipeline->pipelineLayoutInfo = *pPipelineLayoutInfo; + + 
pipeline->pSetLayoutsInfo = (VkDescriptorSetLayoutCreateInfo *) + malloc(sizeof(*pipeline->pSetLayoutsInfo) * pipeline->pipelineLayoutInfo.setLayoutCount); + + for (uint32_t i = 0; i < pPipelineLayoutInfo->setLayoutCount; i++) { + const VkDescriptorSetLayout *layout = + &pPipelineLayoutInfo->pSetLayouts[i]; + const VkDescriptorSetLayoutCreateInfo *pSetLayoutInfo = + get_descriptor_set_layout_info(*layout); + + pipeline->pSetLayoutsInfo[i] = *pSetLayoutInfo; + } + + // render pass + const VkRenderPassCreateInfo *pRenderPassInfo = + get_render_pass_info(pCreateInfo->renderPass); + pipeline->renderPassInfo = *pRenderPassInfo; + + return pipeline; +} + +VK_LAYER_EXPORT VkResult VKAPI_CALL +vkpipeline_db_CreateGraphicsPipelines( + VkDevice device, + VkPipelineCache pipelineCache, + uint32_t createInfoCount, + const VkGraphicsPipelineCreateInfo* pCreateInfos, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipelines) +{ + VkResult result; + + result = device_dispatch[GetKey(device)].CreateGraphicsPipelines(device, + pipelineCache, + createInfoCount, + pCreateInfos, + pAllocator, + pPipelines); + if (result == VK_SUCCESS) { + for (uint32_t i = 0; i < createInfoCount; i++) { + struct pipeline_info *pipeline; + + pipeline = get_graphics_pipeline_info(&pCreateInfos[i]); + capture_pipeline(pipeline); + } + } + + return result; +} + +static struct pipeline_info * +get_compute_pipeline_info(const VkComputePipelineCreateInfo *pCreateInfo) +{ + struct pipeline_info *pipeline; + + pipeline = (struct pipeline_info *)calloc(1, sizeof(*pipeline)); + + pipeline->bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE; + pipeline->stageCount = 1; + + // shader stages and modules + const VkShaderModuleCreateInfo *pShaderModuleInfo = + get_shader_module_info(pCreateInfo->stage.module); + + pipeline->pShaderStagesInfo = (VkPipelineShaderStageCreateInfo *) + calloc(pipeline->stageCount, sizeof(*pipeline->pShaderStagesInfo)); + pipeline->pShaderModulesInfo = (VkShaderModuleCreateInfo *) + 
calloc(pipeline->stageCount, sizeof (*pipeline->pShaderModulesInfo)); + + pipeline->pShaderStagesInfo[0] = pCreateInfo->stage; + pipeline->pShaderModulesInfo[0] = *pShaderModuleInfo; + + // pipeline layout + const VkPipelineLayoutCreateInfo *pPipelineLayoutInfo = + get_pipeline_layout_info(pCreateInfo->layout); + pipeline->pipelineLayoutInfo = *pPipelineLayoutInfo; + + pipeline->pSetLayoutsInfo = (VkDescriptorSetLayoutCreateInfo *) + malloc(sizeof(*pipeline->pSetLayoutsInfo) * pipeline->pipelineLayoutInfo.setLayoutCount); + + for (uint32_t i = 0; i < pPipelineLayoutInfo->setLayoutCount; i++) { + const VkDescriptorSetLayout *layout = + &pPipelineLayoutInfo->pSetLayouts[i]; + const VkDescriptorSetLayoutCreateInfo *pSetLayoutInfo = + get_descriptor_set_layout_info(*layout); + + pipeline->pSetLayoutsInfo[i] = *pSetLayoutInfo; + } + + return pipeline; +} + +VK_LAYER_EXPORT VkResult VKAPI_CALL +vkpipeline_db_CreateComputePipelines( + VkDevice device, + VkPipelineCache pipelineCache, + uint32_t createInfoCount, + const VkComputePipelineCreateInfo* pCreateInfos, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipelines) +{ + VkResult result; + + result = device_dispatch[GetKey(device)].CreateComputePipelines(device, + pipelineCache, + createInfoCount, + pCreateInfos, + pAllocator, + pPipelines); + if (result == VK_SUCCESS) { + for (uint32_t i = 0; i < createInfoCount; i++) { + struct pipeline_info *pipeline; + + pipeline = get_compute_pipeline_info(&pCreateInfos[i]); + capture_pipeline(pipeline); + } + } + + return result; +} + +VK_LAYER_EXPORT PFN_vkVoidFunction VKAPI_CALL +vkpipeline_db_GetDeviceProcAddr(VkDevice device, const char *pName); +VK_LAYER_EXPORT PFN_vkVoidFunction VKAPI_CALL +vkpipeline_db_GetInstanceProcAddr(VkInstance instance, const char *pName); + +#define FUNC(name) (void *)vkpipeline_db_##name + +// list of functions that are intercepted by this layer +static const struct { + const char *name; + void *ptr; +} funcs[] = { + { 
"vkGetInstanceProcAddr", FUNC(GetInstanceProcAddr) }, + { "vkEnumerateInstanceLayerProperties", FUNC(EnumerateInstanceLayerProperties) }, + { "vkEnumerateInstanceExtensionProperties", FUNC(EnumerateInstanceExtensionProperties) }, + { "vkCreateInstance", FUNC(CreateInstance) }, + { "vkDestroyInstance", FUNC(DestroyInstance) }, + { "vkGetDeviceProcAddr", FUNC(GetDeviceProcAddr) }, + { "vkEnumerateDeviceLayerProperties", FUNC(EnumerateDeviceLayerProperties) }, + { "vkEnumerateDeviceExtensionProperties", FUNC(EnumerateDeviceExtensionProperties) }, + { "vkCreateDevice", FUNC(CreateDevice) }, + { "vkDestroyDevice", FUNC(DestroyDevice) }, + { "vkCreateSampler", FUNC(CreateSampler) }, + { "vkDestroySampler", FUNC(DestroySampler) }, + { "vkCreateDescriptorSetLayout", FUNC(CreateDescriptorSetLayout) }, + { "vkDestroyDescriptorSetLayout", FUNC(DestroyDescriptorSetLayout) }, + { "vkCreatePipelineLayout", FUNC(CreatePipelineLayout) }, + { "vkDestroyPipelineLayout", FUNC(DestroyPipelineLayout) }, + { "vkCreateShaderModule", FUNC(CreateShaderModule) }, + { "vkDestroyShaderModule", FUNC(DestroyShaderModule) }, + { "vkCreateRenderPass", FUNC(CreateRenderPass) }, + { "vkDestroyRenderPass", FUNC(DestroyRenderPass) }, + { "vkCreateGraphicsPipelines", FUNC(CreateGraphicsPipelines) }, + { "vkCreateComputePipelines", FUNC(CreateComputePipelines) }, +}; + +#undef FUNC + +VK_LAYER_EXPORT PFN_vkVoidFunction VKAPI_CALL +vkpipeline_db_GetDeviceProcAddr( + VkDevice device, + const char *pName) +{ + for (uint32_t i = 0; i < sizeof(funcs) / sizeof(funcs[0]); i++) { + if (!strcmp(pName, funcs[i].name)) + return (PFN_vkVoidFunction)funcs[i].ptr; + } + + lock_guard_t l(global_lock); + return device_dispatch[GetKey(device)].GetDeviceProcAddr(device, pName); +} + +VK_LAYER_EXPORT PFN_vkVoidFunction VKAPI_CALL +vkpipeline_db_GetInstanceProcAddr( + VkInstance instance, + const char *pName) +{ + for (uint32_t i = 0; i < sizeof(funcs) / sizeof(funcs[0]); i++) { + if (!strcmp(pName, funcs[i].name)) + 
return (PFN_vkVoidFunction)funcs[i].ptr; + } + + lock_guard_t l(global_lock); + return instance_dispatch[GetKey(instance)].GetInstanceProcAddr(instance, pName); +} |