/* * Copyright © 2018 Valve Corporation * * Based in part on shader-db which is: * Copyright © 2014 Intel Corporation * Copyright © 2015 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "serialize.h" enum vendors { VENDOR_AMD = 0x1002, VENDOR_INTEL = 0x8086 }; enum vendors vendor; #define unlikely(x) __builtin_expect(!!(x), 0) int max_threads; const char **current_pipeline_names; VkDebugReportCallbackEXT *msgCallbacks; static pthread_mutex_t printf_mutex; #define sigputs(str) write(STDERR_FILENO, str, strlen(str)) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-result" static void abort_handler(int signo) { if (current_pipeline_names) { sigputs("\n => CRASHED <= while processing these pipelines:\n\n"); for (int i = 0; i < max_threads; i++) { if (current_pipeline_names[i]) { sigputs(" "); sigputs(current_pipeline_names[i]); sigputs("\n"); } } } else { sigputs("\n => CRASHED <= during final teardown.\n"); } sigputs("\n"); _exit(-1); } #pragma GCC diagnostic pop static VkBool32 callback(VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objectType, uint64_t object, size_t location, int32_t messageCode, const char* pLayerPrefix, const char* pMessage, void* pUserData) { assert(flags == VK_DEBUG_REPORT_DEBUG_BIT_EXT); (void) objectType; (void) object; (void) location; (void) messageCode; (void) pLayerPrefix; const char *shader_name = pUserData; printf("%s - %s\n", shader_name, pMessage); return VK_FALSE; } /* Pipeline tests. */ static unsigned pipeline_test_size = 1 << 15; /* next-pow-2(num pipelines in db) */ static unsigned pipeline_test_length; static struct pipeline_test { char *filename; off_t filesize; } *pipeline_test; static int gather_pipeline_test(const char *fpath, const struct stat *sb, int typeflag) { static const char *ext = ".pipeline_test"; if (strlen(fpath) >= strlen(ext) && memcmp(fpath + strlen(fpath) - strlen(ext), ext, strlen(ext)) == 0) { if (unlikely(!S_ISREG(sb->st_mode))) { fprintf(stderr, "ERROR: %s is not a regular file\n", fpath); return -1; } if (unlikely(pipeline_test_size <= pipeline_test_length)) { pipeline_test_size *= 2; pipeline_test = realloc(pipeline_test, pipeline_test_size * sizeof(struct pipeline_test)); } pipeline_test[pipeline_test_length].filename = malloc(strlen(fpath) + 1); memcpy(pipeline_test[pipeline_test_length].filename, fpath, strlen(fpath) + 1); pipeline_test[pipeline_test_length].filesize = sb->st_size; pipeline_test_length++; } return 0; } /* Shader stats */ static PFN_vkGetShaderInfoAMD vkGetShaderInfo = VK_NULL_HANDLE; static PFN_vkCreateDebugReportCallbackEXT createDebugReportCallback = VK_NULL_HANDLE; static PFN_vkDestroyDebugReportCallbackEXT destroyDebugReportCallback = VK_NULL_HANDLE; struct shader_stats { unsigned num_sgprs; unsigned num_vgprs; unsigned num_spilled_sgprs; unsigned num_spilled_vgprs; unsigned priv_mem_vgprs; unsigned code_size; unsigned lds; unsigned scratch; unsigned max_waves; }; #define PARSE_STAT(key, value) \ line = strtok_r(NULL, "\n", &saveptr); \ if (sscanf(line, key, value) != 1) \ return -1; static int amd_parse_shader_stats(char *buf, struct shader_stats *stats) { char *line; char *saveptr; line = strtok_r(buf, "\n", &saveptr); while(line) { if (!strcmp(line, "*** SHADER STATS ***")) break; line = strtok_r(NULL, "\n", &saveptr); } if (unlikely(!line)) return -1; PARSE_STAT("SGPRs: %d\n", &stats->num_sgprs); PARSE_STAT("VGPRs: %d\n", &stats->num_vgprs); PARSE_STAT("Spilled SGPRs: %d\n", &stats->num_spilled_sgprs); PARSE_STAT("Spilled VGPRs: %d\n", &stats->num_spilled_vgprs); PARSE_STAT("PrivMem VGPRs: %d\n", &stats->priv_mem_vgprs); PARSE_STAT("Code size: %d bytes\n", &stats->code_size); PARSE_STAT("LDS size: %d blocks\n", &stats->lds); PARSE_STAT("Scratch size: %d bytes per wave\n", &stats->scratch); PARSE_STAT("Subgroups per SIMD: %d\n", &stats->max_waves); return 0; } static bool is_shader_stage_valid(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits stage) { VkResult result; size_t size; result = vkGetShaderInfo(device, pipeline, stage, VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD, &size, NULL); if (result == VK_ERROR_FEATURE_NOT_PRESENT){ /* The spec doesn't state what to do when the stage is invalid, and RADV * returns VK_ERROR_FEATURE_NOT_PRESENT in this situation, mostly for * merged shaders on GFX9. */ return false; } return true; } static int get_shader_info(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits stage, char **shader_info) { VkResult result; size_t size; result = vkGetShaderInfo(device, pipeline, stage, VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD, &size, NULL); if (unlikely(result != VK_SUCCESS)) return -1; *shader_info = malloc(size); if (unlikely(!*shader_info)) return -1; result = vkGetShaderInfo(device, pipeline, stage, VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD, &size, *shader_info); if (unlikely(result != VK_SUCCESS)) return -1; return 0; } static const char * get_shader_stage_name(VkShaderStageFlagBits stage) { switch (stage) { case VK_SHADER_STAGE_VERTEX_BIT: return "VS"; case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return "TCS"; case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return "TES"; case VK_SHADER_STAGE_GEOMETRY_BIT: return "GS"; case VK_SHADER_STAGE_FRAGMENT_BIT: return "FS"; case VK_SHADER_STAGE_COMPUTE_BIT: return "CS"; default: return NULL; } } static void amd_print_shader_stats(const char *pipeline_name, VkShaderStageFlagBits stage, const struct shader_stats *stats) { pthread_mutex_lock(&printf_mutex); printf("%s (%s) - ", pipeline_name, get_shader_stage_name(stage)); printf("Shader Stats: "); printf("SGPRS: %d ", stats->num_sgprs); printf("VGPRS: %d ", stats->num_vgprs); printf("Code Size: %d ", stats->code_size); printf("LDS: %d ", stats->lds); printf("Scratch: %d ", stats->scratch); printf("Max Waves: %d ", stats->max_waves); printf("Spilled SGPRs: %d ", stats->num_spilled_sgprs); printf("Spilled VGPRs: %d ", stats->num_spilled_vgprs); printf("PrivMem VGPRs: %d ", stats->priv_mem_vgprs); printf("\n"); pthread_mutex_unlock(&printf_mutex); } static VkResult create_graphics_pipeline(VkDevice device, struct pipeline_info *info, VkPipelineLayout layout, VkPipeline *pipeline) { VkGraphicsPipelineCreateInfo pipelineInfo = {}; VkRenderPass renderPass = VK_NULL_HANDLE; VkResult result; /* Render pass. */ result = vkCreateRenderPass(device, &info->renderPassInfo, NULL, &renderPass); if (unlikely(result != VK_SUCCESS)) return result; /* Graphics pipeline. */ pipelineInfo.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; pipelineInfo.stageCount = info->stageCount; pipelineInfo.pStages = info->pShaderStagesInfo; pipelineInfo.pVertexInputState = &info->vertexInputState; pipelineInfo.pInputAssemblyState = &info->inputAssemblyState; pipelineInfo.pTessellationState = info->tessellationState.sType == VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO ? &info->tessellationState : NULL; pipelineInfo.pViewportState = info->viewportState.sType == VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO ? &info->viewportState : NULL; pipelineInfo.pRasterizationState = &info->rasterizationState; pipelineInfo.pMultisampleState = info->multisampleState.sType == VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO ? &info->multisampleState : NULL; pipelineInfo.pDepthStencilState = info->depthStencilState.sType == VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO ? &info->depthStencilState : NULL; pipelineInfo.pColorBlendState = info->colorBlendState.sType == VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO ? &info->colorBlendState : NULL; pipelineInfo.pDynamicState = info->dynamicState.sType == VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO ? &info->dynamicState : NULL; pipelineInfo.layout = layout; pipelineInfo.renderPass = renderPass; result = vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &pipelineInfo, NULL, pipeline); vkDestroyRenderPass(device, renderPass, NULL); return result; } static int create_compute_pipeline(VkDevice device, struct pipeline_info *info, VkPipelineLayout layout, VkPipeline *pipeline) { VkComputePipelineCreateInfo pipelineInfo = {}; /* Compute pipeline. */ pipelineInfo.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; pipelineInfo.stage = *info->pShaderStagesInfo; pipelineInfo.layout = layout; return vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &pipelineInfo, NULL, pipeline); } static int create_pipeline(VkDevice device, VkInstance instance, const char *pipeline_name, struct pipeline_info *info) { VkPipelineLayout layout = VK_NULL_HANDLE; VkPipeline pipeline = VK_NULL_HANDLE; VkResult result; int ret = 0; /* Shader modules. */ for (uint32_t i = 0; i < info->stageCount; i++) { result = vkCreateShaderModule(device, &info->pShaderModulesInfo[i], NULL, &info->pShaderStagesInfo[i].module); if (unlikely(result != VK_SUCCESS)) { ret = -1; goto fail; } } /* Descriptor set layouts. */ VkDescriptorSetLayout *pSetLayouts = calloc(info->pipelineLayoutInfo.setLayoutCount, sizeof(*pSetLayouts)); if (unlikely(!pSetLayouts)) { ret = -1; goto fail; } for (uint32_t i = 0; i < info->pipelineLayoutInfo.setLayoutCount; i++) { result = vkCreateDescriptorSetLayout(device, &info->pSetLayoutsInfo[i], NULL, &pSetLayouts[i]); if (unlikely(result != VK_SUCCESS)) { ret = -1; goto fail; } } /* Attach descriptor set layouts to the pipeline. */ info->pipelineLayoutInfo.pSetLayouts = pSetLayouts; /* Pipeline layout. */ result = vkCreatePipelineLayout(device, &info->pipelineLayoutInfo, NULL, &layout); if (unlikely(result != VK_SUCCESS)) { ret = -1; goto fail; } VkDebugReportCallbackCreateInfoEXT callbackInfo = {}; callbackInfo.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT; callbackInfo.pNext = NULL; callbackInfo.flags = VK_DEBUG_REPORT_DEBUG_BIT_EXT; callbackInfo.pfnCallback = callback; callbackInfo.pUserData = (char *) pipeline_name; if (vendor != VENDOR_AMD) { result = createDebugReportCallback(instance, &callbackInfo, NULL, &msgCallbacks[omp_get_thread_num()]); if (unlikely(result != VK_SUCCESS)) { ret = -1; goto fail; } } /* Graphics/Compute pipeline. */ if (info->bindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { result = create_graphics_pipeline(device, info, layout, &pipeline); } else { assert(info->bindPoint == VK_PIPELINE_BIND_POINT_COMPUTE); result = create_compute_pipeline(device, info, layout, &pipeline); } if (unlikely(result != VK_SUCCESS)) { ret = -1; goto fail; } /* Shader stats. */ if (vendor == VENDOR_AMD) { for (uint32_t i = 0; i < info->stageCount; i++) { VkPipelineShaderStageCreateInfo *pCreateInfo = &info->pShaderStagesInfo[i]; VkShaderStageFlagBits stage = pCreateInfo->stage; if (!is_shader_stage_valid(device, pipeline, stage)) continue; char *shader_info = NULL; ret = get_shader_info(device, pipeline, stage, &shader_info); if (unlikely(ret < 0)) { fprintf(stderr, "Failed to get shader info!\n"); goto fail; } struct shader_stats stats = {}; if (unlikely(amd_parse_shader_stats(shader_info, &stats) < 0)) { fprintf(stderr, "Failed to parse AMD shader statistics!\n"); } else { amd_print_shader_stats(pipeline_name, stage, &stats); } free(shader_info); } } if (vendor != VENDOR_AMD) { destroyDebugReportCallback(instance, msgCallbacks[omp_get_thread_num()], NULL); } fail: for (uint32_t i = 0; i < info->stageCount; i++) vkDestroyShaderModule(device, info->pShaderStagesInfo[i].module, NULL); for (uint32_t i = 0; i < info->pipelineLayoutInfo.setLayoutCount; i++) vkDestroyDescriptorSetLayout(device, pSetLayouts[i], NULL); vkDestroyPipelineLayout(device, layout, NULL); vkDestroyPipeline(device, pipeline, NULL); free(pSetLayouts); return ret; } static void free_pipeline(struct pipeline_info *pipeline) { for (uint32_t i = 0; i < pipeline->pipelineLayoutInfo.setLayoutCount; i++) { VkDescriptorSetLayoutCreateInfo *pInfo = &pipeline->pSetLayoutsInfo[i]; free((void *)pInfo->pBindings); } free(pipeline->pSetLayoutsInfo); free(pipeline->pShaderStagesInfo); free(pipeline->pShaderModulesInfo); free((void *)pipeline->renderPassInfo.pSubpasses); /* XXX*/ free(pipeline); } static int run(VkDevice device, VkInstance instance, const char *pipeline_name, const char *data, off_t size) { struct pipeline_info *pipeline; struct blob_reader metadata; int ret = 0; blob_reader_init(&metadata, data, size); pipeline = calloc(1, sizeof(*pipeline)); if (unlikely(!pipeline)) return -1; if (unlikely(!deserialize_pipeline(pipeline, &metadata))) { fprintf(stderr, "Failed to deserialize pipeline, corrupted data?\n"); return -1; } ret = create_pipeline(device, instance, pipeline_name, pipeline); if (unlikely(ret < 0)) { fprintf(stderr, "Failed to create pipeline!\n"); goto fail; } fail: free_pipeline(pipeline); return ret; } static void print_usage(const char *prog_name) { fprintf(stderr, "Usage: %s [-j ] " "\n", prog_name); } int main(int argc, char **argv) { const char *shader_info_ext[] = { "VK_AMD_shader_info" }; const char *debug_report_ext[] = { "VK_EXT_debug_report" }; int opt; int ret = 0; pthread_mutex_init(&printf_mutex, NULL); max_threads = omp_get_max_threads(); while ((opt = getopt(argc, argv, "j:")) != -1) { switch(opt) { case 'j': max_threads = atoi(optarg); break; default: fprintf(stderr, "Unknown option: %x\n", opt); print_usage(argv[0]); return -1; } } if (unlikely(optind >= argc)) { fprintf(stderr, "No directories specified\n"); print_usage(argv[0]); return -1; } /** * Runner. */ /* Gather all pipeline tests. */ pipeline_test = malloc(pipeline_test_size * sizeof(struct pipeline_test)); for (int i = optind; i < argc; i++) { ftw(argv[i], gather_pipeline_test, 100); } unsigned *pipline_counts = calloc(max_threads, sizeof(unsigned)); current_pipeline_names = calloc(max_threads, sizeof(const char *)); msgCallbacks = calloc(max_threads, sizeof(VkDebugReportCallbackEXT)); omp_set_num_threads(max_threads); if (signal(SIGABRT, abort_handler) == SIG_ERR) fprintf(stderr, "WARNING: could not install SIGABRT handler.\n"); if (signal(SIGSEGV, abort_handler) == SIG_ERR) fprintf(stderr, "WARNING: could not install SIGSEGV handler.\n"); setenv("ANV_ENABLE_PIPELINE_CACHE", "false", 1); #pragma omp parallel if(max_threads > 1 && pipeline_test_length > max_threads) { VkInstance instance; VkQueueFamilyProperties queue_family; VkPhysicalDevice *physical_devices; uint32_t device_count; uint32_t queue_count = 1; VkResult result; /** * Instance creation. */ VkApplicationInfo appInfo = {}; appInfo.pApplicationName = "vkpipeline-db"; VkInstanceCreateInfo instanceCreateInfo = {}; instanceCreateInfo.pApplicationInfo = &appInfo; instanceCreateInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; if (vendor != VENDOR_AMD) { instanceCreateInfo.enabledExtensionCount = 1; instanceCreateInfo.ppEnabledExtensionNames = debug_report_ext; } result = vkCreateInstance(&instanceCreateInfo, NULL, &instance); if (unlikely(result != VK_SUCCESS)) { if (result == VK_ERROR_EXTENSION_NOT_PRESENT) fprintf(stderr, "VK_EXT_debug_report is required!\n"); fprintf(stderr, "Failed to create instance (%d).\n", result); ret = -1; goto fail_device; } /** * Device creation. */ /* Get number of devices. */ result = vkEnumeratePhysicalDevices(instance, &device_count, NULL); fprintf(stderr, "Number of devices: %d\n", device_count); physical_devices = malloc(sizeof(*physical_devices) * device_count); /* Get physical devices. */ result = vkEnumeratePhysicalDevices(instance, &device_count, physical_devices); if (unlikely(result != VK_SUCCESS)) { fprintf(stderr, "Failed to enumerate physical devices (%d).\n", result); ret = -1; goto fail_device; } VkPhysicalDeviceProperties device_properties; vkGetPhysicalDeviceProperties(physical_devices[0], &device_properties); vendor = device_properties.vendorID; fprintf(stderr, "GPU: %s\n", device_properties.deviceName); /* Get queue properties. */ vkGetPhysicalDeviceQueueFamilyProperties(physical_devices[0], &queue_count, &queue_family); assert(queue_family.queueFlags & VK_QUEUE_GRAPHICS_BIT); /* Create logical device. */ VkDevice device; VkDeviceQueueCreateInfo queueCreateInfo = {}; queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; queueCreateInfo.queueFamilyIndex = 0; queueCreateInfo.queueCount = 1; VkDeviceCreateInfo deviceCreateInfo = {}; deviceCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; deviceCreateInfo.queueCreateInfoCount = 1; deviceCreateInfo.pQueueCreateInfos = &queueCreateInfo; if (vendor == VENDOR_AMD) { deviceCreateInfo.enabledExtensionCount = 1; deviceCreateInfo.ppEnabledExtensionNames = shader_info_ext; } result = vkCreateDevice(physical_devices[0], &deviceCreateInfo, NULL, &device); if (unlikely(result != VK_SUCCESS)) { if (result == VK_ERROR_EXTENSION_NOT_PRESENT) fprintf(stderr, "VK_AMD_shader_info is required!\n"); fprintf(stderr, "Failed to create device (%d).\n", result); ret = -1; goto fail_device; } if (vendor == VENDOR_AMD) { vkGetShaderInfo = (PFN_vkGetShaderInfoAMD)vkGetDeviceProcAddr(device, "vkGetShaderInfoAMD"); } else { createDebugReportCallback = (PFN_vkCreateDebugReportCallbackEXT)vkGetInstanceProcAddr(instance, "vkCreateDebugReportCallbackEXT"); destroyDebugReportCallback = (PFN_vkDestroyDebugReportCallbackEXT)vkGetInstanceProcAddr(instance, "vkDestroyDebugReportCallbackEXT"); } struct timespec start, end; clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); /* Process each pipeline tests. */ #pragma omp for schedule(dynamic) for (unsigned i = 0; i < pipeline_test_length; i++) { const char *current_pipeline_name = pipeline_test[i].filename; off_t filesize = pipeline_test[i].filesize; char *data; int fd; current_pipeline_names[omp_get_thread_num()] = current_pipeline_name; fprintf(stderr, "--> %s\n", current_pipeline_name); fd = open(current_pipeline_name, O_RDONLY); if (unlikely(fd == -1)) { perror("open"); continue; } data = mmap(NULL, filesize, PROT_READ, MAP_PRIVATE, fd, 0); if (unlikely(data == MAP_FAILED)) { perror("mmap"); continue; } if (unlikely(close(fd) == -1)) { perror("close"); continue; } if (unlikely(run(device, instance, current_pipeline_name, data, filesize) < 0)) continue; pipline_counts[omp_get_thread_num()]++; if (unlikely(munmap(data, filesize) == -1)) { perror("munmap"); continue; } current_pipeline_names[omp_get_thread_num()] = NULL; free(pipeline_test[i].filename); } clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); printf("Thread %d took %.2lf seconds and compiled %u pipelines\n", omp_get_thread_num(), (end.tv_sec - start.tv_sec) + 10e-9 * (end.tv_nsec - start.tv_nsec), pipline_counts[omp_get_thread_num()]); vkDestroyDevice(device, NULL); fail_device: free(physical_devices); vkDestroyInstance(instance, NULL); } free(pipeline_test); return ret; }