diff options
author | Ian Romanick <ian.d.romanick@intel.com> | 2021-06-04 14:17:42 -0700 |
---|---|---|
committer | Ian Romanick <ian.d.romanick@intel.com> | 2021-06-04 14:19:57 -0700 |
commit | 433541c7be2fd433c8f2d59bf0dcef3ffaffa911 (patch) | |
tree | 5ea473990a5b6f8e414abb87e70917b8b33ded43 | |
parent | 4c2a672634d576e031a61f2ff8727250aa86b043 (diff) |
WIP: iris: Enable threaded shader compilation [iris-threaded-shaders]
I think this is pretty close to done. There are a couple of FINISHME
comments that need to be addressed, and a couple of minor things that could
be improved (e.g., dynamic allocation of the threaded_compile_job). I
also want to do a lot more stress testing with real apps and shader-db.
-rw-r--r-- | src/gallium/drivers/iris/iris_context.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/iris/iris_context.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/iris/iris_program.c | 80 | ||||
-rw-r--r-- | src/gallium/drivers/iris/iris_screen.c | 54 | ||||
-rw-r--r-- | src/gallium/drivers/iris/iris_screen.h | 2 |
5 files changed, 134 insertions, 8 deletions
diff --git a/src/gallium/drivers/iris/iris_context.c b/src/gallium/drivers/iris/iris_context.c index 20ae4af49f7..2c7a6ef1a0e 100644 --- a/src/gallium/drivers/iris/iris_context.c +++ b/src/gallium/drivers/iris/iris_context.c @@ -44,6 +44,9 @@ iris_set_debug_callback(struct pipe_context *ctx, const struct pipe_debug_callback *cb) { struct iris_context *ice = (struct iris_context *)ctx; + struct iris_screen *screen = (struct iris_screen *)ctx->screen; + + util_queue_finish(&screen->shader_compiler_queue); if (cb) ice->dbg = *cb; diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h index 1b0955d928b..fb5a00d1bb9 100644 --- a/src/gallium/drivers/iris/iris_context.h +++ b/src/gallium/drivers/iris/iris_context.h @@ -415,6 +415,9 @@ struct iris_uncompiled_shader { /** Lock for the variants list */ simple_mtx_t lock; + + /** For parallel shader compiles */ + struct util_queue_fence ready; }; enum iris_surface_group { diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c index a0fa6fe9dc7..656062ead9e 100644 --- a/src/gallium/drivers/iris/iris_program.c +++ b/src/gallium/drivers/iris/iris_program.c @@ -38,6 +38,7 @@ #include "util/u_atomic.h" #include "util/u_upload_mgr.h" #include "util/debug.h" +#include "util/u_async_debug.h" #include "compiler/nir/nir.h" #include "compiler/nir/nir_builder.h" #include "compiler/nir/nir_serialize.h" @@ -54,6 +55,14 @@ .base.tex.compressed_multisample_layout_mask = ~0, \ .base.tex.msaa_16 = (gen >= 9 ? 
~0 : 0) +struct threaded_compile_job { + struct iris_screen *screen; + struct u_upload_mgr *uploader; + struct pipe_debug_callback *dbg; + struct iris_uncompiled_shader *ish; + struct iris_compiled_shader *shader; +}; + static unsigned get_new_program_id(struct iris_screen *screen) { @@ -1171,6 +1180,41 @@ find_or_add_variant(const struct iris_screen *screen, return variant; } +static void +threaded_compile_job_delete(void *_job, UNUSED int thread_index) +{ + free(_job); +} + +static void +iris_schedule_compile(struct iris_screen *screen, + struct util_queue_fence *ready_fence, + struct pipe_debug_callback *dbg, + struct threaded_compile_job *job, + util_queue_execute_func execute) + +{ + util_queue_fence_init(ready_fence); + + struct util_async_debug_callback async_debug; + + if (dbg) { + u_async_debug_init(&async_debug); + job->dbg = &async_debug.base; + } + + util_queue_add_job(&screen->shader_compiler_queue, job, ready_fence, execute, + threaded_compile_job_delete, 0); + + if (screen->driconf.sync_compile || dbg) + util_queue_fence_wait(ready_fence); + + if (dbg) { + u_async_debug_drain(&async_debug, dbg); + u_async_debug_cleanup(&async_debug); + } +} + /** * Compile a vertex shader, and upload the assembly. 
*/ @@ -2406,12 +2450,17 @@ iris_create_compute_state(struct pipe_context *ctx, } static void -iris_compile_shader(struct iris_screen *screen, - struct u_upload_mgr *uploader, - struct pipe_debug_callback *dbg, - struct iris_uncompiled_shader *ish, - struct iris_compiled_shader *shader) +iris_compile_shader(void *_job, UNUSED int thread_index) { + const struct threaded_compile_job *job = + (struct threaded_compile_job *) _job; + + struct iris_screen *screen = job->screen; + struct u_upload_mgr *uploader = job->uploader; + struct pipe_debug_callback *dbg = job->dbg; + struct iris_uncompiled_shader *ish = job->ish; + struct iris_compiled_shader *shader = job->shader; + switch (ish->nir->info.stage) { case MESA_SHADER_VERTEX: iris_compile_vs(screen, uploader, dbg, ish, shader); @@ -2560,9 +2609,23 @@ iris_create_shader_state(struct pipe_context *ctx, (enum iris_program_cache_id) info->stage, &key, key_size, &found); - if (!found && !iris_disk_cache_retrieve(screen, uploader, ish, shader, - &key, key_size)) { - iris_compile_shader(screen, uploader, &ice->dbg, ish, shader); + if (!found) { + assert(!util_queue_fence_is_signalled(&shader->ready)); + + if (!iris_disk_cache_retrieve(screen, uploader, ish, shader, + &key, key_size)) { + assert(!util_queue_fence_is_signalled(&shader->ready)); + + struct threaded_compile_job *job = calloc(1, sizeof(*job)); + + job->screen = screen; + job->uploader = uploader; + job->ish = ish; + job->shader = shader; + + iris_schedule_compile(screen, &ish->ready, &ice->dbg, job, + iris_compile_shader); + } } } @@ -2590,6 +2653,7 @@ iris_destroy_shader_state(struct pipe_context *ctx, void *state) } simple_mtx_destroy(&ish->lock); + util_queue_fence_destroy(&ish->ready); ralloc_free(ish->nir); free(ish); diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c index 16c3f857b00..cacac162f5c 100644 --- a/src/gallium/drivers/iris/iris_screen.c +++ b/src/gallium/drivers/iris/iris_screen.c @@ -621,6 +621,7 @@ void 
iris_screen_destroy(struct iris_screen *screen) { iris_destroy_screen_measure(screen); + util_queue_destroy(&screen->shader_compiler_queue); glsl_type_singleton_decref(); iris_bo_unreference(screen->workaround_bo); u_transfer_helper_destroy(screen->base.transfer_helper); @@ -661,6 +662,38 @@ iris_get_disk_shader_cache(struct pipe_screen *pscreen) return screen->disk_cache; } +static void +iris_set_max_shader_compiler_threads(struct pipe_screen *pscreen, + unsigned max_threads) +{ + struct iris_screen *screen = (struct iris_screen *) pscreen; + util_queue_adjust_num_threads(&screen->shader_compiler_queue, max_threads); +} + +static bool +iris_is_parallel_shader_compilation_finished(struct pipe_screen *pscreen, + void *v_shader, + enum pipe_shader_type p_stage) +{ + struct iris_screen *screen = (struct iris_screen *) pscreen; + + /* Threaded compilation is only used for the precompile. If precompile is + * disabled, threaded compilation is "done." + */ + if (!screen->precompile) + return true; + + struct iris_uncompiled_shader *ish = v_shader; + + /* When precompile is enabled, the first entry is the precompile variant. + * Check the ready fence of the precompile variant. 
+ */ + struct iris_compiled_shader *first = + list_first_entry(&ish->variants, struct iris_compiled_shader, link); + + return util_queue_fence_is_signalled(&first->ready); +} + static int iris_getparam(int fd, int param, int *value) { @@ -878,10 +911,31 @@ iris_screen_create(int fd, const struct pipe_screen_config *config) pscreen->query_memory_info = iris_query_memory_info; pscreen->get_driver_query_group_info = iris_get_monitor_group_info; pscreen->get_driver_query_info = iris_get_monitor_info; + pscreen->is_parallel_shader_compilation_finished = iris_is_parallel_shader_compilation_finished; + pscreen->set_max_shader_compiler_threads = iris_set_max_shader_compiler_threads; genX_call(&screen->devinfo, init_screen_state, screen); glsl_type_singleton_init_or_ref(); + unsigned compiler_threads = 1; + unsigned hw_threads = sysconf(_SC_NPROCESSORS_ONLN); + + if (hw_threads >= 12) { + compiler_threads = hw_threads * 3 /4; + } else if (hw_threads >= 6) { + compiler_threads = hw_threads - 2; + } else if (hw_threads >= 2) { + compiler_threads = hw_threads - 1; + } + + if (!util_queue_init(&screen->shader_compiler_queue, + "sh", 64, compiler_threads, + UTIL_QUEUE_INIT_RESIZE_IF_FULL | + UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY)) { + /* FINISHME: Release resources. */ + return NULL; + } + return pscreen; } diff --git a/src/gallium/drivers/iris/iris_screen.h b/src/gallium/drivers/iris/iris_screen.h index 503effc1d27..a1c0588ecdf 100644 --- a/src/gallium/drivers/iris/iris_screen.h +++ b/src/gallium/drivers/iris/iris_screen.h @@ -220,6 +220,8 @@ struct iris_screen { struct iris_bo *workaround_bo; struct iris_address workaround_address; + struct util_queue shader_compiler_queue; + struct disk_cache *disk_cache; struct intel_measure_device measure; |