summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ian Romanick <ian.d.romanick@intel.com> 2021-06-04 14:17:42 -0700
committer Ian Romanick <ian.d.romanick@intel.com> 2021-06-04 14:19:57 -0700
commit 433541c7be2fd433c8f2d59bf0dcef3ffaffa911 (patch)
tree 5ea473990a5b6f8e414abb87e70917b8b33ded43
parent 4c2a672634d576e031a61f2ff8727250aa86b043 (diff)
WIP: iris: Enable threaded shader compilation [iris-threaded-shaders]
I think this is pretty close to done. There are a couple of FINISHME comments that need to be addressed, and a couple of minor things that could be improved (e.g., dynamic allocation of the threaded_compile_job). I also want to do a lot more stress testing with real apps and shader-db.
-rw-r--r-- src/gallium/drivers/iris/iris_context.c 3
-rw-r--r-- src/gallium/drivers/iris/iris_context.h 3
-rw-r--r-- src/gallium/drivers/iris/iris_program.c 80
-rw-r--r-- src/gallium/drivers/iris/iris_screen.c 54
-rw-r--r-- src/gallium/drivers/iris/iris_screen.h 2
5 files changed, 134 insertions, 8 deletions
diff --git a/src/gallium/drivers/iris/iris_context.c b/src/gallium/drivers/iris/iris_context.c
index 20ae4af49f7..2c7a6ef1a0e 100644
--- a/src/gallium/drivers/iris/iris_context.c
+++ b/src/gallium/drivers/iris/iris_context.c
@@ -44,6 +44,9 @@ iris_set_debug_callback(struct pipe_context *ctx,
const struct pipe_debug_callback *cb)
{
struct iris_context *ice = (struct iris_context *)ctx;
+ struct iris_screen *screen = (struct iris_screen *)ctx->screen;
+
+ util_queue_finish(&screen->shader_compiler_queue);
if (cb)
ice->dbg = *cb;
diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h
index 1b0955d928b..fb5a00d1bb9 100644
--- a/src/gallium/drivers/iris/iris_context.h
+++ b/src/gallium/drivers/iris/iris_context.h
@@ -415,6 +415,9 @@ struct iris_uncompiled_shader {
/** Lock for the variants list */
simple_mtx_t lock;
+
+ /** For parallel shader compiles */
+ struct util_queue_fence ready;
};
enum iris_surface_group {
diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c
index a0fa6fe9dc7..656062ead9e 100644
--- a/src/gallium/drivers/iris/iris_program.c
+++ b/src/gallium/drivers/iris/iris_program.c
@@ -38,6 +38,7 @@
#include "util/u_atomic.h"
#include "util/u_upload_mgr.h"
#include "util/debug.h"
+#include "util/u_async_debug.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_serialize.h"
@@ -54,6 +55,14 @@
.base.tex.compressed_multisample_layout_mask = ~0, \
.base.tex.msaa_16 = (gen >= 9 ? ~0 : 0)
+struct threaded_compile_job {
+ struct iris_screen *screen;
+ struct u_upload_mgr *uploader;
+ struct pipe_debug_callback *dbg;
+ struct iris_uncompiled_shader *ish;
+ struct iris_compiled_shader *shader;
+};
+
static unsigned
get_new_program_id(struct iris_screen *screen)
{
@@ -1171,6 +1180,41 @@ find_or_add_variant(const struct iris_screen *screen,
return variant;
}
+static void
+threaded_compile_job_delete(void *_job, UNUSED int thread_index)
+{
+ free(_job);
+}
+
+static void
+iris_schedule_compile(struct iris_screen *screen,
+ struct util_queue_fence *ready_fence,
+ struct pipe_debug_callback *dbg,
+ struct threaded_compile_job *job,
+ util_queue_execute_func execute)
+
+{
+ util_queue_fence_init(ready_fence);
+
+ struct util_async_debug_callback async_debug;
+
+ if (dbg) {
+ u_async_debug_init(&async_debug);
+ job->dbg = &async_debug.base;
+ }
+
+ util_queue_add_job(&screen->shader_compiler_queue, job, ready_fence, execute,
+ threaded_compile_job_delete, 0);
+
+ if (screen->driconf.sync_compile || dbg)
+ util_queue_fence_wait(ready_fence);
+
+ if (dbg) {
+ u_async_debug_drain(&async_debug, dbg);
+ u_async_debug_cleanup(&async_debug);
+ }
+}
+
/**
* Compile a vertex shader, and upload the assembly.
*/
@@ -2406,12 +2450,17 @@ iris_create_compute_state(struct pipe_context *ctx,
}
static void
-iris_compile_shader(struct iris_screen *screen,
- struct u_upload_mgr *uploader,
- struct pipe_debug_callback *dbg,
- struct iris_uncompiled_shader *ish,
- struct iris_compiled_shader *shader)
+iris_compile_shader(void *_job, UNUSED int thread_index)
{
+ const struct threaded_compile_job *job =
+ (struct threaded_compile_job *) _job;
+
+ struct iris_screen *screen = job->screen;
+ struct u_upload_mgr *uploader = job->uploader;
+ struct pipe_debug_callback *dbg = job->dbg;
+ struct iris_uncompiled_shader *ish = job->ish;
+ struct iris_compiled_shader *shader = job->shader;
+
switch (ish->nir->info.stage) {
case MESA_SHADER_VERTEX:
iris_compile_vs(screen, uploader, dbg, ish, shader);
@@ -2560,9 +2609,23 @@ iris_create_shader_state(struct pipe_context *ctx,
(enum iris_program_cache_id) info->stage,
&key, key_size, &found);
- if (!found && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
- &key, key_size)) {
- iris_compile_shader(screen, uploader, &ice->dbg, ish, shader);
+ if (!found) {
+ assert(!util_queue_fence_is_signalled(&shader->ready));
+
+ if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
+ &key, key_size)) {
+ assert(!util_queue_fence_is_signalled(&shader->ready));
+
+ struct threaded_compile_job *job = calloc(1, sizeof(*job));
+
+ job->screen = screen;
+ job->uploader = uploader;
+ job->ish = ish;
+ job->shader = shader;
+
+ iris_schedule_compile(screen, &ish->ready, &ice->dbg, job,
+ iris_compile_shader);
+ }
}
}
@@ -2590,6 +2653,7 @@ iris_destroy_shader_state(struct pipe_context *ctx, void *state)
}
simple_mtx_destroy(&ish->lock);
+ util_queue_fence_destroy(&ish->ready);
ralloc_free(ish->nir);
free(ish);
diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c
index 16c3f857b00..cacac162f5c 100644
--- a/src/gallium/drivers/iris/iris_screen.c
+++ b/src/gallium/drivers/iris/iris_screen.c
@@ -621,6 +621,7 @@ void
iris_screen_destroy(struct iris_screen *screen)
{
iris_destroy_screen_measure(screen);
+ util_queue_destroy(&screen->shader_compiler_queue);
glsl_type_singleton_decref();
iris_bo_unreference(screen->workaround_bo);
u_transfer_helper_destroy(screen->base.transfer_helper);
@@ -661,6 +662,38 @@ iris_get_disk_shader_cache(struct pipe_screen *pscreen)
return screen->disk_cache;
}
+static void
+iris_set_max_shader_compiler_threads(struct pipe_screen *pscreen,
+ unsigned max_threads)
+{
+ struct iris_screen *screen = (struct iris_screen *) pscreen;
+ util_queue_adjust_num_threads(&screen->shader_compiler_queue, max_threads);
+}
+
+static bool
+iris_is_parallel_shader_compilation_finished(struct pipe_screen *pscreen,
+ void *v_shader,
+ enum pipe_shader_type p_stage)
+{
+ struct iris_screen *screen = (struct iris_screen *) pscreen;
+
+ /* Threaded compilation is only used for the precompile. If precompile is
+ * disabled, threaded compilation is "done."
+ */
+ if (!screen->precompile)
+ return true;
+
+ struct iris_uncompiled_shader *ish = v_shader;
+
+ /* When precompile is enabled, the first entry is the precompile variant.
+ * Check the ready fence of the precompile variant.
+ */
+ struct iris_compiled_shader *first =
+ list_first_entry(&ish->variants, struct iris_compiled_shader, link);
+
+ return util_queue_fence_is_signalled(&first->ready);
+}
+
static int
iris_getparam(int fd, int param, int *value)
{
@@ -878,10 +911,31 @@ iris_screen_create(int fd, const struct pipe_screen_config *config)
pscreen->query_memory_info = iris_query_memory_info;
pscreen->get_driver_query_group_info = iris_get_monitor_group_info;
pscreen->get_driver_query_info = iris_get_monitor_info;
+ pscreen->is_parallel_shader_compilation_finished = iris_is_parallel_shader_compilation_finished;
+ pscreen->set_max_shader_compiler_threads = iris_set_max_shader_compiler_threads;
genX_call(&screen->devinfo, init_screen_state, screen);
glsl_type_singleton_init_or_ref();
+ unsigned compiler_threads = 1;
+ unsigned hw_threads = sysconf(_SC_NPROCESSORS_ONLN);
+
+ if (hw_threads >= 12) {
+ compiler_threads = hw_threads * 3 /4;
+ } else if (hw_threads >= 6) {
+ compiler_threads = hw_threads - 2;
+ } else if (hw_threads >= 2) {
+ compiler_threads = hw_threads - 1;
+ }
+
+ if (!util_queue_init(&screen->shader_compiler_queue,
+ "sh", 64, compiler_threads,
+ UTIL_QUEUE_INIT_RESIZE_IF_FULL |
+ UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY)) {
+ /* FINISHME: Release resources. */
+ return NULL;
+ }
+
return pscreen;
}
diff --git a/src/gallium/drivers/iris/iris_screen.h b/src/gallium/drivers/iris/iris_screen.h
index 503effc1d27..a1c0588ecdf 100644
--- a/src/gallium/drivers/iris/iris_screen.h
+++ b/src/gallium/drivers/iris/iris_screen.h
@@ -220,6 +220,8 @@ struct iris_screen {
struct iris_bo *workaround_bo;
struct iris_address workaround_address;
+ struct util_queue shader_compiler_queue;
+
struct disk_cache *disk_cache;
struct intel_measure_device measure;