diff options
author | Alyssa Rosenzweig <alyssa@rosenzweig.io> | 2024-03-07 13:47:14 -0400 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2024-03-12 18:17:17 +0000 |
commit | a6123a80dab3da3b52125bf42821a4c8be2c93b4 (patch) | |
tree | ef9d7faa6b12d2748708d5d1375738c9c984ad8c | |
parent | aa99753a283becc5fbf554a11a9b01594a278a2e (diff) |
nir/opt_shrink_vectors: shrink some intrinsics from start
If the backend supports it, intrinsics with a component() index are straightforward to
shrink from the start: unused leading components are dropped by advancing component() and reswizzling the ALU uses. Notably helps vectorized I/O.
v2: add an option (shrink_start) for this and enable it only on grown-up backends, because some
backends ignore the component() parameter.
RADV GFX11:
Totals from 921 (1.16% of 79439) affected shaders:
Instrs: 616558 -> 615529 (-0.17%); split: -0.30%, +0.14%
CodeSize: 3099864 -> 3095632 (-0.14%); split: -0.25%, +0.11%
Latency: 2177075 -> 2160966 (-0.74%); split: -0.79%, +0.05%
InvThroughput: 299997 -> 298664 (-0.44%); split: -0.47%, +0.02%
VClause: 16343 -> 16395 (+0.32%); split: -0.01%, +0.32%
SClause: 10715 -> 10714 (-0.01%)
Copies: 24736 -> 24701 (-0.14%); split: -0.37%, +0.23%
PreVGPRs: 30179 -> 30173 (-0.02%)
VALU: 353472 -> 353439 (-0.01%); split: -0.03%, +0.02%
SALU: 40323 -> 40322 (-0.00%)
VMEM: 25353 -> 25352 (-0.00%)
AGX:
total instructions in shared programs: 2038217 -> 2038049 (<.01%)
instructions in affected programs: 10249 -> 10081 (-1.64%)
total alu in shared programs: 1593094 -> 1592939 (<.01%)
alu in affected programs: 7145 -> 6990 (-2.17%)
total fscib in shared programs: 1589254 -> 1589102 (<.01%)
fscib in affected programs: 7217 -> 7065 (-2.11%)
total bytes in shared programs: 13975666 -> 13974722 (<.01%)
bytes in affected programs: 65942 -> 64998 (-1.43%)
total regs in shared programs: 592758 -> 591187 (-0.27%)
regs in affected programs: 6936 -> 5365 (-22.65%)
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> (v1)
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28004>
-rw-r--r-- | src/amd/vulkan/nir/radv_nir_lower_io.c | 2 | ||||
-rw-r--r-- | src/amd/vulkan/radv_pipeline.c | 2 | ||||
-rw-r--r-- | src/amd/vulkan/radv_shader.c | 2 | ||||
-rw-r--r-- | src/asahi/clc/asahi_clc.c | 2 | ||||
-rw-r--r-- | src/asahi/compiler/agx_compile.c | 2 | ||||
-rw-r--r-- | src/compiler/nir/nir.h | 2 | ||||
-rw-r--r-- | src/compiler/nir/nir_opt_shrink_vectors.c | 54 | ||||
-rw-r--r-- | src/compiler/nir/tests/opt_shrink_vectors_tests.cpp | 22 | ||||
-rw-r--r-- | src/gallium/auxiliary/nir/nir_to_tgsi.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/i915/i915_screen.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r300/compiler/nir_to_rc.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r300/compiler/r300_nir.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/zink/zink_compiler.c | 2 | ||||
-rw-r--r-- | src/intel/compiler/brw_nir.c | 2 | ||||
-rw-r--r-- | src/intel/compiler/elk/elk_nir.c | 4 | ||||
-rw-r--r-- | src/nouveau/compiler/nak_nir.c | 2 | ||||
-rw-r--r-- | src/nouveau/vulkan/nvk_codegen.c | 2 | ||||
-rw-r--r-- | src/panfrost/compiler/bifrost_compile.c | 2 |
19 files changed, 69 insertions, 43 deletions
diff --git a/src/amd/vulkan/nir/radv_nir_lower_io.c b/src/amd/vulkan/nir/radv_nir_lower_io.c index d9cebde0367..839378fb1bb 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_io.c +++ b/src/amd/vulkan/nir/radv_nir_lower_io.c @@ -45,7 +45,7 @@ radv_nir_lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask) if (progress) { /* Optimize the new vector code and then remove dead vars */ NIR_PASS(_, nir, nir_copy_prop); - NIR_PASS(_, nir, nir_opt_shrink_vectors); + NIR_PASS(_, nir, nir_opt_shrink_vectors, true); if (mask & nir_var_shader_out) { /* Optimize swizzled movs of load_const for nir_link_opt_varyings's constant propagation. */ diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 626fe4cbd80..61cd8c1a7e5 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -562,7 +562,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat NIR_PASS_V(stage->nir, radv_nir_apply_pipeline_layout, device, stage); if (!stage->key.optimisations_disabled) { - NIR_PASS(_, stage->nir, nir_opt_shrink_vectors); + NIR_PASS(_, stage->nir, nir_opt_shrink_vectors, true); } NIR_PASS(_, stage->nir, nir_lower_alu_width, opt_vectorize_callback, device); diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index bc0ec08d5c2..cec1b1f7196 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -178,7 +178,7 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively) } while (progress && !optimize_conservatively); _mesa_set_destroy(skip, NULL); - NIR_PASS(progress, shader, nir_opt_shrink_vectors); + NIR_PASS(progress, shader, nir_opt_shrink_vectors, true); NIR_PASS(progress, shader, nir_remove_dead_variables, nir_var_function_temp | nir_var_shader_in | nir_var_shader_out | nir_var_mem_shared, NULL); diff --git a/src/asahi/clc/asahi_clc.c b/src/asahi/clc/asahi_clc.c index 3c40ec81569..9db43a959d8 100644 --- a/src/asahi/clc/asahi_clc.c +++ 
b/src/asahi/clc/asahi_clc.c @@ -147,7 +147,7 @@ optimize(nir_shader *nir) NIR_PASS(progress, nir, nir_opt_undef); NIR_PASS(progress, nir, nir_lower_undef_to_zero); - NIR_PASS(progress, nir, nir_opt_shrink_vectors); + NIR_PASS(progress, nir, nir_opt_shrink_vectors, true); NIR_PASS(progress, nir, nir_opt_loop_unroll); NIR_PASS(progress, nir, nir_split_var_copies); diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index 4c773b4c019..7c013d7b28c 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -2396,7 +2396,7 @@ agx_optimize_loop_nir(nir_shader *nir) NIR_PASS(progress, nir, nir_opt_algebraic); NIR_PASS(progress, nir, nir_opt_constant_folding); NIR_PASS(progress, nir, nir_opt_undef); - NIR_PASS(progress, nir, nir_opt_shrink_vectors); + NIR_PASS(progress, nir, nir_opt_shrink_vectors, true); NIR_PASS(progress, nir, nir_opt_loop_unroll); } while (progress); } diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 6053764b771..aadfefe0304 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -6406,7 +6406,7 @@ bool nir_opt_phi_precision(nir_shader *shader); bool nir_opt_shrink_stores(nir_shader *shader, bool shrink_image_store); -bool nir_opt_shrink_vectors(nir_shader *shader); +bool nir_opt_shrink_vectors(nir_shader *shader, bool shrink_start); bool nir_opt_undef(nir_shader *shader); diff --git a/src/compiler/nir/nir_opt_shrink_vectors.c b/src/compiler/nir/nir_opt_shrink_vectors.c index 8164ba7ce8c..8e8da5e62ee 100644 --- a/src/compiler/nir/nir_opt_shrink_vectors.c +++ b/src/compiler/nir/nir_opt_shrink_vectors.c @@ -33,7 +33,7 @@ * For memory loads, while it can be tricky to eliminate unused low components * or channels in the middle of a writemask (you might need to increment some * offset from a load_uniform, for example), it is trivial to just drop the - * trailing components. + * trailing components. This pass shrinks low components on select intrinsics. 
* For vector ALU and load_const, only used by other ALU instructions, * this pass eliminates arbitrary channels as well as duplicate channels, * and reswizzles the uses. @@ -84,7 +84,7 @@ is_only_used_by_alu(nir_def *def) } static bool -shrink_dest_to_read_mask(nir_def *def) +shrink_dest_to_read_mask(nir_def *def, bool shrink_start) { /* early out if there's nothing to do. */ if (def->num_components == 1) @@ -97,18 +97,42 @@ shrink_dest_to_read_mask(nir_def *def) } unsigned mask = nir_def_components_read(def); - int last_bit = util_last_bit(mask); /* If nothing was read, leave it up to DCE. */ if (!mask) return false; - unsigned rounded = round_up_components(last_bit); + nir_intrinsic_instr *intr = NULL; + if (def->parent_instr->type == nir_instr_type_intrinsic) + intr = nir_instr_as_intrinsic(def->parent_instr); + + shrink_start &= (intr != NULL) && nir_intrinsic_has_component(intr) && + is_only_used_by_alu(def); + + int last_bit = util_last_bit(mask); + int first_bit = shrink_start ? (ffs(mask) - 1) : 0; + + const unsigned comps = last_bit - first_bit; + const unsigned rounded = round_up_components(comps); assert(rounded <= def->num_components); - last_bit = rounded; - if (def->num_components > last_bit) { - def->num_components = last_bit; + if ((def->num_components > rounded) || first_bit > 0) { + def->num_components = rounded; + + if (first_bit) { + assert(shrink_start); + + nir_intrinsic_set_component(intr, nir_intrinsic_component(intr) + first_bit); + + /* Reswizzle sources, which must be ALU since they have swizzle */ + uint8_t swizzle[NIR_MAX_VEC_COMPONENTS] = { 0 }; + for (unsigned i = 0; i < comps; ++i) { + swizzle[first_bit + i] = i; + } + + reswizzle_alu_uses(def, swizzle); + } + return true; } @@ -281,7 +305,8 @@ opt_shrink_vectors_alu(nir_builder *b, nir_alu_instr *instr) } static bool -opt_shrink_vectors_intrinsic(nir_builder *b, nir_intrinsic_instr *instr) +opt_shrink_vectors_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, + bool shrink_start) 
{ switch (instr->intrinsic) { case nir_intrinsic_load_uniform: @@ -302,7 +327,7 @@ opt_shrink_vectors_intrinsic(nir_builder *b, nir_intrinsic_instr *instr) assert(instr->num_components != 0); /* Trim the dest to the used channels */ - if (!shrink_dest_to_read_mask(&instr->def)) + if (!shrink_dest_to_read_mask(&instr->def, shrink_start)) return false; instr->num_components = instr->def.num_components; @@ -397,7 +422,7 @@ opt_shrink_vectors_load_const(nir_load_const_instr *instr) static bool opt_shrink_vectors_ssa_undef(nir_undef_instr *instr) { - return shrink_dest_to_read_mask(&instr->def); + return shrink_dest_to_read_mask(&instr->def, false); } static bool @@ -497,7 +522,7 @@ opt_shrink_vectors_phi(nir_builder *b, nir_phi_instr *instr) } static bool -opt_shrink_vectors_instr(nir_builder *b, nir_instr *instr) +opt_shrink_vectors_instr(nir_builder *b, nir_instr *instr, bool shrink_start) { b->cursor = nir_before_instr(instr); @@ -509,7 +534,8 @@ opt_shrink_vectors_instr(nir_builder *b, nir_instr *instr) return opt_shrink_vectors_tex(b, nir_instr_as_tex(instr)); case nir_instr_type_intrinsic: - return opt_shrink_vectors_intrinsic(b, nir_instr_as_intrinsic(instr)); + return opt_shrink_vectors_intrinsic(b, nir_instr_as_intrinsic(instr), + shrink_start); case nir_instr_type_load_const: return opt_shrink_vectors_load_const(nir_instr_as_load_const(instr)); @@ -528,7 +554,7 @@ opt_shrink_vectors_instr(nir_builder *b, nir_instr *instr) } bool -nir_opt_shrink_vectors(nir_shader *shader) +nir_opt_shrink_vectors(nir_shader *shader, bool shrink_start) { bool progress = false; @@ -537,7 +563,7 @@ nir_opt_shrink_vectors(nir_shader *shader) nir_foreach_block_reverse(block, impl) { nir_foreach_instr_reverse(instr, block) { - progress |= opt_shrink_vectors_instr(&b, instr); + progress |= opt_shrink_vectors_instr(&b, instr, shrink_start); } } diff --git a/src/compiler/nir/tests/opt_shrink_vectors_tests.cpp b/src/compiler/nir/tests/opt_shrink_vectors_tests.cpp index 
85e3032543b..82e31dc2eba 100644 --- a/src/compiler/nir/tests/opt_shrink_vectors_tests.cpp +++ b/src/compiler/nir/tests/opt_shrink_vectors_tests.cpp @@ -93,7 +93,7 @@ TEST_F(nir_opt_shrink_vectors_test, opt_shrink_vectors_load_const_trailing_compo nir_store_var(b, out_var, alu_result, 1); - ASSERT_TRUE(nir_opt_shrink_vectors(b->shader)); + ASSERT_TRUE(nir_opt_shrink_vectors(b->shader, true)); nir_validate_shader(b->shader, NULL); @@ -101,7 +101,7 @@ TEST_F(nir_opt_shrink_vectors_test, opt_shrink_vectors_load_const_trailing_compo nir_load_const_instr * imm_vec_instr = nir_instr_as_load_const(imm_vec->parent_instr); ASSERT_TRUE(nir_const_value_as_float(imm_vec_instr->value[0], 32) == 1.0); - ASSERT_FALSE(nir_opt_shrink_vectors(b->shader)); + ASSERT_FALSE(nir_opt_shrink_vectors(b->shader, true)); } TEST_F(nir_opt_shrink_vectors_test, opt_shrink_vectors_alu_trailing_component_only) @@ -130,14 +130,14 @@ TEST_F(nir_opt_shrink_vectors_test, opt_shrink_vectors_alu_trailing_component_on nir_store_var(b, out_var, alu2_result, 1); - ASSERT_TRUE(nir_opt_shrink_vectors(b->shader)); + ASSERT_TRUE(nir_opt_shrink_vectors(b->shader, true)); nir_validate_shader(b->shader, NULL); check_swizzle(&alu_instr->src[0], "x"); ASSERT_TRUE(alu_result->num_components == 1); - ASSERT_FALSE(nir_opt_shrink_vectors(b->shader)); + ASSERT_FALSE(nir_opt_shrink_vectors(b->shader, true)); } TEST_F(nir_opt_shrink_vectors_test, opt_shrink_vectors_simple) @@ -170,7 +170,7 @@ TEST_F(nir_opt_shrink_vectors_test, opt_shrink_vectors_simple) nir_store_var(b, out_var, alu2_result, 1); - ASSERT_TRUE(nir_opt_shrink_vectors(b->shader)); + ASSERT_TRUE(nir_opt_shrink_vectors(b->shader, true)); nir_validate_shader(b->shader, NULL); @@ -186,7 +186,7 @@ TEST_F(nir_opt_shrink_vectors_test, opt_shrink_vectors_simple) check_swizzle(&alu2_instr->src[0], "xxy"); check_swizzle(&alu2_instr->src[1], "xxy"); - ASSERT_FALSE(nir_opt_shrink_vectors(b->shader)); + ASSERT_FALSE(nir_opt_shrink_vectors(b->shader, true)); 
nir_validate_shader(b->shader, NULL); } @@ -235,7 +235,7 @@ TEST_F(nir_opt_shrink_vectors_test, opt_shrink_vectors_vec8) nir_store_var(b, out_var, alu2_result, 1); - ASSERT_TRUE(nir_opt_shrink_vectors(b->shader)); + ASSERT_TRUE(nir_opt_shrink_vectors(b->shader, true)); nir_validate_shader(b->shader, NULL); @@ -255,7 +255,7 @@ TEST_F(nir_opt_shrink_vectors_test, opt_shrink_vectors_vec8) check_swizzle(&alu2_instr->src[0], "abbcdefg"); check_swizzle(&alu2_instr->src[1], "abbcdefg"); - ASSERT_FALSE(nir_opt_shrink_vectors(b->shader)); + ASSERT_FALSE(nir_opt_shrink_vectors(b->shader, true)); nir_validate_shader(b->shader, NULL); } @@ -357,7 +357,7 @@ TEST_F(nir_opt_shrink_vectors_test, opt_shrink_phis_loop_simple) nir_validate_shader(b->shader, NULL); - ASSERT_TRUE(nir_opt_shrink_vectors(b->shader)); + ASSERT_TRUE(nir_opt_shrink_vectors(b->shader, true)); ASSERT_TRUE(phi_def->num_components == 1); check_swizzle(&fge_alu_instr->src[0], "x"); check_swizzle(&fadd_alu_instr->src[0], "x"); @@ -461,7 +461,7 @@ TEST_F(nir_opt_shrink_vectors_test, opt_shrink_phis_loop_swizzle) nir_validate_shader(b->shader, NULL); - ASSERT_TRUE(nir_opt_shrink_vectors(b->shader)); + ASSERT_TRUE(nir_opt_shrink_vectors(b->shader, true)); ASSERT_TRUE(phi_def->num_components == 2); check_swizzle(&fge_alu_instr->src[0], "y"); @@ -574,6 +574,6 @@ TEST_F(nir_opt_shrink_vectors_test, opt_shrink_phis_loop_phi_out) nir_validate_shader(b->shader, NULL); - ASSERT_FALSE(nir_opt_shrink_vectors(b->shader)); + ASSERT_FALSE(nir_opt_shrink_vectors(b->shader, true)); ASSERT_TRUE(phi_def->num_components == 4); } diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c index 14112ee71aa..99b2c70c17a 100644 --- a/src/gallium/auxiliary/nir/nir_to_tgsi.c +++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c @@ -3343,7 +3343,7 @@ ntt_optimize_nir(struct nir_shader *s, struct pipe_screen *screen, }; NIR_PASS(progress, s, nir_opt_load_store_vectorize, &vectorize_opts); NIR_PASS(progress, s, 
nir_opt_shrink_stores, true); - NIR_PASS(progress, s, nir_opt_shrink_vectors); + NIR_PASS(progress, s, nir_opt_shrink_vectors, false); NIR_PASS(progress, s, nir_opt_loop); NIR_PASS(progress, s, nir_opt_vectorize, ntt_should_vectorize_instr, NULL); NIR_PASS(progress, s, nir_opt_undef); diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c index 00fee4dd72f..377f4cc518d 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c @@ -148,7 +148,7 @@ etna_optimize_loop(nir_shader *s) NIR_PASS_V(s, nir_lower_vars_to_ssa); progress |= OPT(s, nir_opt_copy_prop_vars); progress |= OPT(s, nir_opt_shrink_stores, true); - progress |= OPT(s, nir_opt_shrink_vectors); + progress |= OPT(s, nir_opt_shrink_vectors, false); progress |= OPT(s, nir_copy_prop); progress |= OPT(s, nir_opt_dce); progress |= OPT(s, nir_opt_cse); diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index bfa7ea04b9f..8b4746bf49c 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -209,7 +209,7 @@ i915_optimize_nir(struct nir_shader *s) NIR_PASS(progress, s, nir_opt_algebraic); NIR_PASS(progress, s, nir_opt_constant_folding); NIR_PASS(progress, s, nir_opt_shrink_stores, true); - NIR_PASS(progress, s, nir_opt_shrink_vectors); + NIR_PASS(progress, s, nir_opt_shrink_vectors, false); NIR_PASS(progress, s, nir_opt_loop); NIR_PASS(progress, s, nir_opt_undef); NIR_PASS(progress, s, nir_opt_loop_unroll); diff --git a/src/gallium/drivers/r300/compiler/nir_to_rc.c b/src/gallium/drivers/r300/compiler/nir_to_rc.c index d909389f03e..59dd6feec6e 100644 --- a/src/gallium/drivers/r300/compiler/nir_to_rc.c +++ b/src/gallium/drivers/r300/compiler/nir_to_rc.c @@ -2418,7 +2418,7 @@ const void *nir_to_rc_options(struct nir_shader *s, NIR_PASS_V(s, nir_opt_vectorize, ntr_should_vectorize_instr, NULL); do { progress = 
false; - NIR_PASS(progress, s, nir_opt_shrink_vectors); + NIR_PASS(progress, s, nir_opt_shrink_vectors, false); } while (progress); NIR_PASS_V(s, nir_copy_prop); NIR_PASS_V(s, nir_opt_cse); diff --git a/src/gallium/drivers/r300/compiler/r300_nir.c b/src/gallium/drivers/r300/compiler/r300_nir.c index 8b3ff1d378a..fb1e412d5f5 100644 --- a/src/gallium/drivers/r300/compiler/r300_nir.c +++ b/src/gallium/drivers/r300/compiler/r300_nir.c @@ -150,7 +150,7 @@ r300_optimize_nir(struct nir_shader *s, struct pipe_screen *screen) }; NIR_PASS(progress, s, nir_opt_load_store_vectorize, &vectorize_opts); NIR_PASS(progress, s, nir_opt_shrink_stores, true); - NIR_PASS(progress, s, nir_opt_shrink_vectors); + NIR_PASS(progress, s, nir_opt_shrink_vectors, false); NIR_PASS(progress, s, nir_opt_loop); NIR_PASS(progress, s, nir_opt_vectorize, r300_should_vectorize_instr, NULL); NIR_PASS(progress, s, nir_opt_undef); diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index 844f4d36983..20ddc8ad55f 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -1487,7 +1487,7 @@ optimize_nir(struct nir_shader *s, struct zink_shader *zs, bool can_shrink) if (zs) NIR_PASS(progress, s, bound_bo_access, zs); if (can_shrink) - NIR_PASS(progress, s, nir_opt_shrink_vectors); + NIR_PASS(progress, s, nir_opt_shrink_vectors, false); } while (progress); do { diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index dbadd56d52f..cb67f538ebd 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -1667,7 +1667,7 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler, * vec1 ssa_2 = ffma ssa_1, ... 
*/ if (OPT(intel_nir_opt_peephole_ffma)) - OPT(nir_opt_shrink_vectors); + OPT(nir_opt_shrink_vectors, false); OPT(intel_nir_opt_peephole_imul32x16); diff --git a/src/intel/compiler/elk/elk_nir.c b/src/intel/compiler/elk/elk_nir.c index 0720bf307fd..94e10452543 100644 --- a/src/intel/compiler/elk/elk_nir.c +++ b/src/intel/compiler/elk/elk_nir.c @@ -632,7 +632,7 @@ elk_nir_optimize(nir_shader *nir, bool is_scalar, OPT(nir_lower_alu_to_scalar, NULL, NULL); } else { OPT(nir_opt_shrink_stores, true); - OPT(nir_opt_shrink_vectors); + OPT(nir_opt_shrink_vectors, false); } OPT(nir_copy_prop); @@ -1401,7 +1401,7 @@ elk_postprocess_nir(nir_shader *nir, const struct elk_compiler *compiler, * vec1 ssa_2 = ffma ssa_1, ... */ if (OPT(intel_nir_opt_peephole_ffma)) - OPT(nir_opt_shrink_vectors); + OPT(nir_opt_shrink_vectors, false); } if (is_scalar) diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c index caaf3dadff4..f9590c817f8 100644 --- a/src/nouveau/compiler/nak_nir.c +++ b/src/nouveau/compiler/nak_nir.c @@ -1198,7 +1198,7 @@ nak_postprocess_nir(nir_shader *nir, nak_optimize_nir(nir, nak); } - OPT(nir, nir_opt_shrink_vectors); + OPT(nir, nir_opt_shrink_vectors, true); nir_load_store_vectorize_options vectorize_opts = {}; vectorize_opts.modes = nir_var_mem_global | diff --git a/src/nouveau/vulkan/nvk_codegen.c b/src/nouveau/vulkan/nvk_codegen.c index e4b6423c26b..a1eae570c21 100644 --- a/src/nouveau/vulkan/nvk_codegen.c +++ b/src/nouveau/vulkan/nvk_codegen.c @@ -174,7 +174,7 @@ nvk_cg_optimize_nir(nir_shader *nir) } } while (progress); - NIR_PASS(progress, nir, nir_opt_shrink_vectors); + NIR_PASS(progress, nir, nir_opt_shrink_vectors, true); NIR_PASS(progress, nir, nir_remove_dead_variables, nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, NULL); } diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c index 7ec98e701bb..dbbae9339e1 100644 --- a/src/panfrost/compiler/bifrost_compile.c +++ 
b/src/panfrost/compiler/bifrost_compile.c @@ -4452,7 +4452,7 @@ bi_optimize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend) NIR_PASS(progress, nir, nir_opt_undef); NIR_PASS(progress, nir, nir_lower_undef_to_zero); - NIR_PASS(progress, nir, nir_opt_shrink_vectors); + NIR_PASS(progress, nir, nir_opt_shrink_vectors, false); NIR_PASS(progress, nir, nir_opt_loop_unroll); } while (progress); |