summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarek Olšák <marek.olsak@amd.com>2024-03-18 00:38:45 -0400
committerMarge Bot <emma+marge@anholt.net>2024-04-04 01:25:06 +0000
commit772149b15ae59056364a880a7715aeb68631d3db (patch)
tree40681cce8a72e38cd7d8086f1ee54b25c79553a9
parentb9b557f2e747a92011ee17269964da09dbc14d07 (diff)
nir/opt_varyings: handle load_input_vertex
Explicit interpolation just loads raw vertex data as-is and lets the FS do the interpolation manually. This adds handling of nir_intrinsic_load_input_vertex, which has 2 different behaviors: undefined vertex ordering and strict vertex ordering. - dead IO removed correctly - constants and uniform expressions are propagated normally - outputs are deduplicated within their own category (strict and non-strict) - outputs used by explicit interpolation are never treated as "convergent" - backward inter-shader code motion is skipped - compaction has 2 new types of vec4 slots: - mixed 32-bit and 16-bit explicit strict (sharing the same vec4) - mixed 32-bit and 16-bit explicit non-strict (sharing the same vec4) Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28247>
-rw-r--r--src/compiler/nir/nir_opt_varyings.c114
1 files changed, 100 insertions, 14 deletions
diff --git a/src/compiler/nir/nir_opt_varyings.c b/src/compiler/nir/nir_opt_varyings.c
index 609f81d3e4f..f9cab8bf901 100644
--- a/src/compiler/nir/nir_opt_varyings.c
+++ b/src/compiler/nir/nir_opt_varyings.c
@@ -504,6 +504,8 @@ enum fs_vec4_type {
FS_VEC4_TYPE_INTERP_FP32,
FS_VEC4_TYPE_INTERP_FP16,
FS_VEC4_TYPE_INTERP_COLOR,
+ FS_VEC4_TYPE_INTERP_EXPLICIT,
+ FS_VEC4_TYPE_INTERP_EXPLICIT_STRICT,
};
static unsigned
@@ -627,6 +629,10 @@ struct linkage_info {
BITSET_DECLARE(interp_fp16_mask, NUM_SCALAR_SLOTS);
BITSET_DECLARE(flat32_mask, NUM_SCALAR_SLOTS);
BITSET_DECLARE(flat16_mask, NUM_SCALAR_SLOTS);
+ BITSET_DECLARE(interp_explicit32_mask, NUM_SCALAR_SLOTS);
+ BITSET_DECLARE(interp_explicit16_mask, NUM_SCALAR_SLOTS);
+ BITSET_DECLARE(interp_explicit_strict32_mask, NUM_SCALAR_SLOTS);
+ BITSET_DECLARE(interp_explicit_strict16_mask, NUM_SCALAR_SLOTS);
/* Color interpolation unqualified (follows the flat-shade state). */
BITSET_DECLARE(color32_mask, NUM_SCALAR_SLOTS);
@@ -689,12 +695,16 @@ print_linkage(struct linkage_info *linkage)
!BITSET_TEST(linkage->interp_fp16_mask, i) &&
!BITSET_TEST(linkage->flat32_mask, i) &&
!BITSET_TEST(linkage->flat16_mask, i) &&
+ !BITSET_TEST(linkage->interp_explicit32_mask, i) &&
+ !BITSET_TEST(linkage->interp_explicit16_mask, i) &&
+ !BITSET_TEST(linkage->interp_explicit_strict32_mask, i) &&
+ !BITSET_TEST(linkage->interp_explicit_strict16_mask, i) &&
!BITSET_TEST(linkage->convergent32_mask, i) &&
!BITSET_TEST(linkage->convergent16_mask, i) &&
!BITSET_TEST(linkage->output_equal_mask, i))
continue;
- printf(" %7s.%c.%s: num_slots=%2u%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+ printf(" %7s.%c.%s: num_slots=%2u%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
gl_varying_slot_name_for_stage(vec4_slot(i),
linkage->producer_stage) + 13,
"xyzw"[(i / 2) % 4],
@@ -710,6 +720,10 @@ print_linkage(struct linkage_info *linkage)
BITSET_TEST(linkage->interp_fp16_mask, i) ? " interp_fp16" : "",
BITSET_TEST(linkage->flat32_mask, i) ? " flat32" : "",
BITSET_TEST(linkage->flat16_mask, i) ? " flat16" : "",
+ BITSET_TEST(linkage->interp_explicit32_mask, i) ? " interp_explicit32" : "",
+ BITSET_TEST(linkage->interp_explicit16_mask, i) ? " interp_explicit16" : "",
+ BITSET_TEST(linkage->interp_explicit_strict32_mask, i) ? " interp_explicit_strict32" : "",
+ BITSET_TEST(linkage->interp_explicit_strict16_mask, i) ? " interp_explicit_strict16" : "",
BITSET_TEST(linkage->convergent32_mask, i) ? " convergent32" : "",
BITSET_TEST(linkage->convergent16_mask, i) ? " convergent16" : "",
BITSET_TEST(linkage->output_equal_mask, i) ? " output_equal" : "",
@@ -730,6 +744,10 @@ slot_disable_optimizations_and_compaction(struct linkage_info *linkage,
BITSET_CLEAR(linkage->interp_fp16_mask, i);
BITSET_CLEAR(linkage->flat32_mask, i);
BITSET_CLEAR(linkage->flat16_mask, i);
+ BITSET_CLEAR(linkage->interp_explicit32_mask, i);
+ BITSET_CLEAR(linkage->interp_explicit16_mask, i);
+ BITSET_CLEAR(linkage->interp_explicit_strict32_mask, i);
+ BITSET_CLEAR(linkage->interp_explicit_strict16_mask, i);
BITSET_CLEAR(linkage->no_varying32_mask, i);
BITSET_CLEAR(linkage->no_varying16_mask, i);
BITSET_CLEAR(linkage->color32_mask, i);
@@ -1040,7 +1058,8 @@ gather_inputs(struct nir_builder *builder, nir_intrinsic_instr *intr, void *cb_d
if (intr->intrinsic != nir_intrinsic_load_input &&
intr->intrinsic != nir_intrinsic_load_per_vertex_input &&
- intr->intrinsic != nir_intrinsic_load_interpolated_input)
+ intr->intrinsic != nir_intrinsic_load_interpolated_input &&
+ intr->intrinsic != nir_intrinsic_load_input_vertex)
return false;
/* nir_lower_io_to_scalar is required before this */
@@ -1074,16 +1093,29 @@ gather_inputs(struct nir_builder *builder, nir_intrinsic_instr *intr, void *cb_d
* have unused components, but only if they are of the same type.
*/
if (linkage->consumer_stage == MESA_SHADER_FRAGMENT) {
- if (intr->intrinsic == nir_intrinsic_load_input)
+ switch (intr->intrinsic) {
+ case nir_intrinsic_load_input:
fs_vec4_type = FS_VEC4_TYPE_FLAT;
- else if (color_uses_shade_model(linkage, slot))
- fs_vec4_type = FS_VEC4_TYPE_INTERP_COLOR;
- else if (intr->def.bit_size == 32)
- fs_vec4_type = FS_VEC4_TYPE_INTERP_FP32;
- else if (intr->def.bit_size == 16)
- fs_vec4_type = FS_VEC4_TYPE_INTERP_FP16;
- else
- unreachable("invalid load_interpolate_input type");
+ break;
+ case nir_intrinsic_load_input_vertex:
+ if (sem.interp_explicit_strict)
+ fs_vec4_type = FS_VEC4_TYPE_INTERP_EXPLICIT_STRICT;
+ else
+ fs_vec4_type = FS_VEC4_TYPE_INTERP_EXPLICIT;
+ break;
+ case nir_intrinsic_load_interpolated_input:
+ if (color_uses_shade_model(linkage, slot))
+ fs_vec4_type = FS_VEC4_TYPE_INTERP_COLOR;
+ else if (intr->def.bit_size == 32)
+ fs_vec4_type = FS_VEC4_TYPE_INTERP_FP32;
+ else if (intr->def.bit_size == 16)
+ fs_vec4_type = FS_VEC4_TYPE_INTERP_FP16;
+ else
+ unreachable("invalid load_interpolated_input type");
+ break;
+ default:
+ unreachable("unexpected input load intrinsic");
+ }
linkage->fs_vec4_type[sem.location] = fs_vec4_type;
}
@@ -1107,14 +1139,33 @@ gather_inputs(struct nir_builder *builder, nir_intrinsic_instr *intr, void *cb_d
/* Record inputs that can be compacted. */
if (linkage->consumer_stage == MESA_SHADER_FRAGMENT) {
- if (intr->intrinsic == nir_intrinsic_load_input) {
+ switch (intr->intrinsic) {
+ case nir_intrinsic_load_input:
if (intr->def.bit_size == 32)
BITSET_SET(linkage->flat32_mask, slot);
else if (intr->def.bit_size == 16)
BITSET_SET(linkage->flat16_mask, slot);
else
unreachable("invalid load_input type");
- } else {
+ break;
+ case nir_intrinsic_load_input_vertex:
+ if (sem.interp_explicit_strict) {
+ if (intr->def.bit_size == 32)
+ BITSET_SET(linkage->interp_explicit_strict32_mask, slot);
+ else if (intr->def.bit_size == 16)
+ BITSET_SET(linkage->interp_explicit_strict16_mask, slot);
+ else
+ unreachable("invalid load_input_vertex type");
+ } else {
+ if (intr->def.bit_size == 32)
+ BITSET_SET(linkage->interp_explicit32_mask, slot);
+ else if (intr->def.bit_size == 16)
+ BITSET_SET(linkage->interp_explicit16_mask, slot);
+ else
+ unreachable("invalid load_input_vertex type");
+ }
+ break;
+ case nir_intrinsic_load_interpolated_input:
if (color_uses_shade_model(linkage, slot))
BITSET_SET(linkage->color32_mask, slot);
else if (intr->def.bit_size == 32)
@@ -1122,7 +1173,10 @@ gather_inputs(struct nir_builder *builder, nir_intrinsic_instr *intr, void *cb_d
else if (intr->def.bit_size == 16)
BITSET_SET(linkage->interp_fp16_mask, slot);
else
- unreachable("invalid load_interpolate_input type");
+ unreachable("invalid load_interpolated_input type");
+ break;
+ default:
+ unreachable("unexpected input load intrinsic");
}
} else {
if (intr->def.bit_size == 32)
@@ -2117,6 +2171,8 @@ enum var_qualifier {
QUAL_PATCH,
QUAL_VAR_FLAT,
QUAL_COLOR_FLAT,
+ QUAL_EXPLICIT,
+ QUAL_EXPLICIT_STRICT,
/* When nir_io_has_flexible_input_interpolation_except_flat is set: */
QUAL_VAR_INTERP_ANY,
QUAL_COLOR_INTERP_ANY,
@@ -2158,6 +2214,11 @@ get_input_qualifier(struct linkage_info *linkage, unsigned i)
if (load->intrinsic == nir_intrinsic_load_input)
return is_color ? QUAL_COLOR_FLAT : QUAL_VAR_FLAT;
+ if (load->intrinsic == nir_intrinsic_load_input_vertex) {
+ return nir_intrinsic_io_semantics(load).interp_explicit_strict ?
+ QUAL_EXPLICIT_STRICT : QUAL_EXPLICIT;
+ }
+
assert(load->intrinsic == nir_intrinsic_load_interpolated_input);
nir_intrinsic_instr *baryc =
nir_instr_as_intrinsic(load->src[0].ssa->parent_instr);
@@ -3357,6 +3418,11 @@ backward_inter_shader_code_motion(struct linkage_info *linkage,
load->instr.pass_flags |= FLAG_INTERP_FLAT;
}
break;
+ case nir_intrinsic_load_input_vertex:
+ /* Inter-shader code motion is unimplemented for explicit
+ * interpolation.
+ */
+ continue;
default:
unreachable("unexpected load intrinsic");
}
@@ -3893,6 +3959,26 @@ compact_varyings(struct linkage_info *linkage,
linkage->convergent16_mask, NULL,
FS_VEC4_TYPE_INTERP_FP16, 1, false, 0, progress);
+ /* Assign INTERP_MODE_EXPLICIT. Both FP32 and FP16 can occupy the same
+ * slot because the vertex data is passed to FS as-is.
+ */
+ fs_assign_slots(linkage, assigned_mask, NULL,
+ linkage->interp_explicit32_mask, FS_VEC4_TYPE_INTERP_EXPLICIT,
+ 2, NUM_SCALAR_SLOTS, false, 0, progress);
+
+ fs_assign_slots(linkage, assigned_mask, NULL,
+ linkage->interp_explicit16_mask, FS_VEC4_TYPE_INTERP_EXPLICIT,
+ 1, NUM_SCALAR_SLOTS, false, 0, progress);
+
+ /* Same for strict vertex ordering. */
+ fs_assign_slots(linkage, assigned_mask, NULL,
+ linkage->interp_explicit_strict32_mask, FS_VEC4_TYPE_INTERP_EXPLICIT_STRICT,
+ 2, NUM_SCALAR_SLOTS, false, 0, progress);
+
+ fs_assign_slots(linkage, assigned_mask, NULL,
+ linkage->interp_explicit_strict16_mask, FS_VEC4_TYPE_INTERP_EXPLICIT_STRICT,
+ 1, NUM_SCALAR_SLOTS, false, 0, progress);
+
/* Put transform-feedback-only outputs last. */
fs_assign_slots(linkage, assigned_mask, NULL,
linkage->xfb32_only_mask, FS_VEC4_TYPE_NONE, 2,