summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTimur Kristóf <timur.kristof@gmail.com>2020-03-30 16:54:56 +0200
committerMarge Bot <eric+marge@anholt.net>2020-04-29 11:51:04 +0000
commitfdbb2968533be9a1caca731cf11c2ed3b46e6043 (patch)
tree22488c35f4524bc37085f98c0a9b83cec2e207c6
parentab07c4ea70897d8d8c4d40bd336aee38926278bf (diff)
aco: Remember VS/TCS output driver locations.
Instead of relying on calling shader_io_get_unique_index repeatedly, remember which output driver location corresponds to which varying slot. Signed-off-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4388>
-rw-r--r--src/amd/compiler/aco_instruction_selection.cpp17
-rw-r--r--src/amd/compiler/aco_instruction_selection_setup.cpp11
2 files changed, 18 insertions, 10 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 5a162907917..f9246247167 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -4224,10 +4224,12 @@ std::pair<Temp, unsigned> get_tcs_per_patch_output_vmem_offset(isel_context *ctx
bool tcs_driver_location_matches_api_mask(isel_context *ctx, nir_intrinsic_instr *instr, bool per_vertex, uint64_t mask, bool *indirect)
{
+ assert(per_vertex || ctx->shader->info.stage == MESA_SHADER_TESS_CTRL);
+
if (mask == 0)
return false;
- unsigned off = nir_intrinsic_base(instr) * 4u;
+ unsigned drv_loc = nir_intrinsic_base(instr);
nir_src *off_src = nir_get_io_offset_src(instr);
if (!nir_src_is_const(*off_src)) {
@@ -4236,15 +4238,10 @@ bool tcs_driver_location_matches_api_mask(isel_context *ctx, nir_intrinsic_instr
}
*indirect = false;
- off += nir_src_as_uint(*off_src) * 16u;
-
- while (mask) {
- unsigned slot = u_bit_scan64(&mask) + (per_vertex ? 0 : VARYING_SLOT_PATCH0);
- if (off == shader_io_get_unique_index((gl_varying_slot) slot) * 16u)
- return true;
- }
-
- return false;
+ uint64_t slot = per_vertex
+ ? ctx->output_drv_loc_to_var_slot[ctx->shader->info.stage][drv_loc / 4]
+ : (ctx->output_tcs_patch_drv_loc_to_var_slot[drv_loc / 4] - VARYING_SLOT_PATCH0);
+ return (((uint64_t) 1) << slot) & mask;
}
bool store_output_to_temps(isel_context *ctx, nir_intrinsic_instr *instr)
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index f3464a7a214..c09d1459846 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -113,6 +113,8 @@ struct isel_context {
/* I/O information */
shader_io_state inputs;
shader_io_state outputs;
+ uint8_t output_drv_loc_to_var_slot[MESA_SHADER_COMPUTE][VARYING_SLOT_MAX];
+ uint8_t output_tcs_patch_drv_loc_to_var_slot[VARYING_SLOT_MAX];
};
Temp get_arg(isel_context *ctx, struct ac_arg arg)
@@ -798,6 +800,9 @@ setup_vs_variables(isel_context *ctx, nir_shader *nir)
variable->data.driver_location = variable->data.location * 4;
else
unreachable("Unsupported VS stage");
+
+ assert(variable->data.location >= 0 && variable->data.location <= UINT8_MAX);
+ ctx->output_drv_loc_to_var_slot[MESA_SHADER_VERTEX][variable->data.driver_location / 4] = variable->data.location;
}
if (ctx->stage == vertex_vs || ctx->stage == ngg_vertex_gs) {
@@ -910,6 +915,12 @@ setup_tcs_variables(isel_context *ctx, nir_shader *nir)
nir_foreach_variable(variable, &nir->outputs) {
variable->data.driver_location = shader_io_get_unique_index((gl_varying_slot) variable->data.location) * 4;
+ assert(variable->data.location >= 0 && variable->data.location <= UINT8_MAX);
+
+ if (variable->data.patch)
+ ctx->output_tcs_patch_drv_loc_to_var_slot[variable->data.driver_location / 4] = variable->data.location;
+ else
+ ctx->output_drv_loc_to_var_slot[MESA_SHADER_TESS_CTRL][variable->data.driver_location / 4] = variable->data.location;
}
ctx->tcs_tess_lvl_out_loc = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER) * 16u;