radv: Refactor calculate_tess_lds_size and get_tcs_num_patches.

Previously, these functions took the bit masks of the TCS outputs and patch outputs written, and derived the number of outputs from those masks. Now they take the number of outputs and the number of patch outputs directly. This will allow the backend compiler to better optimize the LDS layout.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4388>
author: Timur Kristóf <timur.kristof@gmail.com> 2020-03-30 16:04:53 +0200
committer: Marge Bot <eric+marge@anholt.net> 2020-04-29 11:51:04 +0000
commit: fd0248c37bfaa0dabbab11fc3060ebe52443eb05 (patch)
tree: e6e478b08f22cda83eca20db6a9bc9ab92613a92
parent: 9392ddab4399d796fdf37602f586965ec17f2b2a (diff)
3 files changed, 25 insertions, 21 deletions
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index 80280319673..bf9e96e0b1c 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -105,6 +105,8 @@ struct isel_context {
    unsigned tcs_tess_lvl_in_loc;
    uint64_t tcs_temp_only_inputs;
    uint32_t tcs_num_inputs;
+   uint32_t tcs_num_outputs;
+   uint32_t tcs_num_patch_outputs;
    uint32_t tcs_num_patches;
    bool tcs_in_out_eq = false;
 
@@ -871,12 +873,15 @@ setup_tcs_info(isel_context *ctx, nir_shader *nir)
       unreachable("Unsupported TCS shader stage");
    }
 
+   ctx->tcs_num_outputs = util_last_bit64(ctx->args->shader_info->tcs.outputs_written);
+   ctx->tcs_num_patch_outputs = util_last_bit64(ctx->args->shader_info->tcs.patch_outputs_written);
+
    ctx->tcs_num_patches = get_tcs_num_patches(
                              ctx->args->options->key.tcs.input_vertices,
                              nir->info.tess.tcs_vertices_out,
                              ctx->tcs_num_inputs,
-                             ctx->args->shader_info->tcs.outputs_written,
-                             ctx->args->shader_info->tcs.patch_outputs_written,
+                             ctx->tcs_num_outputs,
+                             ctx->tcs_num_patch_outputs,
                              ctx->args->options->tess_offchip_block_dw_size,
                              ctx->args->options->chip_class,
                              ctx->args->options->family);
@@ -885,8 +890,8 @@ setup_tcs_info(isel_context *ctx, nir_shader *nir)
                              nir->info.tess.tcs_vertices_out,
                              ctx->tcs_num_inputs,
                              ctx->tcs_num_patches,
-                             ctx->args->shader_info->tcs.outputs_written,
-                             ctx->args->shader_info->tcs.patch_outputs_written);
+                             ctx->tcs_num_outputs,
+                             ctx->tcs_num_patch_outputs);
 
    ctx->args->shader_info->tcs.num_patches = ctx->tcs_num_patches;
    ctx->args->shader_info->tcs.lds_size = lds_size;
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index a40467a3194..3f214f79b92 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -4004,13 +4004,15 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
 				ctx.tcs_num_inputs = args->options->key.tcs.num_inputs;
 			else
 				ctx.tcs_num_inputs = util_last_bit64(args->shader_info->vs.ls_outputs_written);
+			unsigned tcs_num_outputs = util_last_bit64(ctx.args->shader_info->tcs.outputs_written);
+			unsigned tcs_num_patch_outputs = util_last_bit64(ctx.args->shader_info->tcs.patch_outputs_written);
 			ctx.tcs_num_patches =
 				get_tcs_num_patches(
 					ctx.args->options->key.tcs.input_vertices,
 					ctx.shader->info.tess.tcs_vertices_out,
 					ctx.tcs_num_inputs,
-					ctx.args->shader_info->tcs.outputs_written,
-					ctx.args->shader_info->tcs.patch_outputs_written,
+					tcs_num_outputs,
+					tcs_num_patch_outputs,
 					ctx.args->options->tess_offchip_block_dw_size,
 					ctx.args->options->chip_class,
 					ctx.args->options->family);
@@ -4114,6 +4116,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
 		}
 
 		if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
+			unsigned tcs_num_outputs = util_last_bit64(ctx.args->shader_info->tcs.outputs_written);
+			unsigned tcs_num_patch_outputs = util_last_bit64(ctx.args->shader_info->tcs.patch_outputs_written);
 			args->shader_info->tcs.num_patches = ctx.tcs_num_patches;
 			args->shader_info->tcs.lds_size =
 				calculate_tess_lds_size(
@@ -4121,8 +4125,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
 					ctx.shader->info.tess.tcs_vertices_out,
 					ctx.tcs_num_inputs,
 					ctx.tcs_num_patches,
-					ctx.args->shader_info->tcs.outputs_written,
-					ctx.args->shader_info->tcs.patch_outputs_written);
+					tcs_num_outputs,
+					tcs_num_patch_outputs);
 		}
 	}
 
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 165df3afe2e..608900b5419 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -527,19 +527,16 @@ calculate_tess_lds_size(unsigned tcs_num_input_vertices,
 			unsigned tcs_num_output_vertices,
 			unsigned tcs_num_inputs,
 			unsigned tcs_num_patches,
-			unsigned tcs_outputs_written,
-			unsigned tcs_per_patch_outputs_written)
+			unsigned tcs_num_outputs,
+			unsigned tcs_num_patch_outputs)
 {
-	unsigned num_tcs_outputs = util_last_bit64(tcs_outputs_written);
-	unsigned num_tcs_patch_outputs = util_last_bit64(tcs_per_patch_outputs_written);
-
 	unsigned input_vertex_size = tcs_num_inputs * 16;
-	unsigned output_vertex_size = num_tcs_outputs * 16;
+	unsigned output_vertex_size = tcs_num_outputs * 16;
 
 	unsigned input_patch_size = tcs_num_input_vertices * input_vertex_size;
 
 	unsigned pervertex_output_patch_size = tcs_num_output_vertices * output_vertex_size;
-	unsigned output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
+	unsigned output_patch_size = pervertex_output_patch_size + tcs_num_patch_outputs * 16;
 
 	unsigned output_patch0_offset = input_patch_size * tcs_num_patches;
 
@@ -550,19 +547,17 @@ static inline unsigned
 get_tcs_num_patches(unsigned tcs_num_input_vertices,
 			unsigned tcs_num_output_vertices,
 			unsigned tcs_num_inputs,
-			unsigned tcs_outputs_written,
-			unsigned tcs_per_patch_outputs_written,
+			unsigned tcs_num_outputs,
+			unsigned tcs_num_patch_outputs,
 			unsigned tess_offchip_block_dw_size,
 			enum chip_class chip_class,
 			enum radeon_family family)
 {
 	uint32_t input_vertex_size = tcs_num_inputs * 16;
 	uint32_t input_patch_size = tcs_num_input_vertices * input_vertex_size;
-	uint32_t num_tcs_outputs = util_last_bit64(tcs_outputs_written);
-	uint32_t num_tcs_patch_outputs = util_last_bit64(tcs_per_patch_outputs_written);
-	uint32_t output_vertex_size = num_tcs_outputs * 16;
+	uint32_t output_vertex_size = tcs_num_outputs * 16;
 	uint32_t pervertex_output_patch_size = tcs_num_output_vertices * output_vertex_size;
-	uint32_t output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
+	uint32_t output_patch_size = pervertex_output_patch_size + tcs_num_patch_outputs * 16;
 
 	/* Ensure that we only need one wave per SIMD so we don't need to check
 	 * resource usage. Also ensures that the number of tcs in and out
author	Timur Kristóf <timur.kristof@gmail.com>	2020-03-30 16:04:53 +0200
committer	Marge Bot <eric+marge@anholt.net>	2020-04-29 11:51:04 +0000
commit	fd0248c37bfaa0dabbab11fc3060ebe52443eb05 (patch)
tree	e6e478b08f22cda83eca20db6a9bc9ab92613a92
parent	9392ddab4399d796fdf37602f586965ec17f2b2a (diff)