summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Samuel Pitoiset <samuel.pitoiset@gmail.com> 2021-03-24 19:15:52 +0100
committer Marge Bot <eric+marge@anholt.net> 2021-04-05 08:54:55 +0000
commit 65bca137bd6e99d3113f1e983a95b666e107e93a (patch)
tree 6bc5cd4e8f0113d5e36acd84ee16ea7b23d0b794
parent 3dfb45362619115e78c93e0d6f299e7e9d4ee5d3 (diff)
aco: implement a workaround for the image load DCC hw bug on GFX10.3
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9919>
-rw-r--r-- src/amd/compiler/aco_instruction_selection.cpp | 23
-rw-r--r-- src/amd/vulkan/radv_shader.c | 1
-rw-r--r-- src/amd/vulkan/radv_shader.h | 1
3 files changed, 25 insertions, 0 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index ec5e4f4978d..cf8d8c1aa1b 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -5474,6 +5474,29 @@ Temp get_sampler_desc(isel_context *ctx, nir_deref_instr *deref_instr,
res = bld.pseudo(aco_opcode::p_create_vector, bld.def(s8),
components[0], components[1], components[2], components[3],
components[4], components[5], components[6], components[7]);
+ } else if (desc_type == ACO_DESC_IMAGE &&
+ ctx->options->has_image_load_dcc_bug &&
+ image && !write) {
+ Temp components[8];
+ for (unsigned i = 0; i < 8; i++)
+ components[i] = bld.tmp(s1);
+
+ bld.pseudo(aco_opcode::p_split_vector,
+ Definition(components[0]), Definition(components[1]),
+ Definition(components[2]), Definition(components[3]),
+ Definition(components[4]), Definition(components[5]),
+ Definition(components[6]), Definition(components[7]), res);
+
+ /* WRITE_COMPRESS_ENABLE must be 0 for all image loads to workaround a
+ * hardware bug.
+ */
+ components[6] = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc),
+ components[6],
+ bld.copy(bld.def(s1), Operand((uint32_t)C_00A018_WRITE_COMPRESS_ENABLE)));
+
+ res = bld.pseudo(aco_opcode::p_create_vector, bld.def(s8),
+ components[0], components[1], components[2], components[3],
+ components[4], components[5], components[6], components[7]);
}
return res;
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 17a3a51f297..61813d5ee53 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -1416,6 +1416,7 @@ shader_variant_compile(struct radv_device *device,
options->enable_mrt_output_nan_fixup = module && !module->nir &&
device->instance->enable_mrt_output_nan_fixup;
options->adjust_frag_coord_z = device->adjust_frag_coord_z;
+ options->has_image_load_dcc_bug = device->physical_device->rad_info.has_image_load_dcc_bug;
options->debug.func = radv_compiler_debug;
options->debug.private_data = &debug_data;
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 2d0e0e6bf83..d489cae20db 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -132,6 +132,7 @@ struct radv_nir_compiler_options {
bool record_stats;
bool check_ir;
bool has_ls_vgpr_init_bug;
+ bool has_image_load_dcc_bug;
bool use_ngg_streamout;
bool enable_mrt_output_nan_fixup;
bool disable_optimizations; /* only used by ACO */