diff options
author | Marek Olšák <marek.olsak@amd.com> | 2016-11-29 19:25:03 +0100 |
---|---|---|
committer | Marek Olšák <marek.olsak@amd.com> | 2016-12-01 02:16:51 +0100 |
commit | 72d48fcd8eb5862c72d27e5462c289c5de65396e (patch) | |
tree | c7e9205da4c37e6949fda8605cc0b1cf386cbc32 /src/gallium | |
parent | ec36c63b4f417973a6d50d79281f4834682c4555 (diff) |
radeonsi: apply a multi-wave workgroup SPI bug workaround to affected CIK chips
All codepaths are handled except for clover.
Cc: 13.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_compute.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 24 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_draw.c | 6 |
4 files changed, 29 insertions, 4 deletions
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 91f1b0a742..9d83cb3a73 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -348,6 +348,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, lds_blocks += align(program->local_size, 512) >> 9; } + /* TODO: use si_multiwave_lds_size_workaround */ assert(lds_blocks <= 0xFF); config->rsrc2 &= C_00B84C_LDS_SIZE; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index b19c61e99d..80c063aaee 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -8198,11 +8198,31 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen, return true; } -static void si_fix_num_sgprs(struct si_shader *shader) +void si_multiwave_lds_size_workaround(struct si_screen *sscreen, + unsigned *lds_size) +{ + /* SPI barrier management bug: + * Make sure we have at least 4k of LDS in use to avoid the bug. + * It applies to workgroup sizes of more than one wavefront. + */ + if (sscreen->b.family == CHIP_BONAIRE || + sscreen->b.family == CHIP_KABINI || + sscreen->b.family == CHIP_MULLINS) + *lds_size = MAX2(*lds_size, 8); +} + +static void si_fix_resource_usage(struct si_screen *sscreen, + struct si_shader *shader) { unsigned min_sgprs = shader->info.num_input_sgprs + 2; /* VCC */ shader->config.num_sgprs = MAX2(shader->config.num_sgprs, min_sgprs); + + if (shader->selector->type == PIPE_SHADER_COMPUTE && + si_get_max_workgroup_size(shader) > 64) { + si_multiwave_lds_size_workaround(sscreen, + &shader->config.lds_size); + } } int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, @@ -8297,7 +8317,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, } } - si_fix_num_sgprs(shader); + si_fix_resource_usage(sscreen, shader); si_shader_dump(sscreen, shader, debug, sel->info.processor, stderr); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index d4bc47b387..129e571aa7 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -547,6 +547,8 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader) void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, struct pipe_debug_callback *debug, unsigned processor, FILE *f); +void si_multiwave_lds_size_workaround(struct si_screen *sscreen, + unsigned *lds_size); void si_shader_apply_scratch_relocs(struct si_context *sctx, struct si_shader *shader, struct si_shader_config *config, diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index e904164faf..10073ef9d7 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -176,11 +176,13 @@ static void si_emit_derived_tess_state(struct si_context *sctx, if (sctx->b.chip_class >= CIK) { assert(lds_size <= 65536); - ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 512) / 512); + lds_size = align(lds_size, 512) / 512; } else { assert(lds_size <= 32768); - ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 256) / 256); + lds_size = align(lds_size, 256) / 256; } + si_multiwave_lds_size_workaround(sctx->screen, &lds_size); + ls_rsrc2 |= S_00B52C_LDS_SIZE(lds_size); /* Due to a hw bug, RSRC2_LS must be written twice with another * LS register written in between. */ |