summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarek Olšák <marek.olsak@amd.com>2021-12-08 02:15:50 -0500
committerMarek Olšák <marek.olsak@amd.com>2022-01-05 01:36:10 -0500
commit384014bebe10cc56238bece7499bb23e5e6eff96 (patch)
tree1e9a61df9c967764b65c0533d00cd0c122707278
parentb06b481dfe55c831cbfad45ea1299bcb437b1555 (diff)
radeonsi: apply spi_cu_en to CU_EN
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14122>
-rw-r--r--src/amd/common/ac_shader_util.c18
-rw-r--r--src/amd/common/ac_shader_util.h4
-rw-r--r--src/gallium/drivers/radeonsi/si_build_pm4.h9
-rw-r--r--src/gallium/drivers/radeonsi/si_pm4.h3
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c29
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.cpp73
6 files changed, 104 insertions, 32 deletions
diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c
index 943523b88d9..52c87e56c67 100644
--- a/src/amd/common/ac_shader_util.c
+++ b/src/amd/common/ac_shader_util.c
@@ -581,3 +581,21 @@ unsigned ac_compute_ngg_workgroup_size(unsigned es_verts, unsigned gs_inst_prims
return CLAMP(workgroup_size, 1, 256);
}
+
+void ac_set_reg_cu_en(void *cs, unsigned reg_offset, uint32_t value, uint32_t clear_mask,
+ unsigned value_shift, const struct radeon_info *info,
+ void set_sh_reg(void*, unsigned, uint32_t))
+{
+ /* Register field position and mask. */
+ uint32_t cu_en_mask = ~clear_mask;
+ unsigned cu_en_shift = ffs(cu_en_mask) - 1;
+ /* The value being set. */
+ uint32_t cu_en = (value & cu_en_mask) >> cu_en_shift;
+
+ /* AND the field by spi_cu_en. */
+ uint32_t spi_cu_en = info->spi_cu_en >> value_shift;
+ uint32_t new_value = (value & ~cu_en_mask) |
+ (((cu_en & spi_cu_en) << cu_en_shift) & cu_en_mask);
+
+ set_sh_reg(cs, reg_offset, new_value);
+}
diff --git a/src/amd/common/ac_shader_util.h b/src/amd/common/ac_shader_util.h
index fcf4e48ca15..b86e81ca039 100644
--- a/src/amd/common/ac_shader_util.h
+++ b/src/amd/common/ac_shader_util.h
@@ -118,6 +118,10 @@ unsigned ac_compute_esgs_workgroup_size(enum chip_class chip_class, unsigned wav
unsigned ac_compute_ngg_workgroup_size(unsigned es_verts, unsigned gs_inst_prims,
unsigned max_vtx_out, unsigned prim_amp_factor);
+void ac_set_reg_cu_en(void *cs, unsigned reg_offset, uint32_t value, uint32_t clear_mask,
+ unsigned value_shift, const struct radeon_info *info,
+ void set_sh_reg(void*, unsigned, uint32_t));
+
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/drivers/radeonsi/si_build_pm4.h b/src/gallium/drivers/radeonsi/si_build_pm4.h
index 66589d6a0b3..6b461c9db6c 100644
--- a/src/gallium/drivers/radeonsi/si_build_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_build_pm4.h
@@ -279,6 +279,15 @@
radeon_emit_32bit_pointer(sctx->screen, (desc)->gpu_address); \
} while (0)
+/* Wrappers that are only used when they are passed as function pointers. */
+static inline void radeon_set_sh_reg_func(struct radeon_cmdbuf *cs, unsigned reg_offset,
+ uint32_t value)
+{
+ radeon_begin(cs);
+ radeon_set_sh_reg(reg_offset, value);
+ radeon_end();
+}
+
/* This should be evaluated at compile time if all parameters are constants. */
static ALWAYS_INLINE unsigned
si_get_user_data_base(enum chip_class chip_class, enum si_has_tess has_tess,
diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h
index 03f79e0ba30..8946018829d 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_pm4.h
@@ -31,7 +31,8 @@
extern "C" {
#endif
-#define SI_PM4_MAX_DW 176
+/* TODO: This is high because of cs_preamble with ac_set_reg_cu_en. */
+#define SI_PM4_MAX_DW 480
// forward defines
struct si_context;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index e82235d7f51..8d242b85c40 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -5467,8 +5467,9 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
cu_mask_ps = u_bit_consecutive(0, sscreen->info.min_good_cu_per_sa);
if (sctx->chip_class >= GFX7) {
- si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
- S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F));
+ ac_set_reg_cu_en(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
+ S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F),
+ C_00B01C_CU_EN, 0, &sscreen->info, (void*)si_pm4_set_reg);
}
if (sctx->chip_class <= GFX8) {
@@ -5503,11 +5504,13 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
}
if (sctx->chip_class >= GFX7 && sctx->chip_class <= GFX8) {
- si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
- S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F));
+ ac_set_reg_cu_en(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
+ S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F),
+ C_00B51C_CU_EN, 0, &sscreen->info, (void*)si_pm4_set_reg);
si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F));
- si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
- S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F));
+ ac_set_reg_cu_en(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
+ S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F),
+ C_00B31C_CU_EN, 0, &sscreen->info, (void*)si_pm4_set_reg);
/* If this is 0, Bonaire can hang even if GS isn't being used.
* Other chips are unaffected. These are suboptimal values,
@@ -5547,8 +5550,9 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
}
if (sctx->chip_class >= GFX9) {
- si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
- S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
+ ac_set_reg_cu_en(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
+ S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F), C_00B41C_CU_EN,
+ 0, &sscreen->info, (void*)si_pm4_set_reg);
si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
S_028B50_ACCUM_ISOLINE(40) | S_028B50_ACCUM_TRI(30) | S_028B50_ACCUM_QUAD(24) |
@@ -5566,9 +5570,12 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
if (sctx->chip_class >= GFX10) {
/* Logical CUs 16 - 31 */
- si_pm4_set_reg(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(cu_mask_ps >> 16));
- si_pm4_set_reg(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff));
- si_pm4_set_reg(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff));
+ ac_set_reg_cu_en(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(cu_mask_ps >> 16),
+ C_00B004_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg);
+ ac_set_reg_cu_en(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff),
+ C_00B104_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg);
+ ac_set_reg_cu_en(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff),
+ C_00B404_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg);
si_pm4_set_reg(pm4, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0);
si_pm4_set_reg(pm4, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
index c32a014270b..9b4d6503f18 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
@@ -919,18 +919,35 @@ static void si_emit_shader_gs(struct si_context *sctx)
radeon_end_update_context_roll(sctx);
/* These don't cause any context rolls. */
- radeon_begin_again(&sctx->gfx_cs);
- if (sctx->chip_class >= GFX7) {
- radeon_opt_set_sh_reg(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
- SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
- shader->ctx_reg.gs.spi_shader_pgm_rsrc3_gs);
- }
- if (sctx->chip_class >= GFX10) {
- radeon_opt_set_sh_reg(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
- SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
- shader->ctx_reg.gs.spi_shader_pgm_rsrc4_gs);
+ if (sctx->screen->info.spi_cu_en_has_effect) {
+ if (sctx->chip_class >= GFX7) {
+ ac_set_reg_cu_en(&sctx->gfx_cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
+ shader->ctx_reg.gs.spi_shader_pgm_rsrc3_gs,
+ C_00B21C_CU_EN, 0, &sctx->screen->info,
+ (void (*)(void*, unsigned, uint32_t))radeon_set_sh_reg_func);
+ sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS);
+ }
+ if (sctx->chip_class >= GFX10) {
+ ac_set_reg_cu_en(&sctx->gfx_cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
+ shader->ctx_reg.gs.spi_shader_pgm_rsrc4_gs,
+ C_00B204_CU_EN, 16, &sctx->screen->info,
+ (void (*)(void*, unsigned, uint32_t))radeon_set_sh_reg_func);
+ sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS);
+ }
+ } else {
+ radeon_begin_again(&sctx->gfx_cs);
+ if (sctx->chip_class >= GFX7) {
+ radeon_opt_set_sh_reg(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
+ SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
+ shader->ctx_reg.gs.spi_shader_pgm_rsrc3_gs);
+ }
+ if (sctx->chip_class >= GFX10) {
+ radeon_opt_set_sh_reg(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
+ SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
+ shader->ctx_reg.gs.spi_shader_pgm_rsrc4_gs);
+ }
+ radeon_end();
}
- radeon_end();
}
static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
@@ -1129,13 +1146,27 @@ static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, struct si_shader
radeon_begin_again(&sctx->gfx_cs);
radeon_opt_set_uconfig_reg(sctx, R_030980_GE_PC_ALLOC, SI_TRACKED_GE_PC_ALLOC,
shader->ctx_reg.ngg.ge_pc_alloc);
- radeon_opt_set_sh_reg(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
- SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
- shader->ctx_reg.ngg.spi_shader_pgm_rsrc3_gs);
- radeon_opt_set_sh_reg(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
- SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
- shader->ctx_reg.ngg.spi_shader_pgm_rsrc4_gs);
- radeon_end();
+ if (sctx->screen->info.spi_cu_en_has_effect) {
+ radeon_end();
+ ac_set_reg_cu_en(&sctx->gfx_cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
+ shader->ctx_reg.ngg.spi_shader_pgm_rsrc3_gs,
+ C_00B21C_CU_EN, 0, &sctx->screen->info,
+ (void (*)(void*, unsigned, uint32_t))radeon_set_sh_reg_func);
+ ac_set_reg_cu_en(&sctx->gfx_cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
+ shader->ctx_reg.ngg.spi_shader_pgm_rsrc4_gs,
+ C_00B204_CU_EN, 16, &sctx->screen->info,
+ (void (*)(void*, unsigned, uint32_t))radeon_set_sh_reg_func);
+ sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS) &
+ ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS);
+ } else {
+ radeon_opt_set_sh_reg(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
+ SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
+ shader->ctx_reg.ngg.spi_shader_pgm_rsrc3_gs);
+ radeon_opt_set_sh_reg(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
+ SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
+ shader->ctx_reg.ngg.spi_shader_pgm_rsrc4_gs);
+ radeon_end();
+ }
}
static void gfx10_emit_shader_ngg_notess_nogs(struct si_context *sctx)
@@ -1599,8 +1630,10 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
oc_lds_en = shader->selector->info.stage == MESA_SHADER_TESS_EVAL ? 1 : 0;
if (sscreen->info.chip_class >= GFX7) {
- si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
- S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F));
+ ac_set_reg_cu_en(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
+ S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F),
+ C_00B118_CU_EN, 0, &sscreen->info,
+ (void (*)(void*, unsigned, uint32_t))si_pm4_set_reg);
si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64));
}