diff options
-rw-r--r-- | src/amd/common/ac_shader_util.c | 18 | ||||
-rw-r--r-- | src/amd/common/ac_shader_util.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_build_pm4.h | 9 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pm4.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.c | 29 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.cpp | 73 |
6 files changed, 104 insertions, 32 deletions
diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c index 943523b88d9..52c87e56c67 100644 --- a/src/amd/common/ac_shader_util.c +++ b/src/amd/common/ac_shader_util.c @@ -581,3 +581,21 @@ unsigned ac_compute_ngg_workgroup_size(unsigned es_verts, unsigned gs_inst_prims return CLAMP(workgroup_size, 1, 256); } + +void ac_set_reg_cu_en(void *cs, unsigned reg_offset, uint32_t value, uint32_t clear_mask, + unsigned value_shift, const struct radeon_info *info, + void set_sh_reg(void*, unsigned, uint32_t)) +{ + /* Register field position and mask. */ + uint32_t cu_en_mask = ~clear_mask; + unsigned cu_en_shift = ffs(cu_en_mask) - 1; + /* The value being set. */ + uint32_t cu_en = (value & cu_en_mask) >> cu_en_shift; + + /* AND the field by spi_cu_en. */ + uint32_t spi_cu_en = info->spi_cu_en >> value_shift; + uint32_t new_value = (value & ~cu_en_mask) | + (((cu_en & spi_cu_en) << cu_en_shift) & cu_en_mask); + + set_sh_reg(cs, reg_offset, new_value); +} diff --git a/src/amd/common/ac_shader_util.h b/src/amd/common/ac_shader_util.h index fcf4e48ca15..b86e81ca039 100644 --- a/src/amd/common/ac_shader_util.h +++ b/src/amd/common/ac_shader_util.h @@ -118,6 +118,10 @@ unsigned ac_compute_esgs_workgroup_size(enum chip_class chip_class, unsigned wav unsigned ac_compute_ngg_workgroup_size(unsigned es_verts, unsigned gs_inst_prims, unsigned max_vtx_out, unsigned prim_amp_factor); +void ac_set_reg_cu_en(void *cs, unsigned reg_offset, uint32_t value, uint32_t clear_mask, + unsigned value_shift, const struct radeon_info *info, + void set_sh_reg(void*, unsigned, uint32_t)); + #ifdef __cplusplus } #endif diff --git a/src/gallium/drivers/radeonsi/si_build_pm4.h b/src/gallium/drivers/radeonsi/si_build_pm4.h index 66589d6a0b3..6b461c9db6c 100644 --- a/src/gallium/drivers/radeonsi/si_build_pm4.h +++ b/src/gallium/drivers/radeonsi/si_build_pm4.h @@ -279,6 +279,15 @@ radeon_emit_32bit_pointer(sctx->screen, (desc)->gpu_address); \ } while (0) +/* Wrappers that are only used when they are passed as function pointers. */ +static inline void radeon_set_sh_reg_func(struct radeon_cmdbuf *cs, unsigned reg_offset, + uint32_t value) +{ + radeon_begin(cs); + radeon_set_sh_reg(reg_offset, value); + radeon_end(); +} + /* This should be evaluated at compile time if all parameters are constants. */ static ALWAYS_INLINE unsigned si_get_user_data_base(enum chip_class chip_class, enum si_has_tess has_tess, diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h index 03f79e0ba30..8946018829d 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.h +++ b/src/gallium/drivers/radeonsi/si_pm4.h @@ -31,7 +31,8 @@ extern "C" { #endif -#define SI_PM4_MAX_DW 176 +/* TODO: This is high because of cs_preamble with ac_set_reg_cu_en. */ +#define SI_PM4_MAX_DW 480 // forward defines struct si_context; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index e82235d7f51..8d242b85c40 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -5467,8 +5467,9 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing) cu_mask_ps = u_bit_consecutive(0, sscreen->info.min_good_cu_per_sa); if (sctx->chip_class >= GFX7) { - si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, - S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F)); + ac_set_reg_cu_en(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, + S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F), + C_00B01C_CU_EN, 0, &sscreen->info, (void*)si_pm4_set_reg); } if (sctx->chip_class <= GFX8) { @@ -5503,11 +5504,13 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing) } if (sctx->chip_class >= GFX7 && sctx->chip_class <= GFX8) { - si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, - S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F)); + ac_set_reg_cu_en(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, + S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F), + C_00B51C_CU_EN, 0, &sscreen->info, (void*)si_pm4_set_reg); si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F)); - si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, - S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F)); + ac_set_reg_cu_en(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, + S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F), + C_00B31C_CU_EN, 0, &sscreen->info, (void*)si_pm4_set_reg); /* If this is 0, Bonaire can hang even if GS isn't being used. * Other chips are unaffected. These are suboptimal values, @@ -5547,8 +5550,9 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing) } if (sctx->chip_class >= GFX9) { - si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, - S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F)); + ac_set_reg_cu_en(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, + S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F), C_00B41C_CU_EN, + 0, &sscreen->info, (void*)si_pm4_set_reg); si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, S_028B50_ACCUM_ISOLINE(40) | S_028B50_ACCUM_TRI(30) | S_028B50_ACCUM_QUAD(24) | @@ -5566,9 +5570,12 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing) if (sctx->chip_class >= GFX10) { /* Logical CUs 16 - 31 */ - si_pm4_set_reg(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(cu_mask_ps >> 16)); - si_pm4_set_reg(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff)); - si_pm4_set_reg(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff)); + ac_set_reg_cu_en(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(cu_mask_ps >> 16), + C_00B004_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg); + ac_set_reg_cu_en(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff), + C_00B104_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg); + ac_set_reg_cu_en(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff), + C_00B404_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg); si_pm4_set_reg(pm4, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0); si_pm4_set_reg(pm4, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index c32a014270b..9b4d6503f18 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -919,18 +919,35 @@ static void si_emit_shader_gs(struct si_context *sctx) radeon_end_update_context_roll(sctx); /* These don't cause any context rolls. */ - radeon_begin_again(&sctx->gfx_cs); - if (sctx->chip_class >= GFX7) { - radeon_opt_set_sh_reg(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, - SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS, - shader->ctx_reg.gs.spi_shader_pgm_rsrc3_gs); - } - if (sctx->chip_class >= GFX10) { - radeon_opt_set_sh_reg(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS, - SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS, - shader->ctx_reg.gs.spi_shader_pgm_rsrc4_gs); + if (sctx->screen->info.spi_cu_en_has_effect) { + if (sctx->chip_class >= GFX7) { + ac_set_reg_cu_en(&sctx->gfx_cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, + shader->ctx_reg.gs.spi_shader_pgm_rsrc3_gs, + C_00B21C_CU_EN, 0, &sctx->screen->info, + (void (*)(void*, unsigned, uint32_t))radeon_set_sh_reg_func); + sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS); + } + if (sctx->chip_class >= GFX10) { + ac_set_reg_cu_en(&sctx->gfx_cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, + shader->ctx_reg.gs.spi_shader_pgm_rsrc4_gs, + C_00B204_CU_EN, 16, &sctx->screen->info, + (void (*)(void*, unsigned, uint32_t))radeon_set_sh_reg_func); + sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS); + } + } else { + radeon_begin_again(&sctx->gfx_cs); + if (sctx->chip_class >= GFX7) { + radeon_opt_set_sh_reg(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, + SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS, + shader->ctx_reg.gs.spi_shader_pgm_rsrc3_gs); + } + if (sctx->chip_class >= GFX10) { + radeon_opt_set_sh_reg(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS, + SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS, + shader->ctx_reg.gs.spi_shader_pgm_rsrc4_gs); + } + radeon_end(); } - radeon_end(); } static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) @@ -1129,13 +1146,27 @@ static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, struct si_shader radeon_begin_again(&sctx->gfx_cs); radeon_opt_set_uconfig_reg(sctx, R_030980_GE_PC_ALLOC, SI_TRACKED_GE_PC_ALLOC, shader->ctx_reg.ngg.ge_pc_alloc); - radeon_opt_set_sh_reg(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, - SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS, - shader->ctx_reg.ngg.spi_shader_pgm_rsrc3_gs); - radeon_opt_set_sh_reg(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS, - SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS, - shader->ctx_reg.ngg.spi_shader_pgm_rsrc4_gs); - radeon_end(); + if (sctx->screen->info.spi_cu_en_has_effect) { + radeon_end(); + ac_set_reg_cu_en(&sctx->gfx_cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, + shader->ctx_reg.ngg.spi_shader_pgm_rsrc3_gs, + C_00B21C_CU_EN, 0, &sctx->screen->info, + (void (*)(void*, unsigned, uint32_t))radeon_set_sh_reg_func); + ac_set_reg_cu_en(&sctx->gfx_cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, + shader->ctx_reg.ngg.spi_shader_pgm_rsrc4_gs, + C_00B204_CU_EN, 16, &sctx->screen->info, + (void (*)(void*, unsigned, uint32_t))radeon_set_sh_reg_func); + sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS) & + ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS); + } else { + radeon_opt_set_sh_reg(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, + SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS, + shader->ctx_reg.ngg.spi_shader_pgm_rsrc3_gs); + radeon_opt_set_sh_reg(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS, + SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS, + shader->ctx_reg.ngg.spi_shader_pgm_rsrc4_gs); + radeon_end(); + } } static void gfx10_emit_shader_ngg_notess_nogs(struct si_context *sctx) @@ -1599,8 +1630,10 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, oc_lds_en = shader->selector->info.stage == MESA_SHADER_TESS_EVAL ? 1 : 0; if (sscreen->info.chip_class >= GFX7) { - si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, - S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F)); + ac_set_reg_cu_en(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, + S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F), + C_00B118_CU_EN, 0, &sscreen->info, + (void (*)(void*, unsigned, uint32_t))si_pm4_set_reg); si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64)); } |