summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSonny Jiang <sonny.jiang@amd.com>2018-10-03 11:53:09 -0400
committerMarek Olšák <marek.olsak@amd.com>2018-10-05 17:53:52 -0400
commiteeb9170599468f4e9c0a48e1507a53cde66a9825 (patch)
tree296001b855acff8bbf59ed93df3edd908d168830
parenta1bc1523408a305b14a8c297ea93a28bb12cee5d (diff)
radeonsi: optimizing SET_CONTEXT_REG for shaders ES
Signed-off-by: Sonny Jiang <sonny.jiang@amd.com> Signed-off-by: Marek Olšák <marek.olsak@amd.com>
-rw-r--r--src/gallium/drivers/radeonsi/si_gfx_cs.c1
-rw-r--r--src/gallium/drivers/radeonsi/si_pm4.c3
-rw-r--r--src/gallium/drivers/radeonsi/si_pm4.h11
-rw-r--r--src/gallium/drivers/radeonsi/si_state.h9
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c23
5 files changed, 37 insertions, 10 deletions
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index bdb576f7e5c..95c02fbc1ea 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -349,6 +349,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ] = 0x3f800000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ] = 0x3f800000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_CLIPRECT_RULE] = 0xffff;
+ ctx->tracked_regs.reg_value[SI_TRACKED_VGT_ESGS_RING_ITEMSIZE] = 0x00000000;
/* Set all saved registers state to saved. */
ctx->tracked_regs.reg_saved = 0xffffffff;
diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c
index 446edea49a9..1e686d8060b 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.c
+++ b/src/gallium/drivers/radeonsi/si_pm4.c
@@ -144,6 +144,9 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
radeon_emit(cs, ib->gpu_address >> 32);
radeon_emit(cs, (ib->b.b.width0 >> 2) & 0xfffff);
}
+
+ if (state->atom.emit)
+ state->atom.emit(sctx);
}
void si_pm4_reset_emitted(struct si_context *sctx)
diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h
index 020da7a3903..b2db236c495 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_pm4.h
@@ -33,6 +33,13 @@
// forward defines
struct si_context;
+/* State atoms are callbacks which write a sequence of packets into a GPU
+ * command buffer (AKA indirect buffer, AKA IB, AKA command stream, AKA CS).
+ */
+struct si_atom {
+ void (*emit)(struct si_context *ctx);
+};
+
struct si_pm4_state
{
/* optional indirect buffer */
@@ -52,6 +59,10 @@ struct si_pm4_state
struct r600_resource *bo[SI_PM4_MAX_BO];
enum radeon_bo_usage bo_usage[SI_PM4_MAX_BO];
enum radeon_bo_priority bo_priority[SI_PM4_MAX_BO];
+
+ /* For shader states only */
+ struct si_shader *shader;
+ struct si_atom atom;
};
void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode);
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 89bb5b64a3e..7fb09ca953b 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -45,13 +45,6 @@ struct si_shader_selector;
struct si_texture;
struct si_qbo_state;
-/* State atoms are callbacks which write a sequence of packets into a GPU
- * command buffer (AKA indirect buffer, AKA IB, AKA command stream, AKA CS).
- */
-struct si_atom {
- void (*emit)(struct si_context *ctx);
-};
-
struct si_state_blend {
struct si_pm4_state pm4;
uint32_t cb_target_mask;
@@ -284,6 +277,8 @@ enum si_tracked_reg {
SI_TRACKED_PA_SC_CLIPRECT_RULE,
+ SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
+
SI_NUM_TRACKED_REGS,
};
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 0bf783fc17e..fb0a2f1d190 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -452,7 +452,13 @@ static struct si_pm4_state *si_get_shader_pm4_state(struct si_shader *shader)
else
shader->pm4 = CALLOC_STRUCT(si_pm4_state);
- return shader->pm4;
+ if (shader->pm4) {
+ shader->pm4->shader = shader;
+ return shader->pm4;
+ } else {
+ fprintf(stderr, "radeonsi: Failed to create pm4 state.\n");
+ return NULL;
+ }
}
static unsigned si_get_num_vs_user_sgprs(unsigned num_always_on_user_sgprs)
@@ -552,6 +558,18 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
}
}
+static void si_emit_shader_es(struct si_context *sctx)
+{
+ struct si_shader *shader = sctx->queued.named.es->shader;
+
+ if (!shader)
+ return;
+
+ radeon_opt_set_context_reg(sctx, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
+ SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
+ shader->selector->esgs_itemsize / 4);
+}
+
static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
{
struct si_pm4_state *pm4;
@@ -566,6 +584,7 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
if (!pm4)
return;
+ pm4->atom.emit = si_emit_shader_es;
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
@@ -581,8 +600,6 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
oc_lds_en = shader->selector->type == PIPE_SHADER_TESS_EVAL ? 1 : 0;
- si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
- shader->selector->esgs_itemsize / 4);
si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, S_00B324_MEM_BASE(va >> 40));
si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,