summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Marek Olšák <marek.olsak@amd.com> 2015-08-19 18:45:11 +0200
committer Marek Olšák <marek.olsak@amd.com> 2015-08-26 19:25:19 +0200
commit 16e5d8ad388445c2e577406953a403608f1addc5 (patch)
tree cde8d34dd6e21d4695dcf3cb2e61cfa2d06e92bc
parent 2c14a6d3b1c53d5814414ce9e91fd8d24c90b787 (diff)
radeonsi: add IB parser support for CP DMA packets
If the packet encoding is defined in the same format as register definitions, the python script can process them automatically and the parser support becomes trivial.

Acked-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- src/gallium/drivers/radeonsi/si_cp_dma.c 17
-rw-r--r-- src/gallium/drivers/radeonsi/si_debug.c 24
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_draw.c 6
-rw-r--r-- src/gallium/drivers/radeonsi/sid.h 136
4 files changed, 122 insertions, 61 deletions
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index f8a9da45a1..7bdac97eaa 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -47,10 +47,11 @@ static void si_emit_cp_dma_copy_buffer(struct si_context *sctx,
unsigned size, unsigned flags)
{
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
- uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0;
- uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0;
+ uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? S_411_CP_SYNC(1) : 0;
+ uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? S_414_RAW_WAIT(1) : 0;
uint32_t sel = flags & CIK_CP_DMA_USE_L2 ?
- PKT3_CP_DMA_SRC_SEL(3) | PKT3_CP_DMA_DST_SEL(3) : 0;
+ S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) |
+ S_411_DSL_SEL(V_411_DST_ADDR_TC_L2) : 0;
assert(size);
assert((size & ((1<<21)-1)) == size);
@@ -79,16 +80,16 @@ static void si_emit_cp_dma_clear_buffer(struct si_context *sctx,
uint32_t clear_value, unsigned flags)
{
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
- uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0;
- uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0;
- uint32_t dst_sel = flags & CIK_CP_DMA_USE_L2 ? PKT3_CP_DMA_DST_SEL(3) : 0;
+ uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? S_411_CP_SYNC(1) : 0;
+ uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? S_414_RAW_WAIT(1) : 0;
+ uint32_t dst_sel = flags & CIK_CP_DMA_USE_L2 ? S_411_DSL_SEL(V_411_DST_ADDR_TC_L2) : 0;
assert(size);
assert((size & ((1<<21)-1)) == size);
if (sctx->b.chip_class >= CIK) {
radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
- radeon_emit(cs, sync_flag | dst_sel | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */
+ radeon_emit(cs, sync_flag | dst_sel | S_411_SRC_SEL(V_411_DATA)); /* CP_SYNC [31] | SRC_SEL[30:29] */
radeon_emit(cs, clear_value); /* DATA [31:0] */
radeon_emit(cs, 0);
radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
@@ -97,7 +98,7 @@ static void si_emit_cp_dma_clear_buffer(struct si_context *sctx,
} else {
radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
radeon_emit(cs, clear_value); /* DATA [31:0] */
- radeon_emit(cs, sync_flag | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */
+ radeon_emit(cs, sync_flag | S_411_SRC_SEL(V_411_DATA)); /* CP_SYNC [31] | SRC_SEL[30:29] */
radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
radeon_emit(cs, (dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
index cf09686c63..22d6f250b0 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -229,6 +229,30 @@ static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw,
case PKT3_NUM_INSTANCES:
si_dump_reg(f, R_030934_VGT_NUM_INSTANCES, ib[1], ~0);
break;
+ case PKT3_WRITE_DATA:
+ si_dump_reg(f, R_370_CONTROL, ib[1], ~0);
+ si_dump_reg(f, R_371_DST_ADDR_LO, ib[2], ~0);
+ si_dump_reg(f, R_372_DST_ADDR_HI, ib[3], ~0);
+ for (i = 2; i < count; i++) {
+ print_spaces(f, INDENT_PKT);
+ fprintf(f, "0x%08x\n", ib[2+i]);
+ }
+ break;
+ case PKT3_CP_DMA:
+ si_dump_reg(f, R_410_CP_DMA_WORD0, ib[1], ~0);
+ si_dump_reg(f, R_411_CP_DMA_WORD1, ib[2], ~0);
+ si_dump_reg(f, R_412_CP_DMA_WORD2, ib[3], ~0);
+ si_dump_reg(f, R_413_CP_DMA_WORD3, ib[4], ~0);
+ si_dump_reg(f, R_414_COMMAND, ib[5], ~0);
+ break;
+ case PKT3_DMA_DATA:
+ si_dump_reg(f, R_500_DMA_DATA_WORD0, ib[1], ~0);
+ si_dump_reg(f, R_501_SRC_ADDR_LO, ib[2], ~0);
+ si_dump_reg(f, R_502_SRC_ADDR_HI, ib[3], ~0);
+ si_dump_reg(f, R_503_DST_ADDR_LO, ib[4], ~0);
+ si_dump_reg(f, R_504_DST_ADDR_HI, ib[5], ~0);
+ si_dump_reg(f, R_414_COMMAND, ib[6], ~0);
+ break;
case PKT3_NOP:
if (ib[0] == 0xffff1000) {
count = -1; /* One dword NOP. */
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index b1aba1290d..fd2fecaa74 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -879,9 +879,9 @@ void si_trace_emit(struct si_context *sctx)
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf,
RADEON_USAGE_READWRITE, RADEON_PRIO_MIN);
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
- radeon_emit(cs, PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) |
- PKT3_WRITE_DATA_WR_CONFIRM |
- PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
+ S_370_WR_CONFIRM(1) |
+ S_370_ENGINE_SEL(V_370_ME));
radeon_emit(cs, sctx->trace_buf->gpu_address);
radeon_emit(cs, sctx->trace_buf->gpu_address >> 32);
radeon_emit(cs, sctx->trace_id);
diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
index 66660e3282..cd6be73f66 100644
--- a/src/gallium/drivers/radeonsi/sid.h
+++ b/src/gallium/drivers/radeonsi/sid.h
@@ -69,6 +69,10 @@
#define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7
+/* All registers defined in this packet section don't exist and the only
+ * purpose of these definitions is to define packet encoding that
+ * the IB parser understands, and also to have an accurate documentation.
+ */
#define PKT3_NOP 0x10
#define PKT3_SET_BASE 0x11
#define PKT3_CLEAR_STATE 0x12
@@ -95,19 +99,23 @@
#define PKT3_DRAW_INDEX_OFFSET_2 0x35
#define PKT3_DRAW_PREAMBLE 0x36 /* new on CIK, required on GFX7.2 and later */
#define PKT3_WRITE_DATA 0x37
-#define PKT3_WRITE_DATA_DST_SEL(x) ((x) << 8)
-#define PKT3_WRITE_DATA_DST_SEL_REG 0
-#define PKT3_WRITE_DATA_DST_SEL_MEM_SYNC 1
-#define PKT3_WRITE_DATA_DST_SEL_TC_L2 2
-#define PKT3_WRITE_DATA_DST_SEL_GDS 3
-#define PKT3_WRITE_DATA_DST_SEL_RESERVED_4 4
-#define PKT3_WRITE_DATA_DST_SEL_MEM_ASYNC 5
-#define PKT3_WR_ONE_ADDR (1 << 16)
-#define PKT3_WRITE_DATA_WR_CONFIRM (1 << 20)
-#define PKT3_WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
-#define PKT3_WRITE_DATA_ENGINE_SEL_ME 0
-#define PKT3_WRITE_DATA_ENGINE_SEL_PFP 1
-#define PKT3_WRITE_DATA_ENGINE_SEL_CE 2
+#define R_370_CONTROL 0x370 /* 0x[packet number][word index] */
+#define S_370_ENGINE_SEL(x) (((x) & 0x3) << 30)
+#define V_370_ME 0
+#define V_370_PFP 1
+#define V_370_CE 2
+#define V_370_DE 3
+#define S_370_WR_CONFIRM(x) (((x) & 0x1) << 20)
+#define S_370_WR_ONE_ADDR(x) (((x) & 0x1) << 16)
+#define S_370_DST_SEL(x) (((x) & 0xf) << 8)
+#define V_370_MEM_MAPPED_REGISTER 0
+#define V_370_MEMORY_SYNC 1
+#define V_370_TC_L2 2
+#define V_370_GDS 3
+#define V_370_RESERVED 4
+#define V_370_MEM_ASYNC 5
+#define R_371_DST_ADDR_LO 0x371
+#define R_372_DST_ADDR_HI 0x372
#define PKT3_DRAW_INDEX_INDIRECT_MULTI 0x38
#define PKT3_MEM_SEMAPHORE 0x39
#define PKT3_MPEG_INDEX 0x3A /* not on CIK */
@@ -159,42 +167,53 @@
* 5. DST_ADDR_HI [15:0]
* 6. COMMAND [29:22] | BYTE_COUNT [20:0]
*/
-#define PKT3_CP_DMA_CP_SYNC (1 << 31)
-#define PKT3_CP_DMA_SRC_SEL(x) ((x) << 29)
-/* 0 - SRC_ADDR
- * 1 - GDS (program SAS to 1 as well)
- * 2 - DATA
- * 3 - SRC_ADDR using TC L2 (DMA_DATA only)
- */
-#define PKT3_CP_DMA_DST_SEL(x) ((x) << 20)
-/* 0 - DST_ADDR
- * 1 - GDS (program DAS to 1 as well)
- * 3 - DST_ADDR using TC L2 (DMA_DATA only)
- */
-/* COMMAND */
-#define PKT3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23)
-/* 0 - none
- * 1 - 8 in 16
- * 2 - 8 in 32
- * 3 - 8 in 64
- */
-#define PKT3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24)
-/* 0 - none
- * 1 - 8 in 16
- * 2 - 8 in 32
- * 3 - 8 in 64
- */
-#define PKT3_CP_DMA_CMD_SAS (1 << 26)
-/* 0 - memory
- * 1 - register
- */
-#define PKT3_CP_DMA_CMD_DAS (1 << 27)
-/* 0 - memory
- * 1 - register
- */
-#define PKT3_CP_DMA_CMD_SAIC (1 << 28)
-#define PKT3_CP_DMA_CMD_DAIC (1 << 29)
-#define PKT3_CP_DMA_CMD_RAW_WAIT (1 << 30)
+#define R_410_CP_DMA_WORD0 0x410 /* 0x[packet number][word index] */
+#define S_410_SRC_ADDR_LO(x) ((x) & 0xffffffff)
+#define R_411_CP_DMA_WORD1 0x411
+#define S_411_CP_SYNC(x) (((x) & 0x1) << 31)
+#define S_411_SRC_SEL(x) (((x) & 0x3) << 29)
+#define V_411_SRC_ADDR 0
+#define V_411_GDS 1 /* program SAS to 1 as well */
+#define V_411_DATA 2
+#define V_411_SRC_ADDR_TC_L2 3 /* new for CIK */
+#define S_411_ENGINE(x) (((x) & 0x1) << 27)
+#define V_411_ME 0
+#define V_411_PFP 1
+#define S_411_DSL_SEL(x) (((x) & 0x3) << 20)
+#define V_411_DST_ADDR 0
+#define V_411_GDS 1 /* program DAS to 1 as well */
+#define V_411_DST_ADDR_TC_L2 3 /* new for CIK */
+#define S_411_SRC_ADDR_HI(x) ((x) & 0xffff)
+#define R_412_CP_DMA_WORD2 0x412 /* 0x[packet number][word index] */
+#define S_412_DST_ADDR_LO(x) ((x) & 0xffffffff)
+#define R_413_CP_DMA_WORD3 0x413 /* 0x[packet number][word index] */
+#define S_413_DST_ADDR_HI(x) ((x) & 0xffff)
+#define R_414_COMMAND 0x414
+#define S_414_BYTE_COUNT(x) ((x) & 0x1fffff)
+#define S_414_DISABLE_WR_CONFIRM(x) (((x) & 0x1) << 21)
+#define S_414_SRC_SWAP(x) (((x) & 0x3) << 22)
+#define V_414_NONE 0
+#define V_414_8_IN_16 1
+#define V_414_8_IN_32 2
+#define V_414_8_IN_64 3
+#define S_414_DST_SWAP(x) (((x) & 0x3) << 24)
+#define V_414_NONE 0
+#define V_414_8_IN_16 1
+#define V_414_8_IN_32 2
+#define V_414_8_IN_64 3
+#define S_414_SAS(x) (((x) & 0x1) << 26)
+#define V_414_MEMORY 0
+#define V_414_REGISTER 1
+#define S_414_DAS(x) (((x) & 0x1) << 27)
+#define V_414_MEMORY 0
+#define V_414_REGISTER 1
+#define S_414_SAIC(x) (((x) & 0x1) << 28)
+#define V_414_INCREMENT 0
+#define V_414_NO_INCREMENT 1
+#define S_414_DAIC(x) (((x) & 0x1) << 29)
+#define V_414_INCREMENT 0
+#define V_414_NO_INCREMENT 1
+#define S_414_RAW_WAIT(x) (((x) & 0x1) << 30)
#define PKT3_DMA_DATA 0x50 /* new for CIK */
/* 1. header
@@ -205,7 +224,24 @@
* 5. DST_ADDR_HI [31:0]
* 6. COMMAND [29:22] | BYTE_COUNT [20:0]
*/
-
+#define R_500_DMA_DATA_WORD0 0x500 /* 0x[packet number][word index] */
+#define S_500_CP_SYNC(x) (((x) & 0x1) << 31)
+#define S_500_SRC_SEL(x) (((x) & 0x3) << 29)
+#define V_500_SRC_ADDR 0
+#define V_500_GDS 1 /* program SAS to 1 as well */
+#define V_500_DATA 2
+#define V_500_SRC_ADDR_TC_L2 3 /* new for CIK */
+#define S_500_DSL_SEL(x) (((x) & 0x3) << 20)
+#define V_500_DST_ADDR 0
+#define V_500_GDS 1 /* program DAS to 1 as well */
+#define V_500_DST_ADDR_TC_L2 3 /* new for CIK */
+#define S_500_ENGINE(x) ((x) & 0x1)
+#define V_500_ME 0
+#define V_500_PFP 1
+#define R_501_SRC_ADDR_LO 0x501
+#define R_502_SRC_ADDR_HI 0x502
+#define R_503_DST_ADDR_LO 0x503
+#define R_504_DST_ADDR_HI 0x504
#define R_000E4C_SRBM_STATUS2 0x000E4C
#define S_000E4C_SDMA_RQ_PENDING(x) (((x) & 0x1) << 0)