summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tom St Denis <tom.stdenis@amd.com> 2017-02-04 13:31:17 -0500
committer Tom St Denis <tom.stdenis@amd.com> 2017-02-04 13:31:17 -0500
commit f46decaa0016367b2745763c489ba72ed469287c (patch)
tree 909f2e347f3a4bcb4a7e7d33615fde786b0c2192
parent d0db3474f11d8dcc1dac6c6c078b6a7483fc45e5 (diff)
Add initial ib-following code, polish README for public release, and
add missing PKT3 opcodes. Signed-off-by: Tom St Denis <tom.stdenis@amd.com>
-rw-r--r-- README 24
-rw-r--r-- src/app/main.c 2
-rw-r--r-- src/app/ring_read.c 4
-rw-r--r-- src/lib/dump_ib.c 1
-rw-r--r-- src/lib/read_vram.c 142
-rw-r--r-- src/lib/ring_decode.c 54
-rw-r--r-- src/umr.h 3
7 files changed, 201 insertions, 29 deletions
diff --git a/README b/README
index 0a5a38e..2700736 100644
--- a/README
+++ b/README
@@ -1,4 +1,4 @@
-UserMode Register Debugger for AMDGPU Hardware
+User Mode Register Debugger for AMDGPU Hardware
Copyright (c) 2017 AMD Inc.
Introduction
@@ -18,19 +18,19 @@ to them.
The tool is open source and hosted at
- : TBD
+ : https://cgit.freedesktop.org/amd/umr/
Users may report bugs, feedback, and submit patches to the amd-gfx
mailing list at:
- https://lists.freedesktop.org/mailman/listinfo/amd-gfx
+ : https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Building
---------
-To build umr you will need pciaccess and ncurses headers. Which are
-available in both Fedora and Ubuntu (as well as other distributions).
-To build simply invoke the make command
+To build umr you will need the pciaccess and ncurses headers and
+libraries, which are available in both Fedora and Ubuntu (as well as
+other distributions). To build simply invoke the make command
$ make
@@ -76,10 +76,14 @@ Selecting Hardware
-------------------
On machines with more than one AMDGPU device umr can be instructed
-which to look at. If the driver is loaded and display attached
+which to look at. If the driver is loaded and display attached [*]
the --instance option can select a device. The devices can be listed
-with the --enumerate option. Once one is picked the number can be used
-for instance:
+with the --enumerate option.
+
+[*] Kernels v4.11 and newer only require the amdgpu module to be loaded
+in order to work without '-f'.
+
+Once one is picked the number can be used for instance:
$ umr -i 1 -s uvd6
@@ -172,7 +176,7 @@ Wavefront Debugging
The status of valid wavefronts can be displayed with the --waves
command. The bits option enables bitfield decoding. Normally,
-the colums would be pretty printed:
+the columns would be pretty printed:
$ umr -wa | column -t
diff --git a/src/app/main.c b/src/app/main.c
index fb71083..5b16946 100644
--- a/src/app/main.c
+++ b/src/app/main.c
@@ -82,6 +82,8 @@ static void parse_options(char *str)
options.read_smc = 1;
} else if (!strcmp(option, "quiet")) {
options.quiet = 1;
+ } else if (!strcmp(option, "follow_ib")) {
+ options.follow_ib = 1;
} else {
printf("error: Unknown option [%s]\n", option);
exit(EXIT_FAILURE);
diff --git a/src/app/ring_read.c b/src/app/ring_read.c
index 52206de..1726573 100644
--- a/src/app/ring_read.c
+++ b/src/app/ring_read.c
@@ -129,7 +129,9 @@ void umr_read_ring(struct umr_asic *asic, char *ringpath)
pdecoder = decoder.next_ib;
while (pdecoder) {
- umr_dump_ib(asic, pdecoder);
+ if (asic->options.follow_ib) {
+ umr_dump_ib(asic, pdecoder);
+ }
ppdecoder = pdecoder->next_ib;
free(pdecoder);
pdecoder = ppdecoder;
diff --git a/src/lib/dump_ib.c b/src/lib/dump_ib.c
index f88ed3f..4e81dbe 100644
--- a/src/lib/dump_ib.c
+++ b/src/lib/dump_ib.c
@@ -38,6 +38,7 @@ void umr_dump_ib(struct umr_asic *asic, struct umr_ring_decoder *decoder)
data = calloc(sizeof(*data), decoder->next_ib_info.size/sizeof(*data));
if (data && !umr_read_vram(asic, decoder->next_ib_info.vmid, decoder->next_ib_info.ib_addr, decoder->next_ib_info.size, data)) {
// dump IB
+ decoder->pm4.cur_opcode = 0xFFFFFFFF;
for (x = 0; x < decoder->next_ib_info.size/4; x++) {
printf("IB[%5u] = 0x%08lx ... ", (unsigned)x, (unsigned long)data[x]);
umr_print_decode(asic, decoder, data[x]);
diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c
index 705d611..34d25fd 100644
--- a/src/lib/read_vram.c
+++ b/src/lib/read_vram.c
@@ -51,6 +51,144 @@ static void read_via_mmio(struct umr_asic *asic, uint64_t address, uint32_t size
}
}
+// Trace helper: DEBUG expands directly to printf, so every DEBUG(...)
+// call below prints unconditionally to stdout.
+#define DEBUG printf
+
+/**
+ * umr_read_sram - Read a span of system memory through /dev/mem.
+ *
+ * @address: byte offset into /dev/mem to start reading at
+ * @size: number of bytes to read
+ * @dst: destination buffer, at least @size bytes
+ *
+ * Once the device is open the destination is zeroed before reading.
+ *
+ * Returns 0 when exactly @size bytes were read, or -1 when the device
+ * could not be opened, the seek failed, or the read came up short.
+ */
+static int umr_read_sram(uint64_t address, uint32_t size, void *dst)
+{
+	int fd, ret = -1;
+	ssize_t got;
+
+	// nothing is ever written here, so do not ask for write access
+	fd = open("/dev/mem", O_RDONLY);
+	if (fd < 0)
+		return -1;
+
+	memset(dst, 0, size);
+
+	// a failed seek would otherwise silently read from the wrong offset
+	if (lseek(fd, (off_t)address, SEEK_SET) != (off_t)-1) {
+		got = read(fd, dst, size);
+		// compare in an unsigned 64-bit domain only after ruling out -1
+		if (got >= 0 && (uint64_t)got == size)
+			ret = 0;
+	}
+
+	close(fd);
+	return ret;
+}
+
+
+/**
+ * umr_read_vram_vi - Read memory through a VM context's page tables (VI).
+ *
+ * @asic: device to read from
+ * @vmid: VM id; selects the mmVM_CONTEXT<vmid>_PAGE_TABLE_BASE_ADDR
+ *        register, while the START_ADDR and CNTL registers are taken
+ *        from context 0 when @vmid is 0 and from context 1 otherwise
+ * @address: virtual address to start reading at
+ * @size: number of bytes to read
+ * @dst: destination buffer, at least @size bytes
+ *
+ * Walks a depth-1 page table (PDE -> PTE) once per 4K page touched and
+ * copies each piece from system memory (PTE "system" bit set) or from
+ * VRAM.  The page table entries themselves are fetched with
+ * umr_read_vram() using vmid 0xFFFF.
+ * NOTE(review): 0xFFFF presumably selects an untranslated read --
+ * confirm against umr_read_vram().
+ *
+ * Returns 0 on success and -1 on any failure (unsupported page table
+ * depth, unreadable or invalid PDE/PTE, failed data read).
+ */
+static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t address, uint32_t size, void *dst)
+{
+	uint64_t start_addr, page_table_start_addr, page_table_base_addr,
+		 page_table_size, pte_idx, pde_idx, pte_entry, pde_entry,
+		 page_left;
+	uint32_t chunk_size, tmp;
+	int page_table_depth;
+	struct {
+		uint64_t
+			frag_size,
+			pte_base_addr,
+			valid;
+	} pde_fields;
+	struct {
+		uint64_t
+			page_base_addr,
+			fragment,
+			system,
+			valid;
+	} pte_fields;
+	char buf[64];
+	unsigned char *pdst = dst;
+
+	/*
+	 * PTE format on VI:
+	 * 63:40 reserved
+	 * 39:12 4k physical page base address
+	 * 11:7 fragment
+	 * 6 write
+	 * 5 read
+	 * 4 exe
+	 * 3 reserved
+	 * 2 snooped
+	 * 1 system
+	 * 0 valid
+	 *
+	 * PDE format on VI:
+	 * 63:59 block fragment size
+	 * 58:40 reserved
+	 * 39:1 physical base address of PTE
+	 * bits 5:1 must be 0.
+	 * 0 valid
+	 */
+
+	// read vm registers
+	sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR", (int)vmid ? 1 : 0);
+	page_table_start_addr = (uint64_t)umr_read_reg_by_name(asic, buf) << 12;
+
+	sprintf(buf, "mmVM_CONTEXT%d_CNTL", (int)vmid ? 1 : 0);
+	tmp = umr_read_reg_by_name(asic, buf);
+	page_table_depth = umr_bitslice_reg_by_name(asic, buf, "PAGE_TABLE_DEPTH", tmp);
+	page_table_size = umr_bitslice_reg_by_name(asic, buf, "PAGE_TABLE_BLOCK_SIZE", tmp);
+
+	sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR", (int)vmid);
+	page_table_base_addr = (uint64_t)umr_read_reg_by_name(asic, buf) << 12;
+
+	DEBUG("PAGE_START_ADDR = %08llx\n", (unsigned long long)page_table_start_addr);
+	DEBUG("BASE_ADDR = 0x%08llx\n", (unsigned long long)page_table_base_addr);
+	// cast so the argument matches %lu even where long is 32 bits
+	DEBUG("BASE_SIZE = %lu\n", (unsigned long)page_table_size);
+	DEBUG("PAGE_TABLE_DEPTH = %d\n", page_table_depth);
+
+	// the page table covers addresses relative to its start address
+	address -= page_table_start_addr;
+
+	while (size) {
+		if (page_table_depth != 1) {
+			fprintf(stderr, "[BUG] depth 0 page tables are not yet supported\n");
+			return -1;
+		}
+
+		// decode addr into pte and pde selectors...
+		pde_idx = (address >> (12 + 9 + page_table_size)) & ((1ULL << (19 - page_table_size)) - 1);
+		pte_idx = (address >> 12) & ((1ULL << (9 + page_table_size)) - 1);
+
+		// read PDE entry; start from 0 so an untouched buffer decodes as invalid
+		pde_entry = 0;
+		if (umr_read_vram(asic, 0xFFFF, page_table_base_addr + pde_idx * 8, 8, &pde_entry) < 0) {
+			fprintf(stderr, "[ERROR] Cannot read PDE entry\n");
+			return -1;
+		}
+
+		// decode PDE values (the base address is masked to bits 39:12)
+		pde_fields.frag_size = (pde_entry >> 59) & 0x1F;
+		pde_fields.pte_base_addr = pde_entry & 0xFFFFFFF000ULL;
+		pde_fields.valid = pde_entry & 1;
+		DEBUG("pde_idx=%llx, frag_size=%u, pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx, (unsigned)pde_fields.frag_size, (unsigned long long)pde_fields.pte_base_addr, (int)pde_fields.valid);
+
+		// an invalid PDE does not point at a PTE block; do not follow it
+		if (!pde_fields.valid) {
+			fprintf(stderr, "[ERROR] PDE entry is not valid\n");
+			return -1;
+		}
+
+		// now read PTE entry for this page
+		pte_entry = 0;
+		if (umr_read_vram(asic, 0xFFFF, pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry) < 0) {
+			fprintf(stderr, "[ERROR] Cannot read PTE entry\n");
+			return -1;
+		}
+
+		// decode PTE values
+		pte_fields.page_base_addr = pte_entry & 0xFFFFFFF000ULL;
+		pte_fields.fragment = (pte_entry >> 7) & 0x1F;
+		pte_fields.system = (pte_entry >> 1) & 1;
+		pte_fields.valid = pte_entry & 1;
+		DEBUG("pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d\n", (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment, (int)pte_fields.system);
+
+		// an invalid PTE maps nothing, so there is no page to read
+		if (!pte_fields.valid) {
+			fprintf(stderr, "[ERROR] PTE entry is not valid\n");
+			return -1;
+		}
+
+		// compute starting address
+		start_addr = pte_fields.page_base_addr + (address & 0xFFF);
+
+		// read up to the end of the current 4K page; the next page needs its own lookup
+		page_left = 0x1000 - (start_addr & 0xFFF);
+		chunk_size = (size < page_left) ? size : (uint32_t)page_left;
+
+		DEBUG("Computed address we will read from: %s:%llx (reading: %lu bytes)\n", pte_fields.system ? "sys" : "vram", (unsigned long long)start_addr, (unsigned long)chunk_size);
+		if (pte_fields.system) {
+			if (umr_read_sram(start_addr, chunk_size, pdst) < 0) {
+				fprintf(stderr, "[ERROR] Cannot read system ram, perhaps CONFIG_STRICT_DEVMEM is set in your kernel config?\n");
+				return -1;
+			}
+		} else {
+			if (umr_read_vram(asic, 0xFFFF, start_addr, chunk_size, pdst) < 0) {
+				fprintf(stderr, "[ERROR] Cannot read from VRAM\n");
+				return -1;
+			}
+		}
+		pdst += chunk_size;
+		size -= chunk_size;
+		address += chunk_size;
+	}
+	return 0;
+}
+
int umr_read_vram(struct umr_asic *asic, uint32_t vmid, uint64_t address, uint32_t size, void *dst)
{
@@ -74,5 +212,9 @@ int umr_read_vram(struct umr_asic *asic, uint32_t vmid, uint64_t address, uint32
}
return 0;
}
+
+ if (asic->family == FAMILY_VI)
+ return umr_read_vram_vi(asic, vmid, address, size, dst);
+
return 0;
}
diff --git a/src/lib/ring_decode.c b/src/lib/ring_decode.c
index f2c27d0..401d6ca 100644
--- a/src/lib/ring_decode.c
+++ b/src/lib/ring_decode.c
@@ -55,11 +55,11 @@ static const char *pm4_pkt3_opcode_names[] = {
"UNK", // 1a
"UNK", // 1b
"UNK", // 1c
- "UNK", // 1d
- "UNK", // 1e
+ "PKT3_ATOMIC_GDS", // 1d
+ "PKT3_ATOMIC_MEM", // 1e
"PKT3_OCCLUSION_QUERY", // 1f
"PKT3_SET_PREDICATION", // 20
- "UNK", // 21
+ "PKT3_REG_RMW", // 21
"PKT3_COND_EXEC", // 22
"PKT3_PRED_EXEC", // 23
"PKT3_DRAW_INDIRECT", // 24
@@ -99,8 +99,8 @@ static const char *pm4_pkt3_opcode_names[] = {
"PKT3_EVENT_WRITE", // 46
"PKT3_EVENT_WRITE_EOP", // 47
"PKT3_EVENT_WRITE_EOS", // 48
- "UNK", // 49
- "UNK", // 4a
+ "PKT3_RELEASE_MEM", // 49
+ "PKT3_PREAMBLE_CNTL", // 4a
"UNK", // 4b
"UNK", // 4c
"UNK", // 4d
@@ -115,15 +115,15 @@ static const char *pm4_pkt3_opcode_names[] = {
"UNK", // 56
"PKT3_ONE_REG_WRITE", // 57
"PKT3_ACQUIRE_MEM", // 58
- "UNK", // 59
+ "PKT3_REWIND", // 59
"UNK", // 5a
"UNK", // 5b
"UNK", // 5c
"UNK", // 5d
- "UNK", // 5e
- "UNK", // 5f
- "UNK", // 60
- "UNK", // 61
+ "PKT3_LOAD_UCONFIG_REG", // 5e
+ "PKT3_LOAD_SH_REG", // 5f
+ "PKT3_LOAD_CONFIG_REG", // 60
+ "PKT3_LOAD_CONTEXT_REG", // 61
"UNK", // 62
"UNK", // 63
"UNK", // 64
@@ -141,12 +141,12 @@ static const char *pm4_pkt3_opcode_names[] = {
"UNK", // 70
"UNK", // 71
"UNK", // 72
- "UNK", // 73
+ "PKT3_SET_CONTEXT_REG_INDIRECT", // 73
"UNK", // 74
"UNK", // 75
"PKT3_SET_SH_REG", // 76
"PKT3_SET_SH_REG_OFFSET", // 77
- "UNK", // 78
+ "PKT3_SET_QUEUE_REG", // 78
"PKT3_SET_UCONFIG_REG", // 79
"UNK", // 7a
"UNK", // 7b
@@ -162,10 +162,10 @@ static const char *pm4_pkt3_opcode_names[] = {
"PKT3_INCREMENT_DE_COUNTER", // 85
"PKT3_WAIT_ON_CE_COUNTER", // 86
"UNK", // 87
- "UNK", // 88
+ "PKT3_WAIT_ON_DE_COUNTER_DIFF", // 88
"UNK", // 89
"UNK", // 8a
- "UNK", // 8b
+ "PKT3_SWITCH_BUFFER", // 8b
"UNK", // 8c
"UNK", // 8d
"UNK", // 8e
@@ -186,9 +186,9 @@ static const char *pm4_pkt3_opcode_names[] = {
"UNK", // 9d
"UNK", // 9e
"UNK", // 9f
- "UNK", // a0
+ "PKT3_SET_RESOURCES", // a0
"UNK", // a1
- "UNK", // a2
+ "PKT3_MAP_QUEUES", // a2
"UNK", // a3
"UNK", // a4
"UNK", // a5
@@ -312,7 +312,7 @@ char *umr_reg_name(struct umr_asic *asic, uint64_t addr)
for (i = 0; i < asic->no_blocks; i++)
for (j = 0; j < asic->blocks[i]->no_regs; j++)
- if (asic->blocks[i]->regs[j].addr == addr)
+ if (asic->blocks[i]->regs[j].type == REG_MMIO && asic->blocks[i]->regs[j].addr == addr)
return asic->blocks[i]->regs[j].regname;
return "<unknown>";
}
@@ -409,6 +409,26 @@ static void print_decode_pm4_pkt3(struct umr_asic *asic, struct umr_ring_decoder
default: printf("Invalid word for opcode 0x%02lx", (unsigned long)decoder->pm4.cur_opcode);
}
break;
+ case 0x49: // RELEASE_MEM
+ switch(decoder->pm4.cur_word) {
+ case 0: printf("EOP_TCL1_ACTION: %lu, EOP_TC_ACTION: %lu, EOP_TC_WB_ACTION: %lu, EVENT_TYPE: %lu, EVENT_INDEX: %lu",
+ BITS(ib, 16, 17), BITS(ib, 17, 18), BITS(ib, 15, 16), BITS(ib, 0, 7), BITS(ib, 8, 15));
+ break;
+ case 1:
+ printf("DATA_SEL+INT_SEL: 0x%08lx", (unsigned long)ib);
+//DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)
+ break;
+ case 2: printf("ADDR_LO: 0x%08lx", (unsigned long)ib);
+ break;
+ case 3: printf("ADDR_HI: 0x%08lx", (unsigned long)ib);
+ break;
+ case 4: printf("SEQ_LO: 0x%08lx", (unsigned long)ib);
+ break;
+ case 5: printf("SEQ_HI: 0x%08lx", (unsigned long)ib);
+ break;
+ default: printf("Invalid word for opcode 0x%02lx", (unsigned long)decoder->pm4.cur_opcode);
+ }
+ break;
default:
printf("PKT3 DATA");
break;
diff --git a/src/umr.h b/src/umr.h
index bf33a27..f2bce13 100644
--- a/src/umr.h
+++ b/src/umr.h
@@ -152,7 +152,8 @@ struct umr_options {
use_pci,
use_colour,
read_smc,
- quiet;
+ quiet,
+ follow_ib;
unsigned
instance_bank,
se_bank,