diff options
author | Tom St Denis <tom.stdenis@amd.com> | 2017-02-04 13:31:17 -0500 |
---|---|---|
committer | Tom St Denis <tom.stdenis@amd.com> | 2017-02-04 13:31:17 -0500 |
commit | f46decaa0016367b2745763c489ba72ed469287c (patch) | |
tree | 909f2e347f3a4bcb4a7e7d33615fde786b0c2192 | |
parent | d0db3474f11d8dcc1dac6c6c078b6a7483fc45e5 (diff) |
Add initial ib-following code, polish README for public release, and
add missing PKT3 opcodes.
Signed-off-by: Tom St Denis <tom.stdenis@amd.com>
-rw-r--r-- | README | 24 | ||||
-rw-r--r-- | src/app/main.c | 2 | ||||
-rw-r--r-- | src/app/ring_read.c | 4 | ||||
-rw-r--r-- | src/lib/dump_ib.c | 1 | ||||
-rw-r--r-- | src/lib/read_vram.c | 142 | ||||
-rw-r--r-- | src/lib/ring_decode.c | 54 | ||||
-rw-r--r-- | src/umr.h | 3 |
7 files changed, 201 insertions, 29 deletions
@@ -1,4 +1,4 @@ -UserMode Register Debugger for AMDGPU Hardware +User Mode Register Debugger for AMDGPU Hardware Copyright (c) 2017 AMD Inc. Introduction @@ -18,19 +18,19 @@ to them. The tool is open source and hosted at - : TBD + : https://cgit.freedesktop.org/amd/umr/ Users may report bugs, feedback, and submit patches to the amd-gfx mailing list at: - https://lists.freedesktop.org/mailman/listinfo/amd-gfx + : https://lists.freedesktop.org/mailman/listinfo/amd-gfx Building --------- -To build umr you will need pciaccess and ncurses headers. Which are -available in both Fedora and Ubuntu (as well as other distributions). -To build simply invoke the make command +To build umr you will need pciaccess and ncurses headers and libraries. +Which are available in both Fedora and Ubuntu (as well as other +distributions). To build simply invoke the make command $ make @@ -76,10 +76,14 @@ Selecting Hardware ------------------- On machines with more than one AMDGPU device umr can be instructed -which to look at. If the driver is loaded and display attached +which to look at. If the driver is loaded and display attached [*] the --instance option can select a device. The devices can be listed -with the --enumerate option. Once one is picked the number can be used -for instance: +with the --enumerate option. + +[*] Kernels v4.11 and newer only require the amdgpu module to be loaded +in order to work without '-f'. + +Once one is picked the number can be used for instance: $ umr -i 1 -s uvd6 @@ -172,7 +176,7 @@ Wavefront Debugging The status of valid wavefronts can be displayed with the --waves command. The bits option enables bitfield decoding. Normally, -the colums would be pretty printed: +the columns would be pretty printed: $ umr -wa | column -t diff --git a/src/app/main.c b/src/app/main.c index fb71083..5b16946 100644 --- a/src/app/main.c +++ b/src/app/main.c @@ -82,6 +82,8 @@ static void parse_options(char *str) options.read_smc = 1; } else if (!strcmp(option, "quiet")) { options.quiet = 1; + } else if (!strcmp(option, "follow_ib")) { + options.follow_ib = 1; } else { printf("error: Unknown option [%s]\n", option); exit(EXIT_FAILURE); diff --git a/src/app/ring_read.c b/src/app/ring_read.c index 52206de..1726573 100644 --- a/src/app/ring_read.c +++ b/src/app/ring_read.c @@ -129,7 +129,9 @@ void umr_read_ring(struct umr_asic *asic, char *ringpath) pdecoder = decoder.next_ib; while (pdecoder) { - umr_dump_ib(asic, pdecoder); + if (asic->options.follow_ib) { + umr_dump_ib(asic, pdecoder); + } ppdecoder = pdecoder->next_ib; free(pdecoder); pdecoder = ppdecoder; diff --git a/src/lib/dump_ib.c b/src/lib/dump_ib.c index f88ed3f..4e81dbe 100644 --- a/src/lib/dump_ib.c +++ b/src/lib/dump_ib.c @@ -38,6 +38,7 @@ void umr_dump_ib(struct umr_asic *asic, struct umr_ring_decoder *decoder) data = calloc(sizeof(*data), decoder->next_ib_info.size/sizeof(*data)); if (data && !umr_read_vram(asic, decoder->next_ib_info.vmid, decoder->next_ib_info.ib_addr, decoder->next_ib_info.size, data)) { // dump IB + decoder->pm4.cur_opcode = 0xFFFFFFFF; for (x = 0; x < decoder->next_ib_info.size/4; x++) { printf("IB[%5u] = 0x%08lx ... ", (unsigned)x, (unsigned long)data[x]); umr_print_decode(asic, decoder, data[x]); diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c index 705d611..34d25fd 100644 --- a/src/lib/read_vram.c +++ b/src/lib/read_vram.c @@ -51,6 +51,144 @@ static void read_via_mmio(struct umr_asic *asic, uint64_t address, uint32_t size } } +#define DEBUG printf + +static int umr_read_sram(uint64_t address, uint32_t size, void *dst) +{ + int fd; + + fd = open("/dev/mem", O_RDWR); + if (fd >= 0) { + memset(dst, 0, size); + lseek(fd, address, SEEK_SET); + if (read(fd, dst, size) != size) { + close(fd); + return -1; + } + close(fd); + return 0; + } + return -1; +} + + +static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t address, uint32_t size, void *dst) +{ + uint64_t start_addr, page_table_start_addr, page_table_base_addr, + page_table_size, pte_idx, pde_idx, pte_entry, pde_entry; + uint32_t chunk_size, tmp; + int page_table_depth; + struct { + uint64_t + frag_size, + pte_base_addr, + valid; + } pde_fields; + struct { + uint64_t + page_base_addr, + fragment, + system; + } pte_fields; + char buf[64]; + unsigned char *pdst = dst; + + /* + * PTE format on VI: + * 63:40 reserved + * 39:12 4k physical page base address + * 11:7 fragment + * 6 write + * 5 read + * 4 exe + * 3 reserved + * 2 snooped + * 1 system + * 0 valid + * + * PDE format on VI: + * 63:59 block fragment size + * 58:40 reserved + * 39:1 physical base address of PTE + * bits 5:1 must be 0. + * 0 valid + */ + + // read vm registers + sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR", (int)vmid ? 1 : 0); + page_table_start_addr = (uint64_t)umr_read_reg_by_name(asic, buf) << 12; + + sprintf(buf, "mmVM_CONTEXT%d_CNTL", (int)vmid ? 1 : 0); + tmp = umr_read_reg_by_name(asic, buf); + page_table_depth = umr_bitslice_reg_by_name(asic, buf, "PAGE_TABLE_DEPTH", tmp); + page_table_size = umr_bitslice_reg_by_name(asic, buf, "PAGE_TABLE_BLOCK_SIZE", tmp); + + sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR", (int)vmid); + page_table_base_addr = (uint64_t)umr_read_reg_by_name(asic, buf) << 12; + + DEBUG("PAGE_START_ADDR = %08llx\n", (unsigned long long)page_table_start_addr); + DEBUG("BASE_ADDR = 0x%08llx\n", (unsigned long long)page_table_base_addr); + DEBUG("BASE_SIZE = %lu\n", page_table_size); + DEBUG("PAGE_TABLE_DEPTH = %d\n", page_table_depth); + + address -= page_table_start_addr; + + while (size) { + if (page_table_depth == 1) { + // decode addr into pte and pde selectors... + pde_idx = (address >> (12 + 9 + page_table_size)) & ((1UL << (19 - page_table_size)) - 1); + pte_idx = (address >> 12) & ((1UL << (9 + page_table_size)) - 1); + + // read PDE entry + umr_read_vram(asic, 0xFFFF, page_table_base_addr + pde_idx * 8, 8, &pde_entry); + + // decode PDE values + pde_fields.frag_size = (pde_entry >> 59) & 0x1F; + pde_fields.pte_base_addr = pde_entry & 0xFFFFFFF000; + pde_fields.valid = pde_entry & 1; + DEBUG("pde_idx=%llx, frag_size=%u, pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx, (unsigned)pde_fields.frag_size, (unsigned long long)pde_fields.pte_base_addr, (int)pde_fields.valid); + + // now read PTE entry for this page + umr_read_vram(asic, 0xFFFF, pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry); + + // decode PTE values + pte_fields.page_base_addr = pte_entry & 0xFFFFFFF000; + pte_fields.fragment = (pte_entry >> 7) & 0x1F; + pte_fields.system = (pte_entry >> 1) & 1; + DEBUG("pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d\n", (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment, (int)pte_fields.system); + + // compute starting address + start_addr = pte_fields.page_base_addr + (address & 0xFFF); + } else { + fprintf(stderr, "[BUG] depth 0 page tables are not yet supported\n"); + return -1; + } + + // read upto 4K from it + if (((start_addr & 0xFFF) + size) & ~0xFFF) { + chunk_size = 0x1000 - (start_addr & 0xFFF); + } else { + chunk_size = size; + } + DEBUG("Computed address we will read from: %s:%llx (reading: %lu bytes)\n", pte_fields.system ? "sys" : "vram", (unsigned long long)start_addr, (unsigned long)chunk_size); + if (pte_fields.system) { + if (umr_read_sram(start_addr, chunk_size, pdst) < 0) { + fprintf(stderr, "[ERROR] Cannot read system ram, perhaps CONFIG_STRICT_DEVMEM is set in your kernel config?\n"); + return -1; + } + } else { + if (umr_read_vram(asic, 0xFFFF, start_addr, chunk_size, pdst) < 0) { + fprintf(stderr, "[ERROR] Cannot read from VRAM\n"); + return -1; + } + } + pdst += chunk_size; + size -= chunk_size; + address += chunk_size; + } + return 0; +} + int umr_read_vram(struct umr_asic *asic, uint32_t vmid, uint64_t address, uint32_t size, void *dst) { @@ -74,5 +212,9 @@ int umr_read_vram(struct umr_asic *asic, uint32_t vmid, uint64_t address, uint32 } return 0; } + + if (asic->family == FAMILY_VI) + return umr_read_vram_vi(asic, vmid, address, size, dst); + return 0; } diff --git a/src/lib/ring_decode.c b/src/lib/ring_decode.c index f2c27d0..401d6ca 100644 --- a/src/lib/ring_decode.c +++ b/src/lib/ring_decode.c @@ -55,11 +55,11 @@ static const char *pm4_pkt3_opcode_names[] = { "UNK", // 1a "UNK", // 1b "UNK", // 1c - "UNK", // 1d - "UNK", // 1e + "PKT3_ATOMIC_GDS", // 1d + "PKT3_ATOMIC_MEM", // 1e "PKT3_OCCLUSION_QUERY", // 1f "PKT3_SET_PREDICATION", // 20 - "UNK", // 21 + "PKT3_REG_RMW", // 21 "PKT3_COND_EXEC", // 22 "PKT3_PRED_EXEC", // 23 "PKT3_DRAW_INDIRECT", // 24 @@ -99,8 +99,8 @@ static const char *pm4_pkt3_opcode_names[] = { "PKT3_EVENT_WRITE", // 46 "PKT3_EVENT_WRITE_EOP", // 47 "PKT3_EVENT_WRITE_EOS", // 48 - "UNK", // 49 - "UNK", // 4a + "PKT3_RELEASE_MEM", // 49 + "PKT3_PREAMBLE_CNTL", // 4a "UNK", // 4b "UNK", // 4c "UNK", // 4d @@ -115,15 +115,15 @@ static const char *pm4_pkt3_opcode_names[] = { "UNK", // 56 "PKT3_ONE_REG_WRITE", // 57 "PKT3_ACQUIRE_MEM", // 58 - "UNK", // 59 + "PKT3_REWIND", // 59 "UNK", // 5a "UNK", // 5b "UNK", // 5c "UNK", // 5d - "UNK", // 5e - "UNK", // 5f - "UNK", // 60 - "UNK", // 61 + "PKT3_LOAD_UCONFIG_REG", // 5e + "PKT3_LOAD_SH_REG", // 5f + "PKT3_LOAD_CONFIG_REG", // 60 + "PKT3_LOAD_CONTEXT_REG", // 61 "UNK", // 62 "UNK", // 63 "UNK", // 64 @@ -141,12 +141,12 @@ static const char *pm4_pkt3_opcode_names[] = { "UNK", // 70 "UNK", // 71 "UNK", // 72 - "UNK", // 73 + "PKT3_SET_CONTEXT_REG_INDIRECT", // 73 "UNK", // 74 "UNK", // 75 "PKT3_SET_SH_REG", // 76 "PKT3_SET_SH_REG_OFFSET", // 77 - "UNK", // 78 + "PKT3_SET_QUEUE_REG", // 78 "PKT3_SET_UCONFIG_REG", // 79 "UNK", // 7a "UNK", // 7b @@ -162,10 +162,10 @@ static const char *pm4_pkt3_opcode_names[] = { "PKT3_INCREMENT_DE_COUNTER", // 85 "PKT3_WAIT_ON_CE_COUNTER", // 86 "UNK", // 87 - "UNK", // 88 + "PKT3_WAIT_ON_DE_COUNTER_DIFF", // 88 "UNK", // 89 "UNK", // 8a - "UNK", // 8b + "PKT3_SWITCH_BUFFER", // 8b "UNK", // 8c "UNK", // 8d "UNK", // 8e @@ -186,9 +186,9 @@ static const char *pm4_pkt3_opcode_names[] = { "UNK", // 9d "UNK", // 9e "UNK", // 9f - "UNK", // a0 + "PKT3_SET_RESOURCES", // a0 "UNK", // a1 - "UNK", // a2 + "PKT3_MAP_QUEUES", // a2 "UNK", // a3 "UNK", // a4 "UNK", // a5 @@ -312,7 +312,7 @@ char *umr_reg_name(struct umr_asic *asic, uint64_t addr) for (i = 0; i < asic->no_blocks; i++) for (j = 0; j < asic->blocks[i]->no_regs; j++) - if (asic->blocks[i]->regs[j].addr == addr) + if (asic->blocks[i]->regs[j].type == REG_MMIO && asic->blocks[i]->regs[j].addr == addr) return asic->blocks[i]->regs[j].regname; return "<unknown>"; } @@ -409,6 +409,26 @@ static void print_decode_pm4_pkt3(struct umr_asic *asic, struct umr_ring_decoder default: printf("Invalid word for opcode 0x%02lx", (unsigned long)decoder->pm4.cur_opcode); } break; + case 0x49: // RELEASE_MEM + switch(decoder->pm4.cur_word) { + case 0: printf("EOP_TCL1_ACTION: %lu, EOP_TC_ACTION: %lu, EOP_TC_WB_ACTION: %lu, EVENT_TYPE: %lu, EVENT_INDEX: %lu", + BITS(ib, 16, 17), BITS(ib, 17, 18), BITS(ib, 15, 16), BITS(ib, 0, 7), BITS(ib, 8, 15)); + break; + case 1: + printf("DATA_SEL+INT_SEL: 0x%08lx", (unsigned long)ib); +//DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0) + break; + case 2: printf("ADDR_LO: 0x%08lx", (unsigned long)ib); + break; + case 3: printf("ADDR_HI: 0x%08lx", (unsigned long)ib); + break; + case 4: printf("SEQ_LO: 0x%08lx", (unsigned long)ib); + break; + case 5: printf("SEQ_HI: 0x%08lx", (unsigned long)ib); + break; + default: printf("Invalid word for opcode 0x%02lx", (unsigned long)decoder->pm4.cur_opcode); + } + break; default: printf("PKT3 DATA"); break; @@ -152,7 +152,8 @@ struct umr_options { use_pci, use_colour, read_smc, - quiet; + quiet, + follow_ib; unsigned instance_bank, se_bank, |