diff options
author | Luca Barbieri <luca@luca-barbieri.com> | 2010-04-17 12:45:57 +0200 |
---|---|---|
committer | Luca Barbieri <luca@luca-barbieri.com> | 2010-04-17 12:54:42 +0200 |
commit | c2bf6e7ba6c77b345fe1394d90e2e53d0f0c964c (patch) | |
tree | 48fd730c301a55368b45e317b329f5ab9da4c845 |
Initial version
Currently nv40-only, but should be easy to get working on other cards.
Look at re.c in renouveau to find out how to find the FIFO.
-rw-r--r-- | Makefile | 8 | ||||
-rw-r--r-- | libnvdump.c | 472 | ||||
-rwxr-xr-x | nvdump | 21 |
3 files changed, 501 insertions, 0 deletions
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..aa41547 --- /dev/null +++ b/Makefile @@ -0,0 +1,8 @@ +all: libnvdump.so + +libnvdump.so: libnvdump.c + gcc -g -O2 -shared $^ -o $@ -ldl + +install: libnvdump.so nvdump + install nvdump /usr/bin + install libnvdump.so /usr/lib diff --git a/libnvdump.c b/libnvdump.c new file mode 100644 index 0000000..7147790 --- /dev/null +++ b/libnvdump.c @@ -0,0 +1,472 @@ +#define _GNU_SOURCE +#define open sys_open +#define open64 sys_open64 +#include <stdlib.h> +#include <memory.h> +#include <dlfcn.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/fcntl.h> +#include <sys/signal.h> +#include <sys/ucontext.h> +#include <stdint.h> +#undef open +#undef open64 + +#define NEXT(func) ({static typeof(func)* NEXT_##func; if(!NEXT_##func) NEXT_##func = (typeof(func)*)dlsym(RTLD_NEXT, #func); NEXT_##func;}) + +FILE* dump_file; + +struct nvidia_card +{ + unsigned busid; + unsigned long long vram_phys; + unsigned long long vram_bar_size; + unsigned long long mmio_phys; +// char* mmio; + + /* TODO: this should be in a channel struct */ + char* fifo; + unsigned fifo_base; + unsigned put; + unsigned pusher_return; + unsigned method_left; + + char* scratch; +}; + +// TODO: this assumes a single card + +struct nvidia_card single_card; + +struct nvidia_fd +{ + int fd; + struct nvidia_card* card; +}; + +struct nvidia_fd nvidia_fds[256]; +int num_nvidia_fds; + +struct nvidia_card* get_card(int fd) +{ + int i; + for(i = 0; i < num_nvidia_fds; ++i) + if(nvidia_fds[i].fd == fd) + return nvidia_fds[i].card; + return 0; +} + +struct sigaction app_sigsegv_action; + +struct nvidia_intercepted_mapping +{ + void* addr; + void* real_addr; + size_t size; + struct nvidia_card* card; + int channel; +}; + +struct nvidia_intercepted_mapping nvidia_intercepted_mappings[256]; +int num_nvidia_intercepted_mappings; + +static void +emulate_write32(struct nvidia_intercepted_mapping* nim, void* addr, unsigned value) +{ + 
unsigned off = addr - nim->addr; + + if(off == 0x40) { + unsigned* fifo_start = (unsigned*)((char*)nim->card->fifo + nim->card->put - nim->card->fifo_base); + unsigned* fifo_end = (unsigned*)((char*)nim->card->fifo + value - nim->card->fifo_base); + unsigned* p; + fprintf(dump_file, "# fifo from %x to %x\n", nim->card->put, value); + for(p = fifo_start; p != fifo_end; ++p) + { + unsigned v = *p; + fprintf(dump_file, "%x\n", v); + if(nim->card->method_left) + --nim->card->method_left; + else if((v & 3) == 1) { + fprintf(dump_file, "# jump to %x\n", v & ~3); + p = (unsigned*)((char*)nim->card->fifo + (v & ~3) - nim->card->fifo_base); + } else if((v & 3) == 2) { + fprintf(dump_file, "# call to %x\n", v & ~3); + nim->card->pusher_return = ((char*)p - nim->card->fifo) + nim->card->fifo_base; + p= (unsigned*)((char*)nim->card->fifo + (v & 0x1fffffff) - nim->card->fifo_base); + } else if((v & 0xe0000000) == 0x20000000) { + fprintf(dump_file, "# old-style jump to %x\n", v & 0x1fffffff); + p = (unsigned*)((char*)nim->card->fifo + (v & 0x1fffffff) - nim->card->fifo_base); + } else if(v == 0x00020000) { + fprintf(dump_file, "# return\n"); + p = (unsigned*)((char*)nim->card->fifo + nim->card->pusher_return - nim->card->fifo_base); + } else if(!(v & 0xa0000000)) + nim->card->method_left = (v >> 18) & 2047; + else + fprintf(dump_file, "# unknown ring value %x\n", v); + +// printf("at %u we have %x\n", p - nim->card->fifo, v); +// printf("%x\n", *p); + } + fprintf(dump_file, "# end\n"); + fflush(dump_file); + nim->card->put = value; + } +// fprintf(dump_file, "# W %x <- %x\n", off, value); + *(unsigned*)(nim->real_addr + off) = value; +} + +static unsigned +emulate_read32(struct nvidia_intercepted_mapping* nim, void* addr) +{ + unsigned off = addr - nim->addr; + unsigned value = *(unsigned*)(nim->real_addr + off); +// fprintf(dump_file, "R %x -> %x\n", off, value); + return value; +} + +/* awesome idea, this unnecessary arbitrary remapping */ +#if __WORDSIZE == 32 +unsigned 
reg_to_uctx[8] = {REG_EAX, REG_ECX, REG_EDX, REG_EBX, REG_ESP, REG_EBP, REG_ESI, REG_EDI}; +#else +unsigned reg_to_uctx[16] = {REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI, REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15}; +#endif + +static inline unsigned long* +uctx_reg(ucontext_t* uctx, unsigned reg) +{ + return (unsigned long*)&uctx->uc_mcontext.gregs[reg_to_uctx[reg]]; +} + +static void +emulate_instruction(struct nvidia_intercepted_mapping* nim, ucontext_t* uctx) +{ + uint8_t* code = (uint8_t*)uctx->uc_mcontext.gregs[REG_EIP]; + uint8_t* p = code; + unsigned long addr = 0; + unsigned mod; + long imm; + int reg = -1; + unsigned char rex = 0; + unsigned opcode; + +// fprintf(stderr, "x86 instruction accessing intercepted mapping:" +// " %02x %02x %02x %02x %02x %02x %02x %02x\n", +// p[0], p[1], p[2] ,p[3], p[4], p[5], p[6], p[7]); + + for(;;) { + if(0) {} +#if __WORDSIZE == 64 + if((*p & 0xf0) == 0x40) + rex = *p++; +#endif + else + break; + } + + opcode = *p++; + if(opcode == 0x89 || opcode == 0x8b) + goto modrm; + else + goto unhandled; + +modrm: + { + int rm = *p & 7; + reg = ((*p >> 3) & 7) + ((rex & 4) << 1); + mod = (*p & 0xc0); + ++p; + if(mod == 0) { + if(rm == 5) + goto disp32; + else if(rm == 4) + goto sib; + else + addr = *uctx_reg(uctx, rm + ((rex & 1) << 3)); + } else if(mod == 0x40) { /* mod1: disp8(%reg) */ + if(rm == 4) + goto sib; + else { + addr = *uctx_reg(uctx, rm + ((rex & 1) << 3)); + goto disp8; + } + } else if(mod == 0x80) { + if(rm == 4) + goto sib; + else { + addr = *uctx_reg(uctx, rm + ((rex & 1) << 3)); + goto disp32; + } + } else + goto unhandled; + } + +sib: + { + int si = (*p >> 3) & 7; + if(si != 4) + addr += *uctx_reg(uctx, si + ((rex & 2) << 2)) << (*p >> 6); + int base = (*p & 7); + ++p; + if(base == 5 && !(p[-2] & 0xc0)) + goto disp32; + else { + addr += *uctx_reg(uctx, base + ((rex & 1) << 3)); + if(mod == 0x40) + goto disp8; + else if(mod == 0x80) + goto disp32; + } + } + 
+disp8: + addr += *p++; + goto imm; + +disp32: + addr += p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24); + p += 4; + goto imm; + +imm: + goto end; + +end: + if(opcode == 0x89) + emulate_write32(nim, (void*)addr, *uctx_reg(uctx, reg)); + else if(opcode == 0x8b) + *uctx_reg(uctx, reg) = emulate_read32(nim, (void*)addr); + + uctx->uc_mcontext.gregs[REG_EIP] = (long)p; + return; + +unhandled: + p = code; + fprintf(stderr, "Fatal error: unhandled x86 instruction accessing intercepted mapping:" + " %02x %02x %02x %02x %02x %02x %02x %02x\n", + p[0], p[1], p[2] ,p[3], p[4], p[5], p[6], p[7]); + fflush(stderr); + abort(); +} + +static void +sigsegv_handler(int signum, siginfo_t *si, void *uctxp) +{ + ucontext_t* uctx = uctxp; + unsigned i; + + //fprintf(dump_file, "# SEGV on %p\n", si->si_addr); + + for(i = 0; i < num_nvidia_intercepted_mappings; ++i) { + int off = si->si_addr - nvidia_intercepted_mappings[i].addr; + if(off >= 0 && off < nvidia_intercepted_mappings[i].size) { + struct nvidia_card* card = nvidia_intercepted_mappings[i].card; + int channel = nvidia_intercepted_mappings[i].channel; + //fprintf(dump_file, "# INTERCEPTED: %p %i\n", si->si_addr, off); + emulate_instruction(&nvidia_intercepted_mappings[i], uctx); + return; + } + } + + // TODO: emulate uncommon sigaction behavior + if(app_sigsegv_action.sa_handler == (void*)SIG_IGN) + return; + else if(app_sigsegv_action.sa_handler == (void*)SIG_DFL) { + NEXT(signal)(SIGSEGV, SIG_DFL); + *(volatile int*)0 = 0; + raise(SIGSEGV); + abort(); + for(;;) {} + } + else + return app_sigsegv_action.sa_sigaction(signum, si, uctxp); +} + + +int inited; + +static void +init(void) +{ + if(inited) + return; + + inited = 1; + + char* dump_filename = getenv("NOUVEAU_DUMP"); + if(dump_filename) + dump_file = fopen(dump_filename, "w"); + if(!dump_file) + return; + + FILE* fp; + fp = fopen("/proc/bus/pci/devices", "r"); + char line[4096]; + while(fgets(line, sizeof(line), fp)) + { + unsigned busid, vendor_device, irq; + unsigned 
bars[7]; + unsigned sizes[7]; + char driver[256]; + + sscanf(line, "%x %x %x" + " %x %x %x %x %x %x %x" + " %x %x %x %x %x %x %x" + " %s\n", + &busid, &vendor_device, &irq, + &bars[0], &bars[1], &bars[2], &bars[3], &bars[4], &bars[5], &bars[6], + &sizes[0], &sizes[1], &sizes[2], &sizes[3], &sizes[4], &sizes[5], &sizes[6], + driver); + + if(!strcmp(driver, "nvidia")) { + struct nvidia_card* card = &single_card; + card->busid = busid; + card->vram_phys = bars[1] & ~0xfff; + card->vram_bar_size = sizes[1]; + card->mmio_phys = bars[0] & ~0xfff; + } + } + fclose(fp); + + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = sigsegv_handler; + sa.sa_flags = SA_SIGINFO; + sigemptyset(&sa.sa_mask); + NEXT(sigaction)(SIGSEGV, &sa, &app_sigsegv_action); +} + +int +sigaction(int signum, const struct sigaction *act, struct sigaction *oldact) +{ + if(signum == SIGSEGV) { + if(oldact) + memcpy(oldact, &app_sigsegv_action, sizeof(app_sigsegv_action)); + if(act) + memcpy(&app_sigsegv_action, act, sizeof(app_sigsegv_action)); + return 0; + } + + return NEXT(sigaction)(signum, act, oldact); +} + +sighandler_t +signal(int signum, sighandler_t handler) +{ + if(signum == SIGSEGV) { + sighandler_t old = app_sigsegv_action.sa_handler; + memset(&app_sigsegv_action, 0, sizeof(app_sigsegv_action)); + app_sigsegv_action.sa_handler = handler; + return old; + } + + return NEXT(signal)(signum, handler); +} + +static int +filter_open(const char* path, int fd) +{ + if(memcmp(path, "/dev/nvidia", 11)) + return fd; + + const char* p = path + 11; + for(; *p; ++p) { + if(!isdigit(*p)) + return fd; + } + + if(!dump_file) { + init(); + if(!dump_file) + return fd; + } + + fprintf(dump_file, "# open %i %s\n", fd, path); + + nvidia_fds[num_nvidia_fds].fd = fd; + nvidia_fds[num_nvidia_fds].card = &single_card; + ++num_nvidia_fds; + return fd; +} + +int +open(const char *path, int oflag, int mode) +{ + return filter_open(path, NEXT(open)(path, oflag, mode)); +} + +int +open64(const char 
*path, int oflag, int mode) +{ + return filter_open(path, NEXT(open64)(path, oflag, mode)); +} + +static void* +filter_mmap(struct nvidia_card* card, unsigned long long off, size_t len, void* p) +{ + if(!card) + return p; + + fprintf(dump_file, "#mmap off %Lx len %x => %p\n", off, len, p); + + int mmio = off - card->mmio_phys; + int user_channel = -1; + if(mmio >= 0 && mmio < (16 * 1024 * 1024)) { + int user; + user = mmio - 0x800000; + if(user >= 0 && !(user & 0xffff) && user < 32 * 0x10000 && len == 0x10000) + user_channel = user >> 16; + else { + user = mmio - 0xc00000; + if(user >= 0 && !(user & 0xfff) && user < 32 * 0x1000 && len == 0x1000) + user_channel = user >> 12; + else if(user >= 0 && !(user & 0x1fff) && user < 128 * 0x2000 && len == 0x2000) + user_channel = user >> 13; + } + } + + if(user_channel >= 0) + { + int fd = open("/dev/zero", O_RDWR, 0); + void* map = mmap(0, len, 0, MAP_SHARED, fd, 0); + close(fd); + struct nvidia_intercepted_mapping* nim = &nvidia_intercepted_mappings[num_nvidia_intercepted_mappings++]; + nim->card = card; + nim->channel = user_channel; + nim->addr = map; + nim->real_addr = p; + nim->size = len; + return map; + } + + if(len == 1024 * 1024) { +// if(!card->scratch) { +// card->scratch = p; +// printf("scratch\n"); +// } else { + { + fprintf(dump_file, "# <fifo>\n"); + card->fifo = p; + card->put = 0x160000; + card->fifo_base = 0x60000; + } + } + + return p; +} + +void* +mmap64(void *addr, size_t len, int prot, int flags, int fd, off64_t off) +{ + void* p = NEXT(mmap64)(addr, len, prot, flags, fd, off); + return filter_mmap(get_card(fd), off, len, p); +} + +void* +mmap(void *addr, size_t len, int prot, int flags, int fd, off_t off) +{ + void* p = NEXT(mmap)(addr, len, prot, flags, fd, off); + return filter_mmap(get_card(fd), off, len, p); +} + @@ -0,0 +1,21 @@ +#!/bin/bash +# This tool will dump the FIFO commands on both Nouveau and the blob +# For the blob, it needs libnvdump installed +# For Nouveau, it needs libdrm with the 
patch at http://www.mail-archive.com/nouveau@lists.freedesktop.org/msg05406.html applied +# +# -n will cause commands to not be submitted to the GPU (Nouveau only for now) + +if glxinfo 2>/dev/null|grep -q 'OpenGL vendor string: NVIDIA Corporation'; then + export LD_PRELOAD=libnvdump.so +fi + +if [ "$1" == "-n" ]; then + # Nouveau-only for now + export NOUVEAU_NO_SUBMIT=1 + shift +fi + +file="$1" +shift +export NOUVEAU_DUMP="$file" +exec "$@" |