summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Luca Barbieri <luca@luca-barbieri.com> 2010-04-17 12:45:57 +0200
committer Luca Barbieri <luca@luca-barbieri.com> 2010-04-17 12:54:42 +0200
commit c2bf6e7ba6c77b345fe1394d90e2e53d0f0c964c (patch)
tree 48fd730c301a55368b45e317b329f5ab9da4c845
Initial version
Currently nv40-only, but should be easy to get working on other cards. Look at re.c in renouveau to find out how to find the FIFO.
-rw-r--r-- Makefile 8
-rw-r--r-- libnvdump.c 472
-rwxr-xr-x nvdump 21
3 files changed, 501 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..aa41547
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,8 @@
+# Builds the libnvdump.so LD_PRELOAD library and installs it together with
+# the nvdump wrapper script.
+
+# "all" and "install" name actions, not files.
+.PHONY: all install
+
+all: libnvdump.so
+
+# -fPIC is needed so the object can be linked into a shared library on
+# targets such as x86-64; -ldl provides dlsym().
+libnvdump.so: libnvdump.c
+	gcc -g -O2 -fPIC -shared $^ -o $@ -ldl
+
+install: libnvdump.so nvdump
+	install nvdump /usr/bin
+	install libnvdump.so /usr/lib
diff --git a/libnvdump.c b/libnvdump.c
new file mode 100644
index 0000000..7147790
--- /dev/null
+++ b/libnvdump.c
@@ -0,0 +1,472 @@
+#define _GNU_SOURCE
+#define open sys_open
+#define open64 sys_open64
+#include <ctype.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <string.h>
+#include <dlfcn.h>
+#include <signal.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/fcntl.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <stdint.h>
+#undef open
+#undef open64
+
+/*
+ * NEXT(func) evaluates to a pointer to the next definition of `func` in the
+ * symbol lookup order (dlsym with RTLD_NEXT), i.e. the implementation this
+ * library's wrapper of the same name shadows. The lookup result is cached
+ * in a static pointer, one per expansion site.
+ */
+#define NEXT(func) ({ \
+	static typeof(func)* NEXT_##func; \
+	if(!NEXT_##func) \
+		NEXT_##func = (typeof(func)*)dlsym(RTLD_NEXT, #func); \
+	NEXT_##func; \
+})
+
+/* Output stream for the dump; stays NULL until init() opens it. */
+FILE* dump_file;
+
+/* Per-card state: PCI resources found by init() plus FIFO decoding state. */
+struct nvidia_card
+{
+	/* first column of the card's /proc/bus/pci/devices line */
+	unsigned busid;
+	/* second PCI resource, low 12 bits cleared */
+	unsigned long long vram_phys;
+	/* size reported for the second PCI resource */
+	unsigned long long vram_bar_size;
+	/* first PCI resource, low 12 bits cleared */
+	unsigned long long mmio_phys;
+
+	/* TODO: this should be in a channel struct */
+
+	/* application mapping that filter_mmap() took to be the FIFO */
+	char* fifo;
+	/* FIFO address that corresponds to fifo[0] */
+	unsigned fifo_base;
+	/* last value the application wrote to offset 0x40 of a user area */
+	unsigned put;
+	/* FIFO address remembered at a call, used again by a return */
+	unsigned pusher_return;
+	/* data words still expected for the method header last seen */
+	unsigned method_left;
+
+	/* not referenced by the code in this file yet */
+	char* scratch;
+};
+
+// TODO: this assumes a single card
+
+struct nvidia_card single_card;
+
+/* Associates a file descriptor opened on /dev/nvidiaN with its card. */
+struct nvidia_fd
+{
+	int fd;
+	struct nvidia_card* card;
+};
+
+/* Table of descriptors recorded by filter_open(); the first
+ * num_nvidia_fds entries are in use. */
+struct nvidia_fd nvidia_fds[256];
+int num_nvidia_fds;
+
+/*
+ * Look up the card recorded for a file descriptor.
+ *
+ * Returns the card of the first nvidia_fds entry whose fd matches, or a
+ * null pointer when the descriptor was never recorded.
+ */
+struct nvidia_card* get_card(int fd)
+{
+	struct nvidia_fd* entry = nvidia_fds;
+	struct nvidia_fd* end = nvidia_fds + num_nvidia_fds;
+
+	while(entry < end) {
+		if(entry->fd == fd)
+			return entry->card;
+		++entry;
+	}
+
+	return 0;
+}
+
+/* SIGSEGV disposition the application believes is installed; the real
+ * handler is sigsegv_handler(), which forwards to this one. */
+struct sigaction app_sigsegv_action;
+
+/*
+ * A user-area mapping that has been swapped for an inaccessible decoy so
+ * that every access faults and can be emulated.
+ */
+struct nvidia_intercepted_mapping
+{
+	/* decoy address handed to the application */
+	void* addr;
+	/* address of the real mapping that accesses are forwarded to */
+	void* real_addr;
+	/* length of both mappings, in bytes */
+	size_t size;
+	struct nvidia_card* card;
+	/* channel index derived from the mmap offset */
+	int channel;
+};
+
+/* Table filled by filter_mmap(); the first
+ * num_nvidia_intercepted_mappings entries are in use. */
+struct nvidia_intercepted_mapping nvidia_intercepted_mappings[256];
+int num_nvidia_intercepted_mappings;
+
+/* Translate a FIFO address into a pointer inside the card's FIFO mapping. */
+static inline unsigned*
+fifo_word(struct nvidia_card* card, unsigned fifo_addr)
+{
+	return (unsigned*)(card->fifo + fifo_addr - card->fifo_base);
+}
+
+/*
+ * Emulate a 32-bit store to an intercepted user-area mapping.
+ *
+ * nim:   the intercepted mapping the store faulted on
+ * addr:  the (decoy) address the application tried to write
+ * value: the 32-bit value being stored
+ *
+ * A store to offset 0x40 is treated as a new FIFO "put" pointer: every
+ * word between the previous put and the new one is written to dump_file,
+ * following jump/call/return commands along the way. In all cases the
+ * store is then forwarded to the real mapping.
+ */
+static void
+emulate_write32(struct nvidia_intercepted_mapping* nim, void* addr, unsigned value)
+{
+	struct nvidia_card* card = nim->card;
+	unsigned off = (char*)addr - (char*)nim->addr;
+
+	if(off == 0x40) {
+		/* Without a known FIFO mapping there is nothing to walk. */
+		if(card->fifo) {
+			unsigned* p = fifo_word(card, card->put);
+			unsigned* fifo_end = fifo_word(card, value);
+
+			fprintf(dump_file, "# fifo from %x to %x\n", card->put, value);
+			while(p != fifo_end)
+			{
+				unsigned v = *p;
+				/* Word processed next; control-flow commands
+				 * override it with their target, so the
+				 * target word itself is not skipped. */
+				unsigned* next = p + 1;
+
+				fprintf(dump_file, "%x\n", v);
+				if(card->method_left)
+					--card->method_left;
+				else if((v & 3) == 1) {
+					fprintf(dump_file, "# jump to %x\n", v & ~3);
+					next = fifo_word(card, v & ~3);
+				} else if((v & 3) == 2) {
+					fprintf(dump_file, "# call to %x\n", v & ~3);
+					/* remember the address of the call word */
+					card->pusher_return = ((char*)p - card->fifo) + card->fifo_base;
+					/* mask off the opcode bits, as for jump */
+					next = fifo_word(card, v & ~3);
+				} else if((v & 0xe0000000) == 0x20000000) {
+					fprintf(dump_file, "# old-style jump to %x\n", v & 0x1fffffff);
+					next = fifo_word(card, v & 0x1fffffff);
+				} else if(v == 0x00020000) {
+					fprintf(dump_file, "# return\n");
+					/* resume at the word after the call */
+					next = fifo_word(card, card->pusher_return) + 1;
+				} else if(!(v & 0xa0000000))
+					card->method_left = (v >> 18) & 2047;
+				else
+					fprintf(dump_file, "# unknown ring value %x\n", v);
+
+				p = next;
+			}
+			fprintf(dump_file, "# end\n");
+			fflush(dump_file);
+		}
+		card->put = value;
+	}
+
+	/* Forward the store; volatile keeps the compiler from eliding it. */
+	*(volatile unsigned*)((char*)nim->real_addr + off) = value;
+}
+
+/*
+ * Emulate a 32-bit load from an intercepted mapping: read the word at the
+ * same offset in the real mapping and return it.
+ */
+static unsigned
+emulate_read32(struct nvidia_intercepted_mapping* nim, void* addr)
+{
+	unsigned delta = (char*)addr - (char*)nim->addr;
+	unsigned* src = (unsigned*)((char*)nim->real_addr + delta);
+
+	return *src;
+}
+
+/*
+ * Maps a register number as it appears in an instruction encoding to the
+ * index of that register inside uc_mcontext.gregs, which uses its own
+ * ordering.
+ */
+#if __WORDSIZE == 32
+unsigned reg_to_uctx[8] = {
+	REG_EAX, REG_ECX, REG_EDX, REG_EBX,
+	REG_ESP, REG_EBP, REG_ESI, REG_EDI
+};
+#else
+unsigned reg_to_uctx[16] = {
+	REG_RAX, REG_RCX, REG_RDX, REG_RBX,
+	REG_RSP, REG_RBP, REG_RSI, REG_RDI,
+	REG_R8, REG_R9, REG_R10, REG_R11,
+	REG_R12, REG_R13, REG_R14, REG_R15
+};
+#endif
+
+/* Return a pointer to the saved value of register `reg` in the context. */
+static inline unsigned long*
+uctx_reg(ucontext_t* uctx, unsigned reg)
+{
+	unsigned slot = reg_to_uctx[reg];
+
+	return (unsigned long*)&uctx->uc_mcontext.gregs[slot];
+}
+
+/* Index of the instruction pointer inside uc_mcontext.gregs. */
+#if __WORDSIZE == 32
+#define UCTX_REG_IP REG_EIP
+#else
+#define UCTX_REG_IP REG_RIP
+#endif
+
+/*
+ * Decode and emulate the instruction that faulted on an intercepted
+ * mapping, then advance the saved instruction pointer past it.
+ *
+ * nim:  the intercepted mapping that was accessed
+ * uctx: the signal context of the faulting thread
+ *
+ * Only "mov r32 -> r/m32" (0x89) and "mov r/m32 -> r32" (0x8b) with a
+ * memory operand are understood, optionally preceded by REX prefixes on
+ * 64-bit. Anything else, or an operand that decodes to an address outside
+ * the mapping, prints the instruction bytes and aborts.
+ *
+ * NOTE: REX.W is not examined, so a 64-bit mov is emulated as 32-bit.
+ */
+static void
+emulate_instruction(struct nvidia_intercepted_mapping* nim, ucontext_t* uctx)
+{
+	uint8_t* code = (uint8_t*)uctx->uc_mcontext.gregs[UCTX_REG_IP];
+	uint8_t* p = code;
+	unsigned long addr = 0;
+	unsigned long map_start = (unsigned long)nim->addr;
+	unsigned rex = 0;
+	unsigned opcode, modrm, mod, rm, reg;
+	int disp_size;
+	int rip_relative = 0;
+
+#if __WORDSIZE == 64
+	/* REX prefixes: 0x40..0x4f; the last one seen wins. */
+	while((*p & 0xf0) == 0x40)
+		rex = *p++;
+#endif
+
+	opcode = *p++;
+	if(opcode != 0x89 && opcode != 0x8b)
+		goto unhandled;
+
+	modrm = *p++;
+	mod = modrm >> 6;
+	rm = modrm & 7;
+	reg = ((modrm >> 3) & 7) | ((rex & 4) << 1);	/* REX.R extends reg */
+
+	/* mod 3 is a register operand, which cannot have faulted. */
+	if(mod == 3)
+		goto unhandled;
+
+	/* mod 1: disp8 follows, mod 2: disp32 follows, mod 0: none (but see below) */
+	disp_size = (mod == 1) ? 1 : (mod == 2) ? 4 : 0;
+
+	if(rm == 4) {
+		/* SIB byte: scale * index + base */
+		unsigned sib = *p++;
+		unsigned index = ((sib >> 3) & 7) | ((rex & 2) << 2);	/* REX.X */
+		unsigned base = (sib & 7) | ((rex & 1) << 3);		/* REX.B */
+
+		/* index 4 means "no index"; with REX.X set it is r12 */
+		if(index != 4)
+			addr += *uctx_reg(uctx, index) << (sib >> 6);
+
+		/* base field 5 with mod 0: no base register, disp32 instead */
+		if((sib & 7) == 5 && mod == 0)
+			disp_size = 4;
+		else
+			addr += *uctx_reg(uctx, base);
+	} else if(rm == 5 && mod == 0) {
+		/* disp32 only; in 64-bit mode it is relative to the next
+		 * instruction */
+		disp_size = 4;
+#if __WORDSIZE == 64
+		rip_relative = 1;
+#endif
+	} else
+		addr = *uctx_reg(uctx, rm | ((rex & 1) << 3));
+
+	/* Displacements are signed and must be sign-extended. */
+	if(disp_size == 1) {
+		addr += (long)(int8_t)*p;
+		p += 1;
+	} else if(disp_size == 4) {
+		uint32_t disp = (uint32_t)p[0] | ((uint32_t)p[1] << 8)
+			| ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
+		addr += (long)(int32_t)disp;
+		p += 4;
+	}
+
+	/* Neither opcode has an immediate, so p is now the next instruction. */
+	if(rip_relative)
+		addr += (unsigned long)p;
+
+	/* Refuse to forward an access that decodes to outside the mapping. */
+	if(addr < map_start || nim->size < 4 || addr - map_start > nim->size - 4)
+		goto unhandled;
+
+	if(opcode == 0x89)
+		emulate_write32(nim, (void*)addr, *uctx_reg(uctx, reg));
+	else
+		*uctx_reg(uctx, reg) = emulate_read32(nim, (void*)addr);
+
+	uctx->uc_mcontext.gregs[UCTX_REG_IP] = (long)p;
+	return;
+
+unhandled:
+	p = code;
+	fprintf(stderr, "Fatal error: unhandled x86 instruction accessing intercepted mapping:"
+		" %02x %02x %02x %02x %02x %02x %02x %02x\n",
+		p[0], p[1], p[2] ,p[3], p[4], p[5], p[6], p[7]);
+	fflush(stderr);
+	abort();
+}
+
+/*
+ * SIGSEGV handler installed by init().
+ *
+ * If the faulting address lies inside an intercepted mapping, the faulting
+ * instruction is emulated and execution resumes after it. Otherwise the
+ * fault is handed to the disposition the application registered
+ * (app_sigsegv_action): ignored, default action, or the application's own
+ * handler.
+ */
+static void
+sigsegv_handler(int signum, siginfo_t *si, void *uctxp)
+{
+	/* Compare as full-width integers: a pointer difference squeezed
+	 * into an int can wrap and falsely match on 64-bit. */
+	uintptr_t fault = (uintptr_t)si->si_addr;
+	int i;
+
+	for(i = 0; i < num_nvidia_intercepted_mappings; ++i) {
+		struct nvidia_intercepted_mapping* nim = &nvidia_intercepted_mappings[i];
+		uintptr_t start = (uintptr_t)nim->addr;
+
+		if(fault >= start && fault - start < nim->size) {
+			emulate_instruction(nim, (ucontext_t*)uctxp);
+			return;
+		}
+	}
+
+	// TODO: emulate uncommon sigaction behavior
+	if(app_sigsegv_action.sa_handler == SIG_IGN)
+		return;
+
+	if(app_sigsegv_action.sa_handler == SIG_DFL) {
+		/* Restore the default action and fault again so the process
+		 * dies the way it would have without this library. */
+		NEXT(signal)(SIGSEGV, SIG_DFL);
+		*(volatile int*)0 = 0;
+		raise(SIGSEGV);
+		abort();
+		for(;;) {}
+	}
+
+	app_sigsegv_action.sa_sigaction(signum, si, uctxp);
+}
+
+
+/* Non-zero once init() has run, whether or not it enabled dumping. */
+int inited;
+
+/*
+ * One-time setup, run on the first open of an nvidia device node.
+ *
+ * Opens the dump file named by the NOUVEAU_DUMP environment variable; if
+ * that is unset or cannot be opened, dumping stays disabled and nothing
+ * else happens. Otherwise the PCI device list is scanned for a device
+ * bound to the "nvidia" driver to fill in single_card, and the SIGSEGV
+ * handler is installed, saving the previous disposition in
+ * app_sigsegv_action.
+ */
+static void
+init(void)
+{
+	if(inited)
+		return;
+
+	inited = 1;
+
+	const char* dump_filename = getenv("NOUVEAU_DUMP");
+	if(dump_filename)
+		dump_file = fopen(dump_filename, "w");
+	if(!dump_file)
+		return;
+
+	FILE* fp = fopen("/proc/bus/pci/devices", "r");
+	if(!fp)
+		fprintf(stderr, "libnvdump: cannot open /proc/bus/pci/devices, card resources unknown\n");
+	else {
+		char line[4096];
+		while(fgets(line, sizeof(line), fp))
+		{
+			unsigned busid, vendor_device, irq;
+			/* resource addresses may be wider than 32 bits */
+			unsigned long long bars[7];
+			unsigned long long sizes[7];
+			char driver[256];
+
+			int fields = sscanf(line, "%x %x %x"
+				" %llx %llx %llx %llx %llx %llx %llx"
+				" %llx %llx %llx %llx %llx %llx %llx"
+				" %255s",
+				&busid, &vendor_device, &irq,
+				&bars[0], &bars[1], &bars[2], &bars[3], &bars[4], &bars[5], &bars[6],
+				&sizes[0], &sizes[1], &sizes[2], &sizes[3], &sizes[4], &sizes[5], &sizes[6],
+				driver);
+
+			/* Lines without a driver name (or malformed ones)
+			 * leave `driver` unset; skip them. */
+			if(fields != 18)
+				continue;
+
+			if(!strcmp(driver, "nvidia")) {
+				struct nvidia_card* card = &single_card;
+				card->busid = busid;
+				card->vram_phys = bars[1] & ~0xfffULL;
+				card->vram_bar_size = sizes[1];
+				card->mmio_phys = bars[0] & ~0xfffULL;
+			}
+		}
+		fclose(fp);
+	}
+
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = sigsegv_handler;
+	sa.sa_flags = SA_SIGINFO;
+	sigemptyset(&sa.sa_mask);
+	/* Go straight to the real sigaction, bypassing our own wrapper. */
+	NEXT(sigaction)(SIGSEGV, &sa, &app_sigsegv_action);
+}
+
+/*
+ * Interposed sigaction().
+ *
+ * Once dumping is active (dump_file is open, so init() has installed or is
+ * about to install sigsegv_handler), SIGSEGV requests are only recorded in
+ * app_sigsegv_action so that our handler stays installed and can forward
+ * to the application's. Before that, and for every other signal, the call
+ * goes to the real sigaction, so the application's handler is neither
+ * swallowed nor lost when init() later saves the current disposition.
+ *
+ * Returns 0 for a recorded SIGSEGV request, otherwise whatever the real
+ * sigaction returns.
+ */
+int
+sigaction(int signum, const struct sigaction *act, struct sigaction *oldact)
+{
+	if(signum != SIGSEGV || !dump_file)
+		return NEXT(sigaction)(signum, act, oldact);
+
+	/* Snapshot first so act and oldact may refer to the same object. */
+	struct sigaction previous = app_sigsegv_action;
+
+	if(act)
+		app_sigsegv_action = *act;
+	if(oldact)
+		*oldact = previous;
+	return 0;
+}
+
+/*
+ * Interposed signal().
+ *
+ * Once dumping is active (dump_file is open), a SIGSEGV request only
+ * replaces the recorded application disposition and returns the handler
+ * recorded before. Before that, and for every other signal, the call goes
+ * to the real signal(), so the application's handler is actually
+ * installed.
+ */
+sighandler_t
+signal(int signum, sighandler_t handler)
+{
+	if(signum != SIGSEGV || !dump_file)
+		return NEXT(signal)(signum, handler);
+
+	sighandler_t old = app_sigsegv_action.sa_handler;
+
+	/* signal() carries no flags or mask, so reset them. */
+	memset(&app_sigsegv_action, 0, sizeof(app_sigsegv_action));
+	app_sigsegv_action.sa_handler = handler;
+	return old;
+}
+
+/*
+ * Post-process the result of an open()/open64() call.
+ *
+ * path: the path that was opened
+ * fd:   the descriptor (or error value) returned by the real open
+ *
+ * If the open succeeded and the path is "/dev/nvidia" followed only by
+ * decimal digits, the library is initialised on first use and, when
+ * dumping is enabled, the descriptor is logged and recorded in nvidia_fds.
+ * Always returns fd unchanged.
+ */
+static int
+filter_open(const char* path, int fd)
+{
+	const size_t capacity = sizeof(nvidia_fds) / sizeof(nvidia_fds[0]);
+	const char* p;
+
+	/* Failed opens have nothing to track; strncmp stops at the end of
+	 * a short path instead of reading past it. */
+	if(fd < 0 || !path || strncmp(path, "/dev/nvidia", 11))
+		return fd;
+
+	for(p = path + 11; *p; ++p) {
+		/* ctype functions need an unsigned char value */
+		if(!isdigit((unsigned char)*p))
+			return fd;
+	}
+
+	if(!dump_file) {
+		init();
+		if(!dump_file)
+			return fd;
+	}
+
+	fprintf(dump_file, "# open %i %s\n", fd, path);
+
+	if((size_t)num_nvidia_fds >= capacity) {
+		fprintf(dump_file, "# too many nvidia fds, %i not tracked\n", fd);
+		return fd;
+	}
+
+	nvidia_fds[num_nvidia_fds].fd = fd;
+	nvidia_fds[num_nvidia_fds].card = &single_card;
+	++num_nvidia_fds;
+	return fd;
+}
+
+/* Interposed open(): call the real open, then let filter_open() record
+ * the descriptor if it refers to an nvidia device node. */
+int
+open(const char *path, int oflag, int mode)
+{
+	int fd = NEXT(open)(path, oflag, mode);
+
+	return filter_open(path, fd);
+}
+
+/* Interposed open64(): call the real open64, then let filter_open()
+ * record the descriptor if it refers to an nvidia device node. */
+int
+open64(const char *path, int oflag, int mode)
+{
+	int fd = NEXT(open64)(path, oflag, mode);
+
+	return filter_open(path, fd);
+}
+
+/*
+ * Post-process the result of an mmap()/mmap64() call.
+ *
+ * card: card recorded for the mapped descriptor, or NULL if it is not an
+ *       nvidia descriptor
+ * off:  file offset that was mapped
+ * len:  length of the mapping
+ * p:    address returned by the real mmap
+ *
+ * Mappings of a per-channel user area inside the card's MMIO range are
+ * replaced by an inaccessible decoy mapping of the same size, so that
+ * every access faults and is emulated by the SIGSEGV handler; the decoy
+ * address is returned instead of p. A 1 MiB mapping is assumed to be the
+ * FIFO and is remembered on the card. In every other case, including
+ * failure to set up the decoy, p is returned unchanged.
+ */
+static void*
+filter_mmap(struct nvidia_card* card, unsigned long long off, size_t len, void* p)
+{
+	int user_channel = -1;
+
+	if(!card || p == MAP_FAILED)
+		return p;
+
+	fprintf(dump_file, "#mmap off %llx len %zx => %p\n", off, len, p);
+
+	/* Compare in 64 bits first: truncating the difference to int could
+	 * make an unrelated offset look like it is inside the MMIO range. */
+	if(off >= card->mmio_phys && off - card->mmio_phys < 16 * 1024 * 1024) {
+		int mmio = (int)(off - card->mmio_phys);
+		int user;
+
+		/* 64 KiB user areas starting at MMIO + 0x800000 */
+		user = mmio - 0x800000;
+		if(user >= 0 && !(user & 0xffff) && user < 32 * 0x10000 && len == 0x10000)
+			user_channel = user >> 16;
+		else {
+			/* 4 KiB or 8 KiB user areas starting at MMIO + 0xc00000 */
+			user = mmio - 0xc00000;
+			if(user >= 0 && !(user & 0xfff) && user < 32 * 0x1000 && len == 0x1000)
+				user_channel = user >> 12;
+			else if(user >= 0 && !(user & 0x1fff) && user < 128 * 0x2000 && len == 0x2000)
+				user_channel = user >> 13;
+		}
+	}
+
+	if(user_channel >= 0)
+	{
+		const size_t capacity = sizeof(nvidia_intercepted_mappings) / sizeof(nvidia_intercepted_mappings[0]);
+		struct nvidia_intercepted_mapping* nim;
+		void* map;
+
+		if((size_t)num_nvidia_intercepted_mappings >= capacity) {
+			fprintf(dump_file, "# too many intercepted mappings, channel %i not traced\n", user_channel);
+			return p;
+		}
+
+		/* Inaccessible anonymous decoy, created with the real mmap so
+		 * it does not pass through our own wrappers again. */
+		map = NEXT(mmap)(0, len, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+		if(map == MAP_FAILED) {
+			fprintf(dump_file, "# cannot create decoy mapping, channel %i not traced\n", user_channel);
+			return p;
+		}
+
+		/* Fill the entry completely before publishing it through the
+		 * count that the signal handler iterates over. */
+		nim = &nvidia_intercepted_mappings[num_nvidia_intercepted_mappings];
+		nim->card = card;
+		nim->channel = user_channel;
+		nim->addr = map;
+		nim->real_addr = p;
+		nim->size = len;
+		++num_nvidia_intercepted_mappings;
+		return map;
+	}
+
+	if(len == 1024 * 1024) {
+		fprintf(dump_file, "# <fifo>\n");
+		card->fifo = p;
+		card->put = 0x160000;
+		card->fifo_base = 0x60000;
+	}
+
+	return p;
+}
+
+/* Interposed mmap64(): perform the real mapping, then let filter_mmap()
+ * decide whether to substitute or record it. */
+void*
+mmap64(void *addr, size_t len, int prot, int flags, int fd, off64_t off)
+{
+	void* real = NEXT(mmap64)(addr, len, prot, flags, fd, off);
+	struct nvidia_card* card = get_card(fd);
+
+	return filter_mmap(card, off, len, real);
+}
+
+/* Interposed mmap(): perform the real mapping, then let filter_mmap()
+ * decide whether to substitute or record it. */
+void*
+mmap(void *addr, size_t len, int prot, int flags, int fd, off_t off)
+{
+	void* real = NEXT(mmap)(addr, len, prot, flags, fd, off);
+	struct nvidia_card* card = get_card(fd);
+
+	return filter_mmap(card, off, len, real);
+}
+
diff --git a/nvdump b/nvdump
new file mode 100755
index 0000000..a062326
--- /dev/null
+++ b/nvdump
@@ -0,0 +1,21 @@
+#!/bin/bash
+# This tool will dump the FIFO commands on both Nouveau and the blob
+# For the blob, it needs libnvdump installed
+# For Nouveau, it needs libdrm with the patch at http://www.mail-archive.com/nouveau@lists.freedesktop.org/msg05406.html applied
+#
+# -n will cause commands to not be submitted to the GPU (Nouveau only for now)
+
+if glxinfo 2>/dev/null|grep -q 'OpenGL vendor string: NVIDIA Corporation'; then
+ export LD_PRELOAD=libnvdump.so
+fi
+
+if "$1" == "-n"; then
+ # Nouveau-only for now
+ export NOUVEAU_NO_SUBMIT=1
+ shift
+fi
+
+file="$1"
+shift
+export NOUVEAU_DUMP="$file"
+exec "$@"