Initial version

Currently nv40-only, but should be easy to get working on other cards. Look at re.c in renouveau to find out how to find the FIFO.
author: Luca Barbieri <luca@luca-barbieri.com> 2010-04-17 12:45:57 +0200
committer: Luca Barbieri <luca@luca-barbieri.com> 2010-04-17 12:54:42 +0200
commit: c2bf6e7ba6c77b345fe1394d90e2e53d0f0c964c (patch)
tree: 48fd730c301a55368b45e317b329f5ab9da4c845
3 files changed, 501 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..aa41547
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,8 @@
+# Builds the LD_PRELOAD interposer library and installs it with the nvdump script.
+.PHONY: all install
+
+all: libnvdump.so
+
+# -fPIC: code linked into a shared object must be position-independent on x86-64.
+libnvdump.so: libnvdump.c
+	gcc -g -O2 -fPIC -shared $^ -o $@ -ldl
+
+install: libnvdump.so nvdump
+	install nvdump /usr/bin
+	install libnvdump.so /usr/lib
diff --git a/libnvdump.c b/libnvdump.c
new file mode 100644
index 0000000..7147790
--- /dev/null
+++ b/libnvdump.c
@@ -0,0 +1,472 @@
+#define _GNU_SOURCE
+#define open sys_open
+#define open64 sys_open64
+#include <stdlib.h>
+#include <memory.h>
+#include <dlfcn.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/fcntl.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <stdint.h>
+#undef open
+#undef open64
+
+/*
+ * NEXT(func): evaluates to a pointer to the next definition of `func` in
+ * library search order, looked up with dlsym(RTLD_NEXT, "func") the first
+ * time a given expansion is evaluated and cached in a static local to that
+ * expansion. The wrappers in this file use it to reach the function they
+ * interpose. Relies on the GNU statement-expression and typeof extensions;
+ * the dlsym() result is not checked for NULL.
+ */
+#define NEXT(func) ({static typeof(func)* NEXT_##func; if(!NEXT_##func) NEXT_##func = (typeof(func)*)dlsym(RTLD_NEXT, #func); NEXT_##func;})
+
+/* Stream the dump is written to; NULL until init() opens the file named by $NOUVEAU_DUMP. */
+FILE* dump_file;
+
+/*
+ * State tracked for one NVIDIA card. The PCI fields are filled in by init()
+ * from /proc/bus/pci/devices; the FIFO fields are set by filter_mmap() and
+ * updated by emulate_write32().
+ */
+struct nvidia_card
+{
+	unsigned busid; /* first field of the card's /proc/bus/pci/devices line */
+	unsigned long long vram_phys; /* BAR 1 value with the low 12 bits cleared */
+	unsigned long long vram_bar_size; /* size reported for BAR 1 */
+	unsigned long long mmio_phys; /* BAR 0 value with the low 12 bits cleared */
+//	char* mmio;
+
+	/* TODO: this should be in a channel struct */
+	char* fifo; /* process address of the mapping treated as the command FIFO */
+	unsigned fifo_base; /* FIFO-space address that corresponds to fifo[0] */
+	unsigned put; /* last value written to offset 0x40 (treated as the FIFO put pointer) */
+	unsigned pusher_return; /* call/return bookkeeping maintained by emulate_write32() */
+	unsigned method_left; /* data words still expected after the last method header */
+
+	char* scratch; /* only referenced from commented-out code in filter_mmap() */
+};
+
+// TODO: this assumes a single card
+
+/* The one card instance: init() fills it in and filter_open() attaches it to every registered fd. */
+struct nvidia_card single_card;
+
+/* Associates an open /dev/nvidiaN file descriptor with the card it belongs to. */
+struct nvidia_fd
+{
+	int fd; /* descriptor returned by the intercepted open()/open64() */
+	struct nvidia_card* card; /* card this descriptor refers to */
+};
+
+/* Table of descriptors registered by filter_open(); the first num_nvidia_fds entries are in use. */
+struct nvidia_fd nvidia_fds[256];
+int num_nvidia_fds;
+
+/*
+ * Look up the card associated with a file descriptor.
+ *
+ * Walks the registered-descriptor table in order and returns the card of the
+ * first entry whose fd matches, or a null pointer when the descriptor was
+ * never registered.
+ */
+struct nvidia_card* get_card(int fd)
+{
+	struct nvidia_fd* entry = nvidia_fds;
+	int remaining = num_nvidia_fds;
+
+	while(remaining-- > 0) {
+		if(entry->fd == fd)
+			return entry->card;
+		++entry;
+	}
+	return 0;
+}
+
+/*
+ * The SIGSEGV disposition the application believes is installed. It is
+ * recorded by init() and by the sigaction()/signal() wrappers, and consulted
+ * by sigsegv_handler() for faults outside the intercepted mappings.
+ */
+struct sigaction app_sigsegv_action;
+
+/*
+ * One intercepted mapping: the application is handed `addr`, an inaccessible
+ * placeholder, and every faulting access to it is replayed on `real_addr`.
+ */
+struct nvidia_intercepted_mapping
+{
+	void* addr; /* placeholder mapping returned to the application */
+	void* real_addr; /* mapping returned by the underlying mmap()/mmap64() */
+	size_t size; /* length of both mappings, in bytes */
+	struct nvidia_card* card; /* card the mapping belongs to */
+	int channel; /* channel index filter_mmap() derived from the MMIO offset */
+};
+
+/* Table of intercepted mappings appended to by filter_mmap(); the first num_nvidia_intercepted_mappings entries are in use. */
+struct nvidia_intercepted_mapping nvidia_intercepted_mappings[256];
+int num_nvidia_intercepted_mappings;
+
+/* Translate an address in the FIFO's own address space into a pointer into the mapped FIFO. */
+static unsigned*
+fifo_word(struct nvidia_card* card, unsigned fifo_addr)
+{
+	return (unsigned*)(card->fifo + fifo_addr - card->fifo_base);
+}
+
+/*
+ * Carry out a 32-bit store that the application attempted on an intercepted
+ * (inaccessible) mapping.
+ *
+ * nim   - intercepted mapping the written address belongs to
+ * addr  - written address, inside the placeholder mapping nim->addr
+ * value - 32-bit value being stored
+ *
+ * A store to offset 0x40 is treated as an update of the FIFO put pointer:
+ * every command word between the previous put and the new one is written to
+ * dump_file in hex, following jump/call/return commands, and the card's put
+ * is then updated. Every store, whatever its offset, is finally replayed on
+ * the real mapping.
+ */
+static void
+emulate_write32(struct nvidia_intercepted_mapping* nim, void* addr, unsigned value)
+{
+	struct nvidia_card* card = nim->card;
+	unsigned off = (char*)addr - (char*)nim->addr;
+
+	/* card->fifo stays NULL until filter_mmap() has seen the FIFO mapping. */
+	if(off == 0x40 && card->fifo) {
+		unsigned* p = fifo_word(card, card->put);
+		unsigned* fifo_end = fifo_word(card, value);
+
+		fprintf(dump_file, "# fifo from %x to %x\n", card->put, value);
+		while(p != fifo_end)
+		{
+			unsigned v = *p;
+			/*
+			 * Word to fetch after this one. Control-flow commands replace
+			 * it with their target, so the first word AT the target is
+			 * dumped rather than skipped.
+			 */
+			unsigned* next = p + 1;
+
+			fprintf(dump_file, "%x\n", v);
+			if(card->method_left)
+				--card->method_left; /* data word of the current method */
+			else if((v & 3) == 1) {
+				fprintf(dump_file, "# jump to %x\n", v & ~3);
+				next = fifo_word(card, v & ~3);
+			} else if((v & 3) == 2) {
+				fprintf(dump_file, "# call to %x\n", v & ~3);
+				/* Resume at the word following the call. */
+				card->pusher_return = ((char*)next - card->fifo) + card->fifo_base;
+				/* Strip the two tag bits so the target stays word-aligned. */
+				next = fifo_word(card, v & ~3);
+			} else if((v & 0xe0000000) == 0x20000000) {
+				fprintf(dump_file, "# old-style jump to %x\n", v & 0x1fffffff);
+				next = fifo_word(card, v & 0x1fffffff);
+			} else if(v == 0x00020000) {
+				fprintf(dump_file, "# return\n");
+				next = fifo_word(card, card->pusher_return);
+			} else if(!(v & 0xa0000000))
+				card->method_left = (v >> 18) & 2047; /* header: count of data words */
+			else
+				fprintf(dump_file, "# unknown ring value %x\n", v);
+
+			p = next;
+		}
+		fprintf(dump_file, "# end\n");
+		fflush(dump_file);
+		card->put = value;
+	}
+
+	*(unsigned*)((char*)nim->real_addr + off) = value;
+}
+
+/*
+ * Carry out a 32-bit load that the application attempted on an intercepted
+ * mapping: `addr` is translated to the same offset inside the real mapping
+ * and the word found there is returned.
+ */
+static unsigned
+emulate_read32(struct nvidia_intercepted_mapping* nim, void* addr)
+{
+	unsigned delta = (char*)addr - (char*)nim->addr;
+	unsigned* real_word = (unsigned*)((char*)nim->real_addr + delta);
+
+	return *real_word;
+}
+
+/*
+ * Maps a register number as decoded from an instruction (the index used by
+ * uctx_reg()) to the matching REG_* index into uc_mcontext.gregs[], which is
+ * ordered differently. The 32-bit build has 8 entries, the 64-bit build 16.
+ */
+#if __WORDSIZE == 32
+unsigned reg_to_uctx[8] = {REG_EAX, REG_ECX, REG_EDX, REG_EBX, REG_ESP, REG_EBP, REG_ESI, REG_EDI};
+#else
+unsigned reg_to_uctx[16] = {REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI, REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15};
+#endif
+
+/*
+ * Return a pointer to the saved value of register number `reg` inside the
+ * signal context, so the caller can read or overwrite it. `reg` is translated
+ * through reg_to_uctx[]; no range check is performed.
+ */
+static inline unsigned long*
+uctx_reg(ucontext_t* uctx, unsigned reg)
+{
+	unsigned greg_index = reg_to_uctx[reg];
+
+	return (unsigned long*)(uctx->uc_mcontext.gregs + greg_index);
+}
+
+/* Index of the saved instruction pointer in uc_mcontext.gregs[] for this word size. */
+#if __WORDSIZE == 32
+#define NVDUMP_REG_IP REG_EIP
+#else
+#define NVDUMP_REG_IP REG_RIP
+#endif
+
+/*
+ * Decode and emulate the instruction that faulted on an intercepted mapping,
+ * then advance the saved instruction pointer past it.
+ *
+ * nim  - intercepted mapping the fault address belongs to
+ * uctx - signal context of the faulting thread; registers are read and
+ *        written through it
+ *
+ * Only 32-bit "mov reg -> mem" (opcode 0x89) and "mov mem -> reg" (0x8b) with
+ * a memory operand are supported, optionally preceded by REX prefixes on
+ * 64-bit builds. Anything else prints the instruction bytes and aborts.
+ */
+static void
+emulate_instruction(struct nvidia_intercepted_mapping* nim, ucontext_t* uctx)
+{
+	uint8_t* code = (uint8_t*)uctx->uc_mcontext.gregs[NVDUMP_REG_IP];
+	uint8_t* p = code;
+	unsigned long addr = 0;
+	unsigned char rex = 0;
+	unsigned opcode, modrm, mod, rm, reg;
+	unsigned disp_size;
+	int rip_relative = 0;
+
+#if __WORDSIZE == 64
+	/* Consume REX prefixes; the last one is the one that applies. */
+	while((*p & 0xf0) == 0x40)
+		rex = *p++;
+#endif
+
+	opcode = *p++;
+	if(opcode != 0x89 && opcode != 0x8b)
+		goto unhandled;
+	/* REX.W selects a 64-bit operand, which the 32-bit emulation cannot honour. */
+	if(rex & 8)
+		goto unhandled;
+
+	modrm = *p++;
+	mod = modrm >> 6;
+	rm = modrm & 7;
+	reg = ((modrm >> 3) & 7) + ((rex & 4) << 1);
+
+	/* mod 3 is register-to-register and has no memory operand to emulate. */
+	if(mod == 3)
+		goto unhandled;
+	disp_size = (mod == 1) ? 1 : (mod == 2) ? 4 : 0;
+
+	if(rm == 4) {
+		/* SIB byte follows: base + (index << scale). */
+		unsigned sib = *p++;
+		unsigned index = ((sib >> 3) & 7) + ((rex & 2) << 2);
+		unsigned base = sib & 7;
+
+		/* Only the full index value 4 means "no index"; with REX.X it is r12. */
+		if(index != 4)
+			addr += *uctx_reg(uctx, index) << (sib >> 6);
+		if(base == 5 && mod == 0)
+			disp_size = 4; /* no base register, disp32 instead */
+		else
+			addr += *uctx_reg(uctx, base + ((rex & 1) << 3));
+	} else if(rm == 5 && mod == 0) {
+		/* disp32 only: absolute on 32-bit, relative to the next instruction on 64-bit. */
+		disp_size = 4;
+#if __WORDSIZE == 64
+		rip_relative = 1;
+#endif
+	} else
+		addr = *uctx_reg(uctx, rm + ((rex & 1) << 3));
+
+	/* Displacements are signed and must be sign-extended to the address width. */
+	if(disp_size == 1) {
+		addr += (long)(int8_t)p[0];
+		p += 1;
+	} else if(disp_size == 4) {
+		uint32_t raw = (uint32_t)p[0] | ((uint32_t)p[1] << 8)
+			| ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
+		addr += (long)(int32_t)raw;
+		p += 4;
+	}
+
+	/* p now points at the next instruction, which is the RIP-relative base. */
+	if(rip_relative)
+		addr += (unsigned long)p;
+
+	if(opcode == 0x89)
+		emulate_write32(nim, (void*)addr, *uctx_reg(uctx, reg));
+	else
+		*uctx_reg(uctx, reg) = emulate_read32(nim, (void*)addr);
+
+	uctx->uc_mcontext.gregs[NVDUMP_REG_IP] = (long)p;
+	return;
+
+unhandled:
+	p = code;
+	fprintf(stderr, "Fatal error: unhandled x86 instruction accessing intercepted mapping:"
+		" %02x %02x %02x %02x %02x %02x %02x %02x\n",
+		p[0], p[1], p[2] ,p[3], p[4], p[5], p[6], p[7]);
+	fflush(stderr);
+	abort();
+}
+
+/*
+ * SIGSEGV handler installed by init().
+ *
+ * If the faulting address lies inside one of the intercepted mappings, the
+ * faulting instruction is emulated and execution resumes after it. Otherwise
+ * the fault is passed on according to the disposition the application
+ * registered (see app_sigsegv_action): ignored, default (the process is made
+ * to die), or forwarded to the application's own handler.
+ */
+static void
+sigsegv_handler(int signum, siginfo_t *si, void *uctxp)
+{
+	ucontext_t* uctx = uctxp;
+	uintptr_t fault = (uintptr_t)si->si_addr;
+	int i;
+
+	for(i = 0; i < num_nvidia_intercepted_mappings; ++i) {
+		struct nvidia_intercepted_mapping* nim = &nvidia_intercepted_mappings[i];
+		uintptr_t start = (uintptr_t)nim->addr;
+
+		/* Compare at full pointer width so distant addresses cannot alias. */
+		if(fault >= start && fault - start < nim->size) {
+			emulate_instruction(nim, uctx);
+			return;
+		}
+	}
+
+	// TODO: emulate uncommon sigaction behavior
+	if(app_sigsegv_action.sa_handler == SIG_IGN)
+		return;
+
+	if(app_sigsegv_action.sa_handler == SIG_DFL) {
+		/* Restore the default action and fault again so the process dies as it normally would. */
+		NEXT(signal)(SIGSEGV, SIG_DFL);
+		*(volatile int*)0 = 0;
+		raise(SIGSEGV);
+		abort();
+		for(;;) {}
+	}
+
+	/* Call the application's handler through the member matching how it was registered. */
+	if(app_sigsegv_action.sa_flags & SA_SIGINFO)
+		app_sigsegv_action.sa_sigaction(signum, si, uctxp);
+	else
+		app_sigsegv_action.sa_handler(signum);
+}
+
+
+/* Set to 1 by the first call to init() so that later calls return immediately. */
+int inited;
+
+/*
+ * One-time setup, run at most once per process.
+ *
+ * Opens the dump file named by $NOUVEAU_DUMP; if the variable is unset or the
+ * file cannot be opened, nothing else happens and dump_file stays NULL.
+ * Otherwise /proc/bus/pci/devices is scanned for a device bound to the
+ * "nvidia" driver, whose bus id and BAR 0/1 values are recorded in
+ * single_card, and sigsegv_handler() is installed for SIGSEGV with the
+ * previous disposition saved in app_sigsegv_action.
+ */
+static void
+init(void)
+{
+	if(inited)
+		return;
+
+	inited = 1;
+
+	char* dump_filename = getenv("NOUVEAU_DUMP");
+	if(dump_filename)
+		dump_file = fopen(dump_filename, "w");
+	if(!dump_file)
+		return;
+
+	FILE* fp = fopen("/proc/bus/pci/devices", "r");
+	if(!fp)
+		fprintf(stderr, "nvdump: cannot open /proc/bus/pci/devices\n");
+	else {
+		char line[4096];
+		while(fgets(line, sizeof(line), fp))
+		{
+			unsigned busid, vendor_device, irq;
+			/* BAR addresses and sizes can exceed 32 bits, so parse them at 64. */
+			unsigned long long bars[7];
+			unsigned long long sizes[7];
+			char driver[256];
+
+			int fields = sscanf(line, "%x %x %x"
+				" %llx %llx %llx %llx %llx %llx %llx"
+				" %llx %llx %llx %llx %llx %llx %llx"
+				" %255s",
+			&busid, &vendor_device, &irq,
+			&bars[0], &bars[1], &bars[2], &bars[3], &bars[4], &bars[5], &bars[6],
+			&sizes[0], &sizes[1], &sizes[2], &sizes[3], &sizes[4], &sizes[5], &sizes[6],
+			driver);
+
+			/*
+			 * A line without a driver name yields 17 fields and leaves
+			 * `driver` unset; skip it instead of comparing stale contents.
+			 */
+			if(fields != 18)
+				continue;
+
+			if(!strcmp(driver, "nvidia")) {
+				struct nvidia_card* card = &single_card;
+				card->busid = busid;
+				card->vram_phys = bars[1] & ~0xfffULL;
+				card->vram_bar_size = sizes[1];
+				card->mmio_phys = bars[0] & ~0xfffULL;
+			}
+		}
+		fclose(fp);
+	}
+
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = sigsegv_handler;
+	sa.sa_flags = SA_SIGINFO;
+	sigemptyset(&sa.sa_mask);
+	NEXT(sigaction)(SIGSEGV, &sa, &app_sigsegv_action);
+}
+
+/*
+ * Interposed sigaction().
+ *
+ * Once the dump is active (dump_file is set, which is when init() installs
+ * sigsegv_handler()), SIGSEGV requests are not passed to libc: the requested
+ * action is only recorded in app_sigsegv_action and the previously recorded
+ * one is returned through oldact. Before that point, and for every other
+ * signal, the call goes straight to the real sigaction(), so actions
+ * registered early are genuinely installed and init() can pick them up.
+ *
+ * Returns 0 for recorded SIGSEGV requests, otherwise the real sigaction()
+ * result.
+ */
+int
+sigaction(int signum, const struct sigaction *act, struct sigaction *oldact)
+{
+	if(signum == SIGSEGV && dump_file) {
+		/* Snapshot first: act and oldact may point at the same object. */
+		struct sigaction previous = app_sigsegv_action;
+
+		if(act)
+			app_sigsegv_action = *act;
+		if(oldact)
+			*oldact = previous;
+		return 0;
+	}
+
+	return NEXT(sigaction)(signum, act, oldact);
+}
+
+/*
+ * Interposed signal().
+ *
+ * Once the dump is active (dump_file is set, which is when init() installs
+ * sigsegv_handler()), a SIGSEGV request only replaces the recorded
+ * application disposition and returns the previously recorded handler.
+ * Before that point, and for every other signal, the call is forwarded to
+ * the real signal() so the handler is genuinely installed.
+ */
+sighandler_t
+signal(int signum, sighandler_t handler)
+{
+	if(signum == SIGSEGV && dump_file) {
+		sighandler_t old = app_sigsegv_action.sa_handler;
+		memset(&app_sigsegv_action, 0, sizeof(app_sigsegv_action));
+		app_sigsegv_action.sa_handler = handler;
+		return old;
+	}
+
+	return NEXT(signal)(signum, handler);
+}
+
+/*
+ * Post-process the result of an intercepted open()/open64().
+ *
+ * path - path that was opened
+ * fd   - descriptor (or error value) the real call returned
+ *
+ * When the open succeeded and the path is "/dev/nvidia" followed only by
+ * decimal digits, init() is run if needed and, provided a dump file is
+ * active, the descriptor is logged and registered against single_card so
+ * that get_card() can find it. Always returns fd unchanged.
+ */
+static int
+filter_open(const char* path, int fd)
+{
+	static const char prefix[] = "/dev/nvidia";
+	const int capacity = sizeof(nvidia_fds) / sizeof(nvidia_fds[0]);
+	const char* p;
+	int i;
+
+	/* A failed open yields no descriptor worth tracking. */
+	if(fd < 0 || !path)
+		return fd;
+
+	/* strncmp stops at the terminator, so short paths are not over-read. */
+	if(strncmp(path, prefix, sizeof(prefix) - 1))
+		return fd;
+
+	for(p = path + sizeof(prefix) - 1; *p; ++p) {
+		if(*p < '0' || *p > '9')
+			return fd;
+	}
+
+	if(!dump_file) {
+		init();
+		if(!dump_file)
+			return fd;
+	}
+
+	fprintf(dump_file, "# open %i %s\n", fd, path);
+
+	/* Descriptor numbers get reused after close(); refresh the entry instead of adding a duplicate. */
+	for(i = 0; i < num_nvidia_fds; ++i) {
+		if(nvidia_fds[i].fd == fd) {
+			nvidia_fds[i].card = &single_card;
+			return fd;
+		}
+	}
+
+	if(num_nvidia_fds >= capacity) {
+		fprintf(stderr, "nvdump: too many nvidia file descriptors, not tracking fd %i\n", fd);
+		return fd;
+	}
+
+	nvidia_fds[num_nvidia_fds].fd = fd;
+	nvidia_fds[num_nvidia_fds].card = &single_card;
+	++num_nvidia_fds;
+	return fd;
+}
+
+/*
+ * Interposed open(): performs the real open and hands the path and the
+ * resulting descriptor to filter_open(), whose return value is returned.
+ */
+int
+open(const char *path, int oflag, int mode)
+{
+	int fd = NEXT(open)(path, oflag, mode);
+
+	return filter_open(path, fd);
+}
+
+/*
+ * Interposed open64(): performs the real open64 and hands the path and the
+ * resulting descriptor to filter_open(), whose return value is returned.
+ */
+int
+open64(const char *path, int oflag, int mode)
+{
+	int fd = NEXT(open64)(path, oflag, mode);
+
+	return filter_open(path, fd);
+}
+
+/*
+ * Post-process the result of an intercepted mmap()/mmap64().
+ *
+ * card - card the mapped descriptor belongs to, or NULL if it is not tracked
+ * off  - file offset that was mapped
+ * len  - length that was mapped
+ * p    - address (or MAP_FAILED) returned by the real call
+ *
+ * Mappings of untracked descriptors and failed mappings are returned as is.
+ * For tracked descriptors the mapping is logged. If the offset and length
+ * match one of the per-channel windows inside the card's first 16 MiB of
+ * MMIO, an inaccessible placeholder of the same length is returned instead
+ * and recorded, so that every access faults into sigsegv_handler(). A
+ * mapping of exactly 1 MiB is taken to be the command FIFO and recorded on
+ * the card.
+ *
+ * Returns the address the application should use.
+ */
+static void*
+filter_mmap(struct nvidia_card* card, unsigned long long off, size_t len, void* p)
+{
+	const int capacity = sizeof(nvidia_intercepted_mappings) / sizeof(nvidia_intercepted_mappings[0]);
+	int user_channel = -1;
+
+	if(!card || p == MAP_FAILED)
+		return p;
+
+	fprintf(dump_file, "#mmap off %llx len %zx => %p\n", off, len, p);
+
+	/* Work at 64-bit width so offsets far from the MMIO BAR cannot wrap into range. */
+	if(off >= card->mmio_phys && off - card->mmio_phys < (16 * 1024 * 1024)) {
+		unsigned long long mmio = off - card->mmio_phys;
+
+		if(mmio >= 0x800000) {
+			unsigned long long user = mmio - 0x800000;
+			if(!(user & 0xffff) && user < 32 * 0x10000 && len == 0x10000)
+				user_channel = user >> 16;
+		}
+		if(user_channel < 0 && mmio >= 0xc00000) {
+			unsigned long long user = mmio - 0xc00000;
+			if(!(user & 0xfff) && user < 32 * 0x1000 && len == 0x1000)
+				user_channel = user >> 12;
+			else if(!(user & 0x1fff) && user < 128 * 0x2000 && len == 0x2000)
+				user_channel = user >> 13;
+		}
+	}
+
+	if(user_channel >= 0)
+	{
+		void* map = MAP_FAILED;
+
+		/*
+		 * Reserve an inaccessible region with the real mmap so the
+		 * request does not pass through the wrappers in this file again.
+		 */
+		if(num_nvidia_intercepted_mappings < capacity)
+			map = NEXT(mmap)(0, len, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+		if(map == MAP_FAILED) {
+			/* Cannot intercept: hand out the real mapping so the application keeps working. */
+			fprintf(stderr, "nvdump: cannot intercept channel %i mapping\n", user_channel);
+			return p;
+		}
+
+		struct nvidia_intercepted_mapping* nim = &nvidia_intercepted_mappings[num_nvidia_intercepted_mappings++];
+		nim->card = card;
+		nim->channel = user_channel;
+		nim->addr = map;
+		nim->real_addr = p;
+		nim->size = len;
+		return map;
+	}
+
+	if(len == 1024 * 1024) {
+		fprintf(dump_file, "# <fifo>\n");
+		card->fifo = p;
+		card->put = 0x160000;
+		card->fifo_base = 0x60000;
+	}
+
+	return p;
+}
+
+/*
+ * Interposed mmap64(): performs the real mapping, then lets filter_mmap()
+ * decide which address the caller gets, based on the card registered for fd.
+ */
+void*
+mmap64(void *addr, size_t len, int prot, int flags, int fd, off64_t off)
+{
+	void* real_map = NEXT(mmap64)(addr, len, prot, flags, fd, off);
+	struct nvidia_card* card = get_card(fd);
+
+	return filter_mmap(card, off, len, real_map);
+}
+
+/*
+ * Interposed mmap(): performs the real mapping, then lets filter_mmap()
+ * decide which address the caller gets, based on the card registered for fd.
+ */
+void*
+mmap(void *addr, size_t len, int prot, int flags, int fd, off_t off)
+{
+	void* real_map = NEXT(mmap)(addr, len, prot, flags, fd, off);
+	struct nvidia_card* card = get_card(fd);
+
+	return filter_mmap(card, off, len, real_map);
+}
+
diff --git a/nvdump b/nvdump
new file mode 100755
index 0000000..a062326
--- /dev/null
+++ b/nvdump
@@ -0,0 +1,21 @@
+#!/bin/bash
+# This tool will dump the FIFO commands on both Nouveau and the blob
+# For the blob, it needs libnvdump installed
+# For Nouveau, it needs libdrm with the patch at http://www.mail-archive.com/nouveau@lists.freedesktop.org/msg05406.html applied
+#
+# -n will cause commands to not be submitted to the GPU (Nouveau only for now)
+
+if glxinfo 2>/dev/null|grep -q 'OpenGL vendor string: NVIDIA Corporation'; then
+	export LD_PRELOAD=libnvdump.so
+fi
+
+if "$1" == "-n"; then
+	# Nouveau-only for now
+	export NOUVEAU_NO_SUBMIT=1
+	shift
+fi
+
+file="$1"
+shift
+export NOUVEAU_DUMP="$file"
+exec "$@"
author	Luca Barbieri <luca@luca-barbieri.com>	2010-04-17 12:45:57 +0200
committer	Luca Barbieri <luca@luca-barbieri.com>	2010-04-17 12:54:42 +0200
commit	c2bf6e7ba6c77b345fe1394d90e2e53d0f0c964c (patch)
tree	48fd730c301a55368b45e317b329f5ab9da4c845