/*
 * LD_PRELOAD shim that logs what a process writes to an NVIDIA user FIFO.
 *
 * It interposes open()/open64() to spot /dev/nvidiaN descriptors and
 * mmap()/mmap64() to spot mappings of per-channel user register windows.
 * Such a window is replaced by an inaccessible decoy mapping, so every access
 * faults; the SIGSEGV handler decodes the faulting MOV, logs FIFO contents
 * when the put pointer is written, and forwards the access to the real
 * mapping.  Logging is enabled by setting NOUVEAU_DUMP to an output filename.
 */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

/*
 * Rename the libc declarations of open()/open64() while the headers are read,
 * so this file can define its own fixed-arity versions without conflicting
 * with the variadic prototypes.
 *
 * NOTE(review): the header names were missing from the reviewed copy of this
 * file; the list below is reconstructed from the identifiers used here and
 * should be confirmed against the original.
 */
#define open sys_open
#define open64 sys_open64
#include <ctype.h>
#include <dlfcn.h>
#include <fcntl.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <ucontext.h>
#include <unistd.h>
#undef open
#undef open64

/* Resolve (once) and yield the next definition of `func` in link order. */
#define NEXT(func) ({ \
	static __typeof__(func)* NEXT_##func; \
	if(!NEXT_##func) \
		NEXT_##func = (__typeof__(func)*)dlsym(RTLD_NEXT, #func); \
	NEXT_##func; })

enum {
	MAX_NVIDIA_FDS = 256,
	MAX_INTERCEPTED_MAPPINGS = 256
};

/* Dump output stream; NULL until init() has opened it successfully. */
FILE* dump_file;

struct nvidia_card
{
	unsigned busid;
	unsigned long long vram_phys;
	unsigned long long vram_bar_size;
	unsigned long long mmio_phys;

	/* TODO: this should be in a channel struct */
	char* fifo;            /* CPU address of the mapping treated as the FIFO */
	unsigned fifo_base;    /* FIFO-space address corresponding to `fifo` */
	unsigned put;          /* last put value seen (FIFO-space address) */
	unsigned pusher_return;/* FIFO-space address to resume at on "return" */
	unsigned method_left;  /* data words still owed to the current method */

	char* scratch;
};

// TODO: this assumes a single card
struct nvidia_card single_card;

struct nvidia_fd
{
	int fd;
	struct nvidia_card* card;
};

struct nvidia_fd nvidia_fds[MAX_NVIDIA_FDS];
int num_nvidia_fds;

/* Return the card registered for `fd`, or NULL if `fd` is not an nvidia fd. */
struct nvidia_card* get_card(int fd)
{
	int i;
	for(i = 0; i < num_nvidia_fds; ++i) {
		if(nvidia_fds[i].fd == fd)
			return nvidia_fds[i].card;
	}
	return NULL;
}

/* The SIGSEGV disposition the application believes is installed. */
struct sigaction app_sigsegv_action;

/* Non-zero once our SIGSEGV handler is really installed (see init()). */
static int sigsegv_hooked;

struct nvidia_intercepted_mapping
{
	void* addr;       /* decoy mapping handed to the application */
	void* real_addr;  /* real device mapping returned by mmap */
	size_t size;
	struct nvidia_card* card;
	int channel;
};

struct nvidia_intercepted_mapping nvidia_intercepted_mappings[MAX_INTERCEPTED_MAPPINGS];
int num_nvidia_intercepted_mappings;

/* Translate a FIFO-space address into a CPU pointer inside card->fifo. */
static unsigned* fifo_at(struct nvidia_card* card, unsigned fifo_addr)
{
	return (unsigned*)(card->fifo + fifo_addr - card->fifo_base);
}

/*
 * Log every FIFO word from card->put up to (not including) `new_put`,
 * following jump/call/return words the way the loop conditions below decode
 * them.  Control transfers continue AT the target word; the previous code let
 * the loop increment run after assigning the target and so skipped the first
 * word there.
 */
static void dump_fifo(struct nvidia_card* card, unsigned new_put)
{
	unsigned* p = fifo_at(card, card->put);
	unsigned* end = fifo_at(card, new_put);

	fprintf(dump_file, "# fifo from %x to %x\n", card->put, new_put);
	while(p != end) {
		unsigned v = *p;
		unsigned* next = p + 1;

		fprintf(dump_file, "%x\n", v);
		if(card->method_left)
			--card->method_left;
		else if((v & 3) == 1) {
			fprintf(dump_file, "# jump to %x\n", v & ~3u);
			next = fifo_at(card, v & ~3u);
		} else if((v & 3) == 2) {
			fprintf(dump_file, "# call to %x\n", v & ~3u);
			/* resume at the word following the call */
			card->pusher_return = (unsigned)((char*)(p + 1) - card->fifo) + card->fifo_base;
			/* strip the tag bits, matching the address that is logged */
			next = fifo_at(card, v & ~3u);
		} else if((v & 0xe0000000) == 0x20000000) {
			fprintf(dump_file, "# old-style jump to %x\n", v & 0x1fffffff);
			next = fifo_at(card, v & 0x1fffffff);
		} else if(v == 0x00020000) {
			fprintf(dump_file, "# return\n");
			next = fifo_at(card, card->pusher_return);
		} else if(!(v & 0xa0000000))
			card->method_left = (v >> 18) & 2047;
		else
			fprintf(dump_file, "# unknown ring value %x\n", v);

		p = next;
	}
	fprintf(dump_file, "# end\n");
	fflush(dump_file);
}

/*
 * Perform a 32-bit store of `value` to `addr` (an address inside the decoy
 * mapping) on the real mapping.  A store to offset 0x40 is treated as an
 * update of the FIFO put pointer: the newly submitted words are dumped first.
 *
 * Runs in signal-handler context and uses stdio, which is not
 * async-signal-safe; that is accepted for this debugging tool.
 */
static void emulate_write32(struct nvidia_intercepted_mapping* nim, void* addr, unsigned value)
{
	size_t off = (size_t)((char*)addr - (char*)nim->addr);

	if(off == 0x40) {
		/* nothing to walk if no FIFO mapping has been recognised yet */
		if(nim->card->fifo)
			dump_fifo(nim->card, value);
		nim->card->put = value;
	}

	*(volatile unsigned*)((char*)nim->real_addr + off) = value;
}

/* Perform a 32-bit load from the real mapping at the offset of `addr`. */
static unsigned emulate_read32(struct nvidia_intercepted_mapping* nim, void* addr)
{
	size_t off = (size_t)((char*)addr - (char*)nim->addr);
	return *(volatile unsigned*)((char*)nim->real_addr + off);
}

/*
 * Map x86 instruction-encoding register numbers (ModRM/SIB order) to the
 * indices used by uc_mcontext.gregs, and name the instruction-pointer slot.
 */
#if __WORDSIZE == 32
#define UCTX_REG_IP REG_EIP
unsigned reg_to_uctx[8] = {REG_EAX, REG_ECX, REG_EDX, REG_EBX, REG_ESP, REG_EBP, REG_ESI, REG_EDI};
#else
#define UCTX_REG_IP REG_RIP
unsigned reg_to_uctx[16] = {REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI, REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15};
#endif

/* Pointer to the saved value of encoding-register `reg` in the context. */
static inline unsigned long* uctx_reg(ucontext_t* uctx, unsigned reg)
{
	return (unsigned long*)&uctx->uc_mcontext.gregs[reg_to_uctx[reg]];
}

/*
 * Decode and emulate the instruction at the faulting instruction pointer,
 * then advance the saved instruction pointer past it.
 *
 * Only `mov r32, r/m32` (0x8b) and `mov r/m32, r32` (0x89) with a memory
 * operand are handled; anything else prints the instruction bytes and aborts.
 *
 * NOTE(review): REX.W is accepted but the access is still emulated as 32-bit.
 */
static void emulate_instruction(struct nvidia_intercepted_mapping* nim, ucontext_t* uctx)
{
	uint8_t* code = (uint8_t*)uctx->uc_mcontext.gregs[UCTX_REG_IP];
	uint8_t* p = code;
	unsigned long addr = 0;
	unsigned rex = 0;
	unsigned opcode, modrm, mod, reg, rm;
	int disp_bytes;
	int rip_relative = 0;

#if __WORDSIZE == 64
	/* REX prefixes; if several are present the last one wins */
	while((*p & 0xf0) == 0x40)
		rex = *p++;
#endif

	opcode = *p++;
	if(opcode != 0x89 && opcode != 0x8b)
		goto unhandled;

	modrm = *p++;
	mod = modrm >> 6;
	reg = ((modrm >> 3) & 7) | ((rex & 4) << 1);
	rm = modrm & 7;

	/* mod 3 is register-to-register and cannot be a memory fault */
	if(mod == 3)
		goto unhandled;

	disp_bytes = (mod == 1) ? 1 : (mod == 2) ? 4 : 0;

	if(rm == 4) {
		/* SIB byte follows */
		unsigned sib = *p++;
		unsigned index = ((sib >> 3) & 7) | ((rex & 2) << 2);
		unsigned base = sib & 7;

		/* index 4 without REX.X means "no index"; with REX.X it is r12 */
		if(index != 4)
			addr += *uctx_reg(uctx, index) << (sib >> 6);

		if(base == 5 && mod == 0)
			disp_bytes = 4; /* no base register, disp32 only */
		else
			addr += *uctx_reg(uctx, base | ((rex & 1) << 3));
	} else if(rm == 5 && mod == 0) {
		/* disp32 only: absolute on 32-bit, RIP-relative on 64-bit */
		disp_bytes = 4;
#if __WORDSIZE == 64
		rip_relative = 1;
#endif
	} else
		addr += *uctx_reg(uctx, rm | ((rex & 1) << 3));

	/* displacements are signed */
	if(disp_bytes == 1) {
		addr += (unsigned long)(long)(int8_t)p[0];
		p += 1;
	} else if(disp_bytes == 4) {
		uint32_t disp = (uint32_t)p[0] | ((uint32_t)p[1] << 8)
			| ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
		addr += (unsigned long)(long)(int32_t)disp;
		p += 4;
	}

	/* these opcodes carry no immediate, so p is now the next instruction */
	if(rip_relative)
		addr += (unsigned long)(uintptr_t)p;

	/* never touch the real mapping through an address we decoded wrongly */
	{
		uintptr_t start = (uintptr_t)nim->addr;
		if(nim->size < 4 || addr < start || addr - start > nim->size - 4)
			goto unhandled;
	}

	if(opcode == 0x89)
		emulate_write32(nim, (void*)addr, (unsigned)*uctx_reg(uctx, reg));
	else
		*uctx_reg(uctx, reg) = emulate_read32(nim, (void*)addr);

	uctx->uc_mcontext.gregs[UCTX_REG_IP] = (greg_t)(uintptr_t)p;
	return;

unhandled:
	p = code;
	fprintf(stderr, "Fatal error: unhandled x86 instruction accessing intercepted mapping:"
		" %02x %02x %02x %02x %02x %02x %02x %02x\n",
		p[0], p[1], p[2] ,p[3], p[4], p[5], p[6], p[7]);
	fflush(stderr);
	abort();
}

/*
 * SIGSEGV handler.  Faults inside a decoy mapping are emulated; every other
 * fault is dispatched according to the disposition the application asked for.
 */
static void sigsegv_handler(int signum, siginfo_t *si, void *uctxp)
{
	ucontext_t* uctx = uctxp;
	uintptr_t fault = (uintptr_t)si->si_addr;
	int i;

	for(i = 0; i < num_nvidia_intercepted_mappings; ++i) {
		struct nvidia_intercepted_mapping* nim = &nvidia_intercepted_mappings[i];
		uintptr_t start = (uintptr_t)nim->addr;

		/* compare at full pointer width so far addresses cannot alias */
		if(fault >= start && fault - start < nim->size) {
			emulate_instruction(nim, uctx);
			return;
		}
	}

	// TODO: emulate uncommon sigaction behavior
	if(app_sigsegv_action.sa_handler == SIG_IGN)
		return;

	if(app_sigsegv_action.sa_handler == SIG_DFL) {
		/* restore the default action and crash for real */
		NEXT(signal)(SIGSEGV, SIG_DFL);
		*(volatile int*)0 = 0;
		raise(SIGSEGV);
		abort();
	}

	app_sigsegv_action.sa_sigaction(signum, si, uctxp);
}

int inited;

/*
 * One-time setup, run on the first open of an nvidia device node: open the
 * dump file named by NOUVEAU_DUMP, look up the card's BARs in
 * /proc/bus/pci/devices and install the SIGSEGV handler.  Does nothing more
 * if the dump file cannot be opened.
 */
static void init(void)
{
	const char* dump_filename;
	FILE* fp;
	struct sigaction sa;

	if(inited)
		return;
	inited = 1;

	dump_filename = getenv("NOUVEAU_DUMP");
	if(dump_filename)
		dump_file = fopen(dump_filename, "w");
	if(!dump_file)
		return;

	fp = fopen("/proc/bus/pci/devices", "r");
	if(!fp)
		fprintf(stderr, "nvidia dump: cannot open /proc/bus/pci/devices\n");
	else {
		char line[4096];

		while(fgets(line, sizeof(line), fp)) {
			unsigned busid, vendor_device, irq;
			/* 64-bit: BAR addresses and sizes can exceed 32 bits */
			unsigned long long bars[7];
			unsigned long long sizes[7];
			char driver[256] = "";
			int fields;

			fields = sscanf(line, "%x %x %x"
					" %llx %llx %llx %llx %llx %llx %llx"
					" %llx %llx %llx %llx %llx %llx %llx"
					" %255s",
					&busid, &vendor_device, &irq,
					&bars[0], &bars[1], &bars[2], &bars[3], &bars[4], &bars[5], &bars[6],
					&sizes[0], &sizes[1], &sizes[2], &sizes[3], &sizes[4], &sizes[5], &sizes[6],
					driver);

			/* lines for devices without a bound driver have no 18th field */
			if(fields < 18 || strcmp(driver, "nvidia") != 0)
				continue;

			single_card.busid = busid;
			single_card.vram_phys = bars[1] & ~0xfffULL;
			single_card.vram_bar_size = sizes[1];
			single_card.mmio_phys = bars[0] & ~0xfffULL;
		}
		fclose(fp);
	}

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = sigsegv_handler;
	sa.sa_flags = SA_SIGINFO;
	sigemptyset(&sa.sa_mask);
	/* remember what was installed so non-shim faults can be forwarded */
	if(NEXT(sigaction)(SIGSEGV, &sa, &app_sigsegv_action) == 0)
		sigsegv_hooked = 1;
}

/*
 * Interposed sigaction().  Once the shim owns SIGSEGV, requests for that
 * signal only update/report the saved application disposition; until then
 * (and for every other signal) the call goes straight to libc, so the
 * application's handler is really installed.
 */
int sigaction(int signum, const struct sigaction *act, struct sigaction *oldact)
{
	if(signum == SIGSEGV && sigsegv_hooked) {
		/* snapshot first so act and oldact may refer to the same object */
		struct sigaction previous = app_sigsegv_action;

		if(act)
			app_sigsegv_action = *act;
		if(oldact)
			*oldact = previous;
		return 0;
	}
	return NEXT(sigaction)(signum, act, oldact);
}

/* Interposed signal(); same policy as sigaction() above. */
sighandler_t signal(int signum, sighandler_t handler)
{
	if(signum == SIGSEGV && sigsegv_hooked) {
		sighandler_t old = app_sigsegv_action.sa_handler;

		memset(&app_sigsegv_action, 0, sizeof(app_sigsegv_action));
		app_sigsegv_action.sa_handler = handler;
		return old;
	}
	return NEXT(signal)(signum, handler);
}

/*
 * Post-process an open(): if `path` is "/dev/nvidia" followed only by digits
 * and the open succeeded, register `fd` as belonging to the card.  Always
 * returns `fd` unchanged.
 */
static int filter_open(const char* path, int fd)
{
	static const char prefix[] = "/dev/nvidia";
	const size_t prefix_len = sizeof(prefix) - 1;
	const char* p;

	if(fd < 0 || !path || strncmp(path, prefix, prefix_len) != 0)
		return fd;

	for(p = path + prefix_len; *p; ++p) {
		if(!isdigit((unsigned char)*p))
			return fd;
	}

	if(!dump_file) {
		init();
		if(!dump_file)
			return fd;
	}

	fprintf(dump_file, "# open %i %s\n", fd, path);

	/* descriptors get reused after close(); do not add duplicates */
	if(get_card(fd))
		return fd;

	if(num_nvidia_fds >= MAX_NVIDIA_FDS) {
		fprintf(stderr, "nvidia dump: too many nvidia fds, not tracking fd %i\n", fd);
		return fd;
	}

	nvidia_fds[num_nvidia_fds].fd = fd;
	nvidia_fds[num_nvidia_fds].card = &single_card;
	++num_nvidia_fds;
	return fd;
}

/* Interposed open(): forward to libc, then let filter_open() inspect it. */
int open(const char *path, int oflag, int mode)
{
	return filter_open(path, NEXT(open)(path, oflag, mode));
}

/* Interposed open64(): forward to libc, then let filter_open() inspect it. */
int open64(const char *path, int oflag, int mode)
{
	return filter_open(path, NEXT(open64)(path, oflag, mode));
}

/*
 * Post-process a successful mmap() on an nvidia fd.
 *
 * card: card owning the fd, or NULL for unrelated descriptors.
 * off/len: offset and length passed to mmap.
 * p: what the real mmap returned.
 *
 * If (off, len) matches one of the per-channel windows recognised below, an
 * inaccessible decoy mapping is created and returned in place of `p`, so that
 * accesses fault into sigsegv_handler().  A 1 MiB mapping is assumed to be
 * the FIFO itself.  In every other case `p` is returned unchanged.
 */
static void* filter_mmap(struct nvidia_card* card, unsigned long long off, size_t len, void* p)
{
	int user_channel = -1;

	if(!card || p == MAP_FAILED)
		return p;

	fprintf(dump_file, "#mmap off %llx len %zx => %p\n", off, len, p);

	/* range-check at full width before narrowing to int */
	if(off >= card->mmio_phys && off - card->mmio_phys < (16 * 1024 * 1024)) {
		int mmio = (int)(off - card->mmio_phys);
		int user = mmio - 0x800000;

		if(user >= 0 && !(user & 0xffff) && user < 32 * 0x10000 && len == 0x10000)
			user_channel = user >> 16;
		else {
			user = mmio - 0xc00000;
			if(user >= 0 && !(user & 0xfff) && user < 32 * 0x1000 && len == 0x1000)
				user_channel = user >> 12;
			else if(user >= 0 && !(user & 0x1fff) && user < 128 * 0x2000 && len == 0x2000)
				user_channel = user >> 13;
		}
	}

	if(user_channel >= 0) {
		struct nvidia_intercepted_mapping* nim;
		void* map;
		int fd;

		/* on any failure fall back to the real mapping (no dump, no crash) */
		if(num_nvidia_intercepted_mappings >= MAX_INTERCEPTED_MAPPINGS) {
			fprintf(stderr, "nvidia dump: too many intercepted mappings\n");
			return p;
		}

		fd = open("/dev/zero", O_RDWR, 0);
		if(fd < 0) {
			fprintf(stderr, "nvidia dump: cannot open /dev/zero\n");
			return p;
		}
		map = mmap(0, len, PROT_NONE, MAP_SHARED, fd, 0);
		close(fd);
		if(map == MAP_FAILED) {
			fprintf(stderr, "nvidia dump: cannot create decoy mapping\n");
			return p;
		}

		nim = &nvidia_intercepted_mappings[num_nvidia_intercepted_mappings++];
		nim->card = card;
		nim->channel = user_channel;
		nim->addr = map;
		nim->real_addr = p;
		nim->size = len;
		return map;
	}

	if(len == 1024 * 1024) {
		/* NOTE(review): put/fifo_base are hard-coded values — confirm. */
		fprintf(dump_file, "# \n");
		card->fifo = p;
		card->put = 0x160000;
		card->fifo_base = 0x60000;
	}

	return p;
}

/* Interposed mmap64(): forward to libc, then let filter_mmap() inspect it. */
void* mmap64(void *addr, size_t len, int prot, int flags, int fd, off64_t off)
{
	void* p = NEXT(mmap64)(addr, len, prot, flags, fd, off);
	return filter_mmap(get_card(fd), off, len, p);
}

/* Interposed mmap(): forward to libc, then let filter_mmap() inspect it. */
void* mmap(void *addr, size_t len, int prot, int flags, int fd, off_t off)
{
	void* p = NEXT(mmap)(addr, len, prot, flags, fd, off);
	return filter_mmap(get_card(fd), off, len, p);
}