/* * nvlib.h * * Created on: Mar 18, 2010 * Author: lb */ #ifndef NVLIB_H_ #define NVLIB_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "nouveau_reg.h" #include #define PAGE_SIZE 4096 inline std::ostream& hex08(std::ostream& out) { return out << std::hex << std::setw(8) << std::setfill('0'); } #define NV_PFIFO_RAMHT 0x2210 enum nouveau_card_type { NV_04 = 0x00, NV_10 = 0x10, NV_20 = 0x20, NV_30 = 0x30, NV_40 = 0x40, NV_50 = 0x50, }; enum nv_driver_type { NV_DRIVER_NONE = 0, NV_DRIVER_NVIDIA = 1, NV_DRIVER_NOUVEAU = 2, }; struct os_interface { enum nv_driver_type nv_driver; virtual void memcpy_from_phys(void* to, uint64_t from, size_t size) = 0; virtual void memcpy_to_phys(uint64_t to, const void* from, size_t size) = 0; virtual void memcpy_phys_to_phys(uint64_t to, uint64_t from, size_t size) = 0; virtual void trace_marker(const char* s) {}; }; struct os_linux : public os_interface { int devmem; int physmem; os_linux() { devmem = open("/dev/mem", O_RDWR); if(devmem < 0) { std::cerr << "Unable to open /dev/mem. Are you root?" 
<< std::endl; exit(1); } pci_system_init(); pci_system_init_dev_mem(devmem); int fd; fd = open("/dev/nvidiactl", O_RDWR); if(fd >= 0) { nv_driver = NV_DRIVER_NVIDIA; close(fd); } else { fd = open("/dev/dri/card0", O_RDWR); if(fd >= 0) { nv_driver = NV_DRIVER_NOUVEAU; close(fd); } else nv_driver = NV_DRIVER_NONE; } } virtual void memcpy_from_phys(void* tov, uint64_t from, size_t size) { char* to = (char*)tov; if(pread(devmem, to, size, from) < (ssize_t)size) { while(size) { size_t copy = PAGE_SIZE - (from & (PAGE_SIZE - 1)); if(copy > size) copy = size; char* p = (char*)mmap(0, PAGE_SIZE, PROT_READ, MAP_SHARED, devmem, from & ~(PAGE_SIZE - 1)); if(p == MAP_FAILED) throw std::runtime_error(strerror(errno)); memcpy(to, p + (from & (PAGE_SIZE - 1)), copy); munmap(p, PAGE_SIZE); to += copy; from += copy; size -= copy; } } } /* For some unfathomable reason, sometimes you can mmap vram but not read/write it */ virtual void memcpy_to_phys(uint64_t to, const void* fromv, size_t size) { const char* from = (const char*)fromv; if(pwrite(devmem, from, size, to) < (ssize_t)size) { while(size) { size_t copy = PAGE_SIZE - (to & (PAGE_SIZE - 1)); if(copy > size) copy = size; char* p = (char*)mmap(0, PAGE_SIZE, PROT_WRITE, MAP_SHARED, devmem, to & ~(PAGE_SIZE - 1)); if(p == MAP_FAILED) throw std::runtime_error(strerror(errno)); memcpy(p + (to & (PAGE_SIZE - 1)), from, copy); munmap(p, PAGE_SIZE); to += copy; from += copy; size -= copy; } } } virtual void memcpy_phys_to_phys(uint64_t to, uint64_t from, size_t size) { char buf[4096]; while(size) { size_t copy = size; if(copy > sizeof(buf)) copy = sizeof(buf); memcpy_from_phys(buf, from, size); memcpy_to_phys(to, buf, size); to += copy; from += copy; size -= copy; } } virtual void trace_marker(const char* s) { int fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY); if(fd >= 0) { write(fd, s, strlen(s)); close(fd); } } }; struct os_interface* os; struct os_init_struct { os_init_struct() { os = new os_linux(); } }; 
/* Instantiating this global runs os_init_struct's ctor at static-init time,
 * which populates the global `os` pointer before main(). */
os_init_struct os_init_global;

/* there are slight variations, but they should not matter since we only match the lower byte */
/* Eight "well-known" graphics object classes for each of the 6 card
 * generations; consumed by nv_hwchannel::get_grclasses when running under
 * the proprietary driver.
 * NOTE(review): it is indexed as nvidia_grclasses + 8 * dev->card_type, but
 * card_type values are 0x00/0x10/.../0x50 (not 0..5), which would index far
 * past the end of this array for anything above NV_04 -- confirm intent. */
unsigned nvidia_grclasses[8 * 6] = {
	/* nv04 */
	NV04_SWIZZLED_SURFACE, NV04_TEXTURED_TRIANGLE, NV04_MULTITEX_TRIANGLE, NV04_GDI_RECTANGLE_TEXT,
	NV04_CONTEXT_SURFACES_2D, NV04_CONTEXT_SURFACES_3D, NV04_IMAGE_BLIT, NV04_SCALED_IMAGE_FROM_MEMORY,
	/* nv10 */
	NV04_GDI_RECTANGLE_TEXT, NV11TCL, NV10_SCALED_IMAGE_FROM_MEMORY, NV04_MEMORY_TO_MEMORY_FORMAT,
	NV04_SWIZZLED_SURFACE, NV12_IMAGE_BLIT, NV10_CONTEXT_SURFACES_2D, 0,
	/* nv20 */
	NV04_BETA_SOLID, NV20TCL, NV10_SCALED_IMAGE_FROM_MEMORY, NV04_MEMORY_TO_MEMORY_FORMAT,
	NV20_SWIZZLED_SURFACE, NV12_IMAGE_BLIT, NV10_CONTEXT_SURFACES_2D, 0,
	/* nv30 */
	NV04_GDI_RECTANGLE_TEXT, NV34TCL, NV30_SCALED_IMAGE_FROM_MEMORY, NV04_MEMORY_TO_MEMORY_FORMAT,
	NV30_SWIZZLED_SURFACE, NV12_IMAGE_BLIT, NV30_CONTEXT_SURFACES_2D, 0,
	/* nv40 */
	NV04_BETA_SOLID, NV40TCL, NV40_SCALED_IMAGE_FROM_MEMORY, NV04_MEMORY_TO_MEMORY_FORMAT,
	NV40_SWIZZLED_SURFACE, NV12_IMAGE_BLIT, NV40_CONTEXT_SURFACES_2D, 0,
	/* nv50 */
	0, /* TODO: UNKNOWN! handle is 0xbeef4901, maybe a software object? */
	NV50TCL, NV50_MEMORY_TO_MEMORY_FORMAT, NV50_2D, 0, 0, 0, 0,
};

/* Graphics context sizes (bytes) per chipset. */
#define NV20_GRCTX_SIZE (3580*4)
#define NV25_GRCTX_SIZE (3529*4)
#define NV2A_GRCTX_SIZE (3500*4)
#define NV30_31_GRCTX_SIZE (24392)
#define NV34_GRCTX_SIZE (18140)
#define NV35_36_GRCTX_SIZE (22396)

/* Decoded RAMHT (object hash table) entry: maps a (channel, handle) pair to
 * an instance offset inside RAMIN plus the engine that owns the object.
 * NOTE(review): template arguments for std::shared_ptr and similar appear to
 * have been stripped from this file by text extraction; the code below keeps
 * the damaged tokens verbatim. */
struct nv_ramht_entry {
	bool valid;
	unsigned char channel;
	unsigned char engine;
	unsigned handle;
	unsigned instance;

	/* Strict weak ordering by (channel, handle), for sorted dumps. */
	static bool channel_handle_less(const nv_ramht_entry& a, const nv_ramht_entry& b) {
		if(a.channel != b.channel)
			return a.channel < b.channel;
		else
			return a.handle < b.handle;
	}

	/* Strict weak ordering by instance offset. */
	static bool instance_less(const nv_ramht_entry& a, const nv_ramht_entry& b) {
		return a.instance < b.instance;
	}
};

struct nv_device;

/* A CPU-mapped window into the card's address space: base pointer + size,
 * with volatile 32-bit register-style accessors. */
struct nv_region {
	nv_device* dev;
	char* ptr;
	size_t size;

	nv_region(nv_device* dev) : dev(dev), ptr(0), size(0) {}
	nv_region(nv_device* dev, char* ptr, size_t size) : dev(dev), ptr(ptr), size(size) {}

	/* 32-bit MMIO read at byte offset `off` (volatile so the compiler does
	 * not cache or reorder device accesses). */
	uint32_t rd32(uint32_t off) const {
		return *(volatile uint32_t*)(ptr + off);
	}

	/* 32-bit MMIO write at byte offset `off`. */
	void wr32(uint32_t off, uint32_t value) const {
		*(volatile uint32_t*)(ptr + off) = value;
	}

	/* Byte offset of this region inside `container`, or -1 if this region is
	 * not fully contained in it. */
	int offset_in(const nv_region& container) const {
		if(container.ptr > ptr)
			return -1;
		if((ptr + size) > (container.ptr + container.size))
			return -1;
		return ptr - container.ptr;
	}
};

/* The card itself: the nv_region base maps BAR0 (MMIO registers); nested
 * region types give structured access to RAMIN, RAMHT, RAMFC, etc. */
struct nv_device : public nv_region {
	// TODO: implement nv50 ramin flushing for writes

	/* Instance memory (RAMIN) aperture. */
	struct nv_ramin : public nv_region {
		nv_ramin(struct nv_device* dev) : nv_region(dev) {
			/* map larger RAMIN aperture on NV40-NV50 cards */
			ptr = NULL;
			if (dev->card_type >= NV_40) {
				int ramin_bar = 2;
				if (dev->pci->regions[ramin_bar].size == 0)
					ramin_bar = 3;
				size = dev->pci->regions[ramin_bar].size;
				pci_device_map_range(dev->pci, dev->pci->regions[ramin_bar].base_addr, dev->pci->regions[ramin_bar].size, PCI_DEV_MAP_FLAG_WRITABLE, (void**)&ptr);
				if (!ptr)
					throw std::runtime_error("Failed to init RAMIN mapping");
			}
			/* On older cards (or if the above failed), create a map covering
			 * the BAR0 PRAMIN aperture */
			/* NOTE(review): the throw above makes the "if the above failed"
			 * fallback unreachable on NV40+ -- confirm which was intended. */
			if (!ptr) {
				size = 1 * 1024 * 1024;
				ptr = dev->ptr + NV_RAMIN;
			}
		}
	};

	/* "Linear" RAMIN view used on NV50+ where fifo contexts live in VRAM. */
	struct nv_linear_ramin : public nv_region {
		nv_linear_ramin(struct nv_device* dev) : nv_region(dev) {
			size = 16 * 1024 * 1024;
			ptr = dev->ptr + NV_RAMIN; // XXX: OUCH!
			//size = dev->vram_mappable_size;
			//pci_device_map_range(dev->pci, dev->pci->regions[1].base_addr, size, PCI_DEV_MAP_FLAG_WRITABLE, (void**)&ptr);
			//if (!ptr)
			//	throw std::runtime_error("Failed to init linear RAMIN mapping");
		}
	};

	/* RAMHT: the hash table mapping (channel, handle) -> object instance. */
	struct nv_ramht : public nv_region {
		std::shared_ptr ramin;
		int channel;      /* owning channel on NV50+ (per-channel tables); unused pre-NV50 */
		unsigned bits;    /* log2 of the number of entries */
		unsigned entries;
		//unsigned search_shift; // TODO: what is this exactly?

		/* Pre-NV50: one global RAMHT, located via the PFIFO_RAMHT register. */
		nv_ramht(std::shared_ptr ramin) : nv_region(ramin->dev), ramin(ramin) {
			assert(dev->card_type < NV_50);
			uint32_t reg = dev->rd32(NV_PFIFO_RAMHT);
			bits = ((reg >> 16) & 0xf) + 9;
			ptr = ramin->ptr + ((reg & 0xffff) << 8);
			entries = 1 << bits;
			size = entries * 8; /* 8 bytes (handle + context word) per entry */
			//search_shift = (reg >> 24) + 4;
		}

		/* NV50+: one RAMHT per channel, at a caller-supplied RAMIN offset. */
		nv_ramht(std::shared_ptr ramin, uint32_t offset, int channel = -1) : nv_region(ramin->dev), ramin(ramin), channel(channel) {
			assert(dev->card_type >= NV_50);
			ptr = ramin->ptr + offset;
			bits = 9;
			entries = 1 << bits;
			size = entries * 8;
		}

		/* Hardware hash: fold the handle `bits` bits at a time, then mix in
		 * the channel (pre-NV50).
		 * NOTE(review): the final `hash <<= 3` turns the value into a byte
		 * offset, yet find() below uses the result as an entry index into
		 * get_at() (which multiplies by 8 again) -- confirm. */
		uint32_t hash_handle(int channel, uint32_t handle) const {
			uint32_t hash = 0;
			int i;
			for (i = 32; i > 0; i -= bits) {
				hash ^= (handle & ((1 << bits) - 1));
				handle >>= bits;
			}
			if (dev->card_type < NV_50)
				hash ^= channel << (bits - 4);
			hash <<= 3;
			return hash;
		}

		/* Linear-probe from the hash slot. Returns the slot index of the
		 * matching entry, or of the first free slot encountered (caller must
		 * check entry.valid), or -1 if the table is full. */
		int find(unsigned channel, uint32_t handle, nv_ramht_entry& entry) const {
			unsigned start = hash_handle(channel, handle);
			unsigned i = start;
			do {
				entry = get_at(i);
				if(entry.valid) {
					if(entry.channel == channel && entry.handle == handle)
						return i;
				} else
					return i;
				++i;
				if(i == entries)
					i = 0;
			} while(i != start);
			return -1;
		}

		/* Decode slot `i` into an nv_ramht_entry; bit layout differs per
		 * generation. */
		nv_ramht_entry get_at(unsigned i) const {
			nv_ramht_entry entry;
			uint32_t ctx;
			entry.handle = rd32(i * 8);
			ctx = rd32(i * 8 + 4);
			if(dev->card_type < NV_40) {
				entry.valid = (ctx & (1 << 31)) != 0;
				entry.engine = (ctx >> 16) & 3;
				entry.channel = (ctx >> 24) & (dev->channels - 1);
				entry.instance = (ctx & 0xffff) << 4;
			} else if(dev->card_type < NV_50) {
				entry.valid = ctx != 0;
				entry.engine = (ctx >> 20) & 3;
				entry.channel = (ctx >> 23) & (dev->channels - 1);;
				entry.instance = (ctx & 0xfffff) << 4;
			} else {
				entry.valid = ctx != 0;
				//if(evo) {
				//	entry.instance = (ctx & 0xfffff) >> 10;
				//	entry.engine = 2;
				//}
				entry.instance = (ctx & 0xfffff) << 4;
				entry.engine = (ctx >> 20) & 3;
				entry.channel = this->channel; /* per-channel table on NV50+ */
			}
			return entry;
		}

		/* Encode and write an entry into slot `i`.
		 * NOTE(review): the channel shifts here (23 pre-NV40, 24 on NV4x) are
		 * swapped relative to get_at() above (24 pre-NV40, 23 on NV4x), and
		 * the pre-NV40 valid bit (1 << 31) is never set -- one of the two
		 * encodings looks wrong; confirm against the hw docs. */
		void set_at(unsigned i, const nv_ramht_entry& entry) const {
			uint32_t ctx;
			wr32(i * 8, entry.handle);
			if(dev->card_type < NV_40)
				ctx = (entry.instance >> 4) | (entry.engine << 16) | (entry.channel << 23);
			else if(dev->card_type < NV_50)
				ctx = (entry.instance >> 4) | (entry.engine << 20) | (entry.channel << 24);
			else if(entry.engine == 2)
				ctx = (entry.instance << 10) | 2;
			else
				ctx = (entry.instance >> 4) | (entry.engine << 20);
			wr32(i * 8 + 4, ctx);
		}

		/* Zero out slot `i` (marks it free). */
		void clear_at(unsigned i) const {
			wr32(i * 8, 0);
			wr32(i * 8 + 4, 0);
		}

		/* Insert (or overwrite) an entry at its probed slot. */
		void insert(const nv_ramht_entry& entry) const {
			nv_ramht_entry cur;
			int i = find(entry.channel, entry.handle, cur);
			if(i < 0)
				throw std::runtime_error("ramht table full!");
			set_at(i, entry);
		}

		/* Remove the entry for (channel, handle), if present. */
		void remove(unsigned channel, uint32_t handle) const {
			nv_ramht_entry entry;
			int i = find(channel, handle, entry);
			if(i >= 0 && entry.valid)
				clear_at(i);
		}
	};

	/* An object living in RAMIN: either a graphics object (grobj) or a DMA
	 * context object (paged or linear), decoded from its instance words. */
	struct nv_object : public nv_region {
		enum nv_object_type { grobj, dma_paged, dma_linear };
		enum nv_target { vram, vram_tiled, pci, gart };
		std::shared_ptr ramin;
		nv_object_type type;
		uint32_t tag;        /* first instance word: class + flags */
		int64_t dma_base;    /* -1 if not applicable */
		int64_t dma_limit;   /* -1 if not applicable */
		bool dma_present;
		nv_target dma_target;

		static const char* dma_target_str(nv_target dma_target) {
			const char* dma_target_strs[4] = {"VRAM", "VRAM_TILED", "PCI", "AGP"};
			return dma_target_strs[dma_target];
		}

		/* Decode the object at RAMIN offset `offset`. Classes 2/3/0x3d are
		 * the DMA object classes; anything else is treated as a grobj. */
		nv_object(std::shared_ptr ramin, uint32_t offset) : nv_region(ramin->dev), ramin(ramin) {
			ptr = ramin->ptr + offset;
			tag = rd32(0);
			dma_base = -1;
			dma_limit = -1;
			unsigned objclass = tag & 0xff;
			if((objclass == 0x3d || objclass == 2 || objclass == 3)) {
				if(dev->card_type < NV_50) {
					type = (tag & (1 << 13)) ? dma_linear : dma_paged;
					dma_limit = rd32(4);
					dma_target = (nv_target)((tag >> 16) & 3);
					dma_present = !!(tag & (1 << 12));
					if(!dma_present)
						/* TODO: guess */
						size = 8;
					else if(type == dma_linear) {
						dma_base = (rd32(8) & ~0xfff) | (tag & 0xff000000);
						size = 12;
					} else
						/* paged: header + one PTE per page + terminator */
						size = (((dma_limit + 0xfff) >> 12) + 2) * 4;
				} else {
					/* TODO: nv50 non-linear ctxdmas? flags? */
					unsigned v3 = rd32(12);
					dma_limit = rd32(4) | ((uint64_t)(v3 & 0xff000000) << 8);
					dma_base = rd32(8) | ((uint64_t)(v3 & 0xff) << 32);
					size = 24; // TODO: this is almost surely incomplete
					type = dma_linear;
					dma_present = true;
					if(tag & 0xf0000)
						dma_target = vram;
					else
						dma_target = gart;
				}
			} else {
				type = grobj;
				/*XXX: dodgy hack for now */
				if (dev->card_type >= NV_50)
					size = 24;
				else if (dev->card_type >= NV_40)
					size = 32;
				else
					size = 16;
			}
		}

		/* Translate an offset inside this DMA object to a linear address in
		 * the target address space; ~0ULL on out-of-range / not-present.
		 * FIXME(review): in the dma_paged branch `addr` is read-modified
		 * (`addr +=`) before ever being initialized -- undefined behavior;
		 * it presumably should be `addr =`. */
		uint64_t dma_to_linear(uint32_t off) const {
			uint32_t size;
			uint64_t addr;
			assert(type != grobj);
			if (dev->card_type < NV_50) {
				size = dma_limit + 1;
				if(off >= size)
					return ~0ULL;
				if(!dma_present)
					return ~0ULL;
				if(type == dma_paged) {
					uint64_t pte = rd_pte(off >> 12);
					addr += (pte & ~0xfff) + (off & 0xfff);
				} else if(type == dma_linear)
					return dma_base + off;
				else
					assert(0);
			} else {
				assert(type == dma_linear);
				return dma_base + off;
			}
			return addr;
		}

		/* TODO: this probably should be redesigned */
		/* Translate an offset inside this DMA object to a host physical
		 * address (VRAM offsets are rebased onto the BAR1 physical address).
		 * FIXME(review): the inner `uint64_t addr` shadows the outer one, so
		 * the final `return addr` returns an uninitialized value on any path
		 * that falls through; also dma_target (an nv_target enum) is compared
		 * against the NV_DMA_TARGET_* register macros -- confirm the two
		 * value sets actually coincide. */
		uint64_t dma_to_phys(uint32_t off) const {
			uint64_t addr;
			if (dev->card_type < NV_50) {
				uint64_t addr = dma_to_linear(off);
				if(addr == ~0ULL)
					return addr;
				if(dma_target == NV_DMA_TARGET_VIDMEM) {
					if(addr >= dev->vram_mappable_size)
						return ~0ULL;
					else
						return addr + dev->vram_phys;
				} else if(dma_target == NV_DMA_TARGET_PCI)
					return addr;
				else if(dma_target == NV_DMA_TARGET_AGP)
					throw std::runtime_error("AGP not implemented");
				else
					assert(0);
			} else
				/* TODO: VM... */
				assert(0);
			return addr;
		}

		/* Number of page-table entries in a paged DMA object. */
		unsigned num_ptes() const {
			assert(type == dma_paged);
			return (size >> 2) - 2;
		}

		/* Read PTE `i`; the high byte comes from the tag word. */
		uint64_t rd_pte(unsigned i) const {
			assert(type == dma_paged);
			return rd32(8 + i * 4) | (tag & 0xff000000);
		}

		/* Write PTE `i` as present + read/write (low bits 0b11). */
		void wr_pte_present_wr(unsigned i, uint64_t offset) const {
			assert(type == dma_paged);
			wr32(8 + i * 4, offset | 3);
		}

		/* Human-readable dump; stream flags are saved/restored around it. */
		void print(std::ostream& out) const {
			boost::io::ios_all_saver ias(out);
			if(type == grobj) {
				out << "GR";
				for(unsigned i = 0; i < size; i += 4)
					out << ' ' << hex08 << rd32(i);
			} else {
				out << dma_target_str(dma_target) << ' ' << hex08 << tag << ' ' << hex08 << dma_limit;
				if(type == dma_linear)
					out << " -> " << hex08 << dma_base;
			}
		}

		friend std::ostream& operator <<(std::ostream& out, const nv_object& obj) {
			obj.print(out);
			return out;
		}
	};

	/* RAMFC: the array of per-channel FIFO contexts (pre-NV50 only). */
	struct nv_ramfc : public nv_region {
		std::shared_ptr ramin;

		nv_ramfc(std::shared_ptr ramin) : nv_region(ramin->dev), ramin(ramin) {
			assert(dev->card_type < NV_50);
			if(dev->card_type >= NV_40)
				ptr = ramin->ptr + 0x20000;
			else
				ptr = ramin->ptr + 0x11400;
			size = dev->fifoctx_size * dev->channels;
		}
	};

	/* The per-channel USER apertures (where GET/PUT live). */
	struct nv_users : public nv_region {
		/* One channel's USER window. */
		struct nv_user : public nv_region {
			nv_users* users;
			nv_user(nv_users* users, unsigned channel) : nv_region(users->dev), users(users) {
				ptr = users->ptr + users->user_size * channel;
				size = users->user_size;
			}
		};

		/* NOTE(review): this `dev` shadows nv_region::dev and is never
		 * initialized; nv_user above reads the base-class one -- confirm the
		 * shadow member can be removed. */
		nv_device* dev;
		unsigned user_size;
		std::shared_ptr user[128];

		nv_users(nv_device* dev) : nv_region(dev) {
			if (dev->card_type < NV_40) {
				ptr = dev->ptr + NV03_USER(0);
				user_size = NV03_USER_SIZE;
			} else if (dev->card_type < NV_50) {
				ptr = dev->ptr + NV40_USER(0);
				user_size = NV40_USER_SIZE;
			} else {
				ptr = dev->ptr + NV50_USER(0);
				user_size = NV50_USER_SIZE;
			}
			size = user_size * dev->channels;
			for(unsigned i = 0; i < dev->channels; ++i)
				user[i].reset(new nv_user(this, i));
		}
	};
	typedef nv_users::nv_user nv_user;

	/* RAMRO: the runout area where faulting FIFO writes land. */
	struct nv_ramro : public nv_region {
		nv_ramro(std::shared_ptr ramin) : nv_region(ramin->dev) {
			ptr = ramin->ptr + (dev->rd32(NV03_PFIFO_RAMRO) << 8);
			size = 512;
		}
	};

	/* NV20/NV30 per-channel graphics-context pointer table.
	 * FIXME(review): `nv_region(dev)` passes the (not-yet-initialized)
	 * inherited `dev` member to the base ctor instead of `ramin->dev` --
	 * almost certainly a bug; compare nv_ramro above. */
	struct nv20_grctx_table : public nv_region {
		nv20_grctx_table(std::shared_ptr ramin) : nv_region(dev) {
			ptr = ramin->ptr + (dev->rd32(NV20_PGRAPH_CHANNEL_CTX_TABLE) << 4);
			size = 4 * 32;
		}
	};

	/* A channel's graphics (PGRAPH) context inside RAMIN. */
	struct nv_grctx : public nv_region {
		nv_grctx(nv_device* dev, unsigned offset) : nv_region(dev) {
			ptr = dev->ramin->ptr + offset;
			size = dev->grctx_size;
		}
	};

	/* One channel's FIFO context; knows how to locate the channel's RAMHT
	 * and graphics context from its contents. */
	struct nv_fifoctx : public nv_region {
		/* NV50+: fifo context at an explicit RAMIN offset. */
		nv_fifoctx(std::shared_ptr ramin, uint32_t offset) : nv_region(ramin->dev) {
			assert(dev->card_type >= NV_50);
			ptr = ramin->ptr + offset;
			size = dev->fifoctx_size;
		}

		/* Pre-NV50: fifo context indexed inside RAMFC. */
		nv_fifoctx(std::shared_ptr ramfc, unsigned channel) : nv_region(ramfc->dev) {
			assert(dev->card_type < NV_50);
			ptr = ramfc->ptr + dev->fifoctx_size * channel;
			size = dev->fifoctx_size;
		}

		/* The RAMHT used by this channel: the global one pre-NV50, otherwise
		 * a per-channel table located via word 0x80 of the fifo context. */
		std::shared_ptr ramht() const {
			if(dev->dev_ramht)
				return dev->dev_ramht;
			else
				// TODO: is the high part the logarithm of the size?
				return std::shared_ptr(new nv_ramht(dev->ramin, (rd32(0x80) & 0xffffff) << 4));
		}

		/* The graphics context for this channel (NV40+ keep a pointer to it
		 * in the fifo context; older cards return null -- see
		 * nv_device::grctx for the pre-NV40 path). */
		std::shared_ptr grctx() const {
			if(dev->card_type >= NV_40) {
				unsigned offset;
				if(dev->card_type >= NV_50) {
					if(dev->chipset < 0x60)
						offset = offset_in(*dev->ramin) + 0x200;
					else
						offset = rd32(0x98) << 12;
				} else {
					assert(dev->fifoctx_grctx >= 0);
					offset = rd32(56) << 4;
				}
				return std::shared_ptr(new nv_grctx(dev, offset));
			} else
				return std::shared_ptr();
		}
	};

	/* Convenience bundle of everything known about one hardware channel. */
	struct nv_hwchannel {
		nv_device* dev;
		unsigned channel;
		std::shared_ptr user;
		std::shared_ptr fifoctx;
		std::shared_ptr grctx;
		std::shared_ptr ramht;

		nv_hwchannel(nv_device* dev, unsigned channel, std::shared_ptr fifoctx = std::shared_ptr()) : dev(dev) {
			update(channel, fifoctx);
		}

		/* (Re)resolve user/fifoctx/grctx/ramht for `p_channel` (or the
		 * current channel if p_channel < 0). */
		void update(int p_channel = -1, std::shared_ptr p_fifoctx = std::shared_ptr()) {
			if(p_channel >= 0)
				channel = p_channel;
			assert(channel < dev->channels);
			user = dev->users->user[channel];
			if(p_fifoctx)
				fifoctx = p_fifoctx;
			else
				fifoctx = dev->fifoctx(channel);
			grctx = fifoctx->grctx();
			if(!grctx)
				grctx = dev->grctx(channel);
			ramht = fifoctx->ramht();
			if(ramht != dev->dev_ramht)
				ramht->channel = channel; /* per-channel ramht: record the owner */
		}

		bool enabled() const {
			return dev->is_channel_enabled(channel);
		}

		/* Fill grclasses[0..7] with the object classes bound on this channel.
		 * Under the proprietary driver the static table is used; under
		 * nouveau they are read out of the graphics context.
		 * NOTE(review): grctx_grclasses is -1 when unknown, but the guard is
		 * `!dev->grctx_grclasses` (only catches 0) -- confirm intended. */
		bool get_grclasses(unsigned grclasses[8]) const {
			if(os->nv_driver == NV_DRIVER_NVIDIA) {
				memcpy(grclasses, nvidia_grclasses + 8 * dev->card_type, 8 * sizeof(unsigned));
			} else {
				if(!dev->grctx_grclasses)
					return false;
				if(!grctx)
					return false;
				for(unsigned i = 0; i < 8; ++i)
					grclasses[i] = grctx->rd32(dev->grctx_grclasses + i * 4) & 0xffff;
			}
			return true;
		}

		// XXX: these need to be revisited, since we cannot read/write fifoctx while the context is running!
		// TODO: we should read/write from pfifo directly if the channel is running
		uint32_t rd_get() const {
			return user->rd32(0x44);
		}
		void wr_get(uint32_t value) const {
			user->wr32(0x44, value);
		}
		uint32_t rd_put() const {
			/* user put always reads as 0 pre-nv40 */
			if(dev->card_type < NV_40)
				return fifoctx->rd32(0);
			else
				return user->rd32(0x40);
		}
		void wr_put(uint32_t value) const {
			user->wr32(0x40, value);
		}
		/* Instance offset (<<4) of the channel's current pushbuffer ctxdma. */
		uint32_t rd_dma() const {
			unsigned dma = fifoctx->rd32(dev->fifoctx_fifo) << 4;
			if(dev->card_type < NV_40)
				dma &= 0xfffff;
			return dma;
		}
		void wr_dma(uint32_t v) {
			fifoctx->wr32(dev->fifoctx_fifo, v >> 4);
		}
	};

	/* --- device-wide state --- */
	struct pci_device* pci;
	uint64_t vram_phys;          /* physical address of BAR1 (VRAM aperture) */
	uint64_t vram_total_size;
	uint64_t vram_mappable_size;
	unsigned chipset;            /* e.g. 0x45, 0x86; 0xff = unknown */
	enum nouveau_card_type card_type;
	unsigned channels;           /* number of FIFO channels */
	int grctx_grclasses;         /* offset of bound-classes array in grctx, or -1 */
	unsigned grctx_size;
	unsigned fifoctx_fifo;       /* offset of the pushbuffer-ctxdma word in a fifoctx */
	int fifoctx_grctx;           /* offset of the grctx pointer in a fifoctx, or -1 */
	unsigned fifoctx_size;
	unsigned ramhts;             /* 1 global table pre-NV50, else one per channel */
	std::shared_ptr ramin;
	std::shared_ptr linear_ramin;
	std::shared_ptr dev_ramht;   /* global RAMHT (pre-NV50), else null */
	std::shared_ptr ramfc;
	std::shared_ptr ramro;
	std::shared_ptr users;
	std::shared_ptr grctx_table; /* NV20/NV30 only */

	/* Open the device named "dom:bus:dev.fn" (hex), or auto-detect the
	 * single NVIDIA card when pciname is NULL. Maps BAR0 and probes the
	 * chipset, channel count, VRAM size and per-generation layout info. */
	nv_device(const char* pciname) : nv_region(this) {
		if(pciname) {
			unsigned domain, bus, devid, func;
			if(sscanf(pciname, "%x:%x:%x.%x", &domain, &bus, &devid, &func) < 4)
				throw std::runtime_error("unable to parse PCI name");
			pci = pci_device_find_by_slot(domain, bus, devid, func);
			if(!pci)
				throw std::runtime_error("unable to find PCI device");
			if(pci->vendor_id != 0x10de)
				throw std::runtime_error("not an nVidia card");
		} else {
			struct pci_id_match match;
			memset(&match, 0, sizeof(match));
			match.vendor_id = 0x10de;
			match.subvendor_id = PCI_MATCH_ANY;
			match.device_id = PCI_MATCH_ANY;
			match.subdevice_id = PCI_MATCH_ANY;
			struct pci_device_iterator *iter = pci_id_match_iterator_create(&match);
			pci = pci_device_next(iter);
			if(!pci)
				throw std::runtime_error("cannot find any nVidia card");
			struct pci_device* second = pci_device_next(iter);
			if(second)
				throw std::runtime_error("more than one nVidia card: specify the desired one explicitly");
			pci_iterator_destroy(iter);
		}
		pci_device_probe(pci);
		vram_phys = pci->regions[1].base_addr;
		size = pci->regions[0].size;
		/* Map BAR0 (MMIO) as this object's own region. */
		pci_device_map_range(pci, pci->regions[0].base_addr, pci->regions[0].size, PCI_DEV_MAP_FLAG_WRITABLE, (void**)&ptr);
		uint32_t reg0 = rd32(NV03_PMC_BOOT_0);
		/* We're dealing with >=NV10 */
		if ((reg0 & 0x0f000000) > 0) {
			/* Bit 27-20 contain the architecture in hex */
			chipset = (reg0 & 0xff00000) >> 20;
			/* NV04 or NV05 */
		} else if ((reg0 & 0xff00fff0) == 0x20004000) {
			if (reg0 & 0x00f00000)
				chipset = 0x05;
			else
				chipset = 0x04;
		} else
			chipset = 0xff;
		switch (chipset & 0xf0) {
		case 0x00:
		case 0x10:
		case 0x20:
		case 0x30:
			card_type = (nouveau_card_type)(chipset & 0xf0);
			break;
		case 0x40:
		case 0x60:
			card_type = NV_40;
			break;
		case 0x50:
		case 0x80:
		case 0x90:
		case 0xa0:
			card_type = NV_50;
			break;
		default:
			std::ostringstream ss;
			ss << "Unsupported chipset 0x" << std::hex << reg0;
			throw std::runtime_error(ss.str());
		}
		if(card_type >= NV_50)
			channels = 128;
		else if(card_type >= NV_10)
			channels = 32;
		else
			channels = 16;
		vram_total_size = mem_fb_amount();
		vram_mappable_size = vram_total_size;
		/* NOTE(review): this raises mappable size up to BAR1 size; intuitively
		 * it should be clamped DOWN to min(total, BAR1) -- confirm the
		 * comparison direction. */
		if(vram_mappable_size < pci->regions[1].size)
			vram_mappable_size = pci->regions[1].size;
		if (chipset >= 0x50)
			fifoctx_size = 256;
		else if (chipset >= 0x40)
			fifoctx_size = 128;
		else if (chipset >= 0x17)
			fifoctx_size = 64;
		else
			fifoctx_size = 32;
		if(dev->card_type < NV_10)
			fifoctx_fifo = 8;
		else if(dev->card_type < NV_50)
			fifoctx_fifo = 12;
		else
			fifoctx_fifo = 0x48;
		if(dev->card_type == NV_40)
			fifoctx_grctx = 56;
		else
			fifoctx_grctx = -1;
		init_grctx_info();
		grctx_grclasses = -1;
		if(card_type == NV_40) {
			// TODO: parse ctxprogs to find out
			if(os->nv_driver == NV_DRIVER_NOUVEAU)
				grctx_grclasses = 0x40;
		} else if(card_type == NV_30)
			grctx_grclasses = 0x40;
		if(card_type < NV_50)
			ramhts = 1;
		else
			ramhts = channels;
		users.reset(new nv_users(this));
		update();
	}

	/* (Re)create the RAMIN-derived regions; safe to call again if the card's
	 * configuration changed. */
	void update() {
		ramin.reset(new nv_ramin(this));
		if(card_type < NV_50) {
			ramfc.reset(new nv_ramfc(ramin));
			dev_ramht.reset(new nv_ramht(ramin));
			ramro.reset(new nv_ramro(ramin));
			linear_ramin = ramin;
		} else {
			linear_ramin.reset(new nv_linear_ramin(this));
		}
		if(card_type == NV_20 || card_type == NV_30)
			grctx_table.reset(new nv20_grctx_table(ramin));
	}

	/* Open the card named by $NV_DEVICE, or auto-detect. Caller owns it. */
	static nv_device* open_default() {
		return new nv_device(getenv("NV_DEVICE"));
	}

	bool is_channel_enabled(unsigned channel) const {
		if(card_type < NV_50)
			return !!(rd32(NV04_PFIFO_MODE) & (1 << channel));
		else
			return !!(rd32(NV50_PFIFO_CTX_TABLE(channel)) & NV50_PFIFO_CTX_TABLE_CHANNEL_ENABLED);
	}

	/* Build an nv_hwchannel for `channel`, or null if its fifo context
	 * cannot be located. */
	std::shared_ptr hwchannel(unsigned channel) const {
		std::shared_ptr fifoctx(this->fifoctx(channel));
		if(fifoctx)
			return std::shared_ptr(new nv_hwchannel(const_cast(this), channel, fifoctx));
		else
			return std::shared_ptr();
	}

	/* The RAMHT visible to `channel` (global pre-NV50, per-channel after). */
	std::shared_ptr ramht(unsigned channel) const {
		if(dev_ramht)
			return dev_ramht;
		else {
			std::shared_ptr fifoctx = this->fifoctx(channel);
			if(!fifoctx)
				return std::shared_ptr();
			std::shared_ptr ramht = fifoctx->ramht();
			ramht->channel = channel;
			return ramht;
		}
	}

	/* Locate channel `channel`'s fifo context; null if out of range. */
	std::shared_ptr fifoctx(unsigned channel) const {
		if(card_type < NV_50)
			return std::shared_ptr(new nv_fifoctx(ramfc, channel));
		else {
			uint32_t v = rd32(NV50_PFIFO_CTX_TABLE(channel));
			uint32_t fc;
			if(chipset < 0x60)
				fc = v << 12;
			else
				fc = v << 8;
			if(fc >= linear_ramin->size)
				return std::shared_ptr();
			return std::shared_ptr(new nv_fifoctx(linear_ramin, fc));
		}
	}

	/* Graphics context for `channel`: via the NV20/NV30 context table for
	 * NV20..NV3x, via the fifo context for NV40+, null for older cards. */
	std::shared_ptr grctx(unsigned channel) const {
		if(card_type < NV_40) {
			if(card_type >= NV_20) {
				unsigned offset = dev->grctx_table->rd32(channel * 4) << 4;
				return std::shared_ptr(new nv_grctx(dev, offset));
			} else
				return std::shared_ptr();
		} else
			return fifoctx(channel)->grctx();
	}

private:
	/* Set grctx_size for the detected chipset (0 if unknown). */
	void init_grctx_info() {
		if(card_type == NV_50)
			grctx_size = 0x70000;
		else if(card_type == NV_40)
			grctx_size = 175 * 1024;
		else {
			switch (chipset) {
			case 0x20:
				grctx_size = NV20_GRCTX_SIZE;
				//ctx_init = nv20_graph_context_init;
				//idoffs = 0;
				break;
			case 0x25:
			case 0x28:
				grctx_size = NV25_GRCTX_SIZE;
				//ctx_init = nv25_graph_context_init;
				break;
			case 0x2a:
				grctx_size = NV2A_GRCTX_SIZE;
				//ctx_init = nv2a_graph_context_init;
				//idoffs = 0;
				break;
			case 0x30:
			case 0x31:
				grctx_size = NV30_31_GRCTX_SIZE;
				//ctx_init = nv30_31_graph_context_init;
				break;
			case 0x34:
				grctx_size = NV34_GRCTX_SIZE;
				//ctx_init = nv34_graph_context_init;
				break;
			case 0x35:
			case 0x36:
				grctx_size = NV35_36_GRCTX_SIZE;
				//ctx_init = nv35_36_graph_context_init;
				break;
			default:
				grctx_size = 0;
			}
		}
	}

	/* Read the amount of VRAM from the card's boot/fifo registers. */
	uint64_t mem_fb_amount() {
		uint32_t boot0;
		switch (card_type) {
		case NV_04:
			boot0 = rd32(NV03_BOOT_0);
			if (boot0 & 0x00000100)
				return (((boot0 >> 12) & 0xf) * 2 + 2) * 1024 * 1024;
			switch (boot0 & NV03_BOOT_0_RAM_AMOUNT) {
			case NV04_BOOT_0_RAM_AMOUNT_32MB:
				return 32 * 1024 * 1024;
			case NV04_BOOT_0_RAM_AMOUNT_16MB:
				return 16 * 1024 * 1024;
			case NV04_BOOT_0_RAM_AMOUNT_8MB:
				return 8 * 1024 * 1024;
			case NV04_BOOT_0_RAM_AMOUNT_4MB:
				return 4 * 1024 * 1024;
			}
			break;
		case NV_10:
		case NV_20:
		case NV_30:
		case NV_40:
		case NV_50:
		default:
			// TODO: support nforce/nforce2
			uint64_t mem;
			mem = (rd32(NV04_FIFO_DATA) & NV10_FIFO_DATA_RAM_AMOUNT_MB_MASK) >> NV10_FIFO_DATA_RAM_AMOUNT_MB_SHIFT;
			return mem * 1024 * 1024;
		}
		return 0;
	}
};

/* Flatten the nested names for convenience. */
typedef nv_device::nv_ramht nv_ramht;
typedef nv_device::nv_ramin nv_ramin;
typedef nv_device::nv_ramfc nv_ramfc;
typedef nv_device::nv_grctx nv_grctx;
typedef nv_device::nv_fifoctx nv_fifoctx;
typedef nv_device::nv_hwchannel nv_hwchannel;
typedef nv_device::nv_users nv_users;
typedef nv_device::nv_user nv_user;
typedef nv_device::nv_object nv_object;

/* Pretty-print a (device, ramht entry) pair: channel:handle @ instance,
 * followed by the decoded object. */
std::ostream& operator <<(std::ostream& out, const std::pair& deventry) {
	nv_device* dev = deventry.first;
	const nv_ramht_entry& entry = deventry.second;
	boost::io::ios_all_saver ias(out);
	out << (unsigned)entry.channel << ':' << hex08 << entry.handle << " @ " << hex08 << entry.instance << ": ";
	nv_object obj(dev->ramin, entry.instance);
	out << obj;
	return out;
}

/* Find a channel whose GET==PUT and which did not move over one second --
 * i.e. one that looks idle. Channels 0, 30 and 127 are skipped (reserved /
 * driver-internal). Returns the channel index, or -1 if none qualify. */
int nv_find_idle_channel(struct nv_device* dev) {
	std::shared_ptr hwchan[128];
	unsigned gets1[128];
	unsigned puts1[128];
	unsigned gets2[128];
	unsigned puts2[128];
	int idle = -1;
	/* first sample */
	for(unsigned i = 0; i < dev->channels; ++i) {
		bool enabled;
		if(i == 0 || i == 30 || i == 127)
			enabled = false;
		else
			enabled = dev->is_channel_enabled(i);
		if(enabled) {
			hwchan[i] = dev->hwchannel(i);
			gets1[i] = hwchan[i]->rd_get();
			puts1[i] = hwchan[i]->rd_put();
		}
	}
	sleep(1);
	/* second sample (drop channels that got disabled meanwhile) */
	for(unsigned i = 0; i < dev->channels; ++i) {
		if(hwchan[i]) {
			if(!dev->is_channel_enabled(i))
				hwchan[i].reset();
			if(hwchan[i]) {
				gets2[i] = hwchan[i]->rd_get();
				puts2[i] = hwchan[i]->rd_put();
			}
		}
	}
	for(unsigned i = 0; i < dev->channels; ++i) {
		if(hwchan[i] && gets1[i] && gets1[i] == puts1[i] && gets2[i] == puts2[i] && gets1[i] == gets2[i]) {
			idle = i;
		}
	}
	return idle;
}

/* Scan a RAMHT for the base=0 linear VRAM ctxdma with the largest limit
 * (optionally restricted to `channel`); result goes into ret_entry.
 * FIXME(review): best_limit starts at -1, and `!!best_limit` is true for -1,
 * so this returns true even when nothing was found (and ret_entry is left
 * untouched) -- it presumably should be `best_limit >= 0`. */
bool nv_find_vram_ramht_entry(std::shared_ptr ramht, int channel, nv_ramht_entry& ret_entry) {
	int64_t best_limit = -1;
	for(unsigned i = 0; i < ramht->entries; ++i) {
		nv_ramht_entry entry = ramht->get_at(i);
		if(!entry.valid)
			continue;
		if(channel >= 0 && entry.channel != channel)
			continue;
		nv_object obj(ramht->dev->ramin, entry.instance);
		if(obj.type == nv_object::dma_linear && obj.dma_target == nv_object::vram && obj.dma_base == 0 && obj.dma_limit > best_limit) {
			best_limit = obj.dma_limit;
			ret_entry = entry;
		}
	}
	return !!best_limit;
}

/* Abstract pushbuffer writer for a hardware channel. */
struct nv_channel {
	std::shared_ptr hwchan;

	nv_channel(std::shared_ptr hwchan) : hwchan(hwchan) {}

	/* Emit a single dword. */
	void out(uint32_t v) {
		outp(&v, 1);
	}

	virtual void outp(void* buf, int dwords) = 0;  /* append dwords to the pushbuffer */
	virtual void fire() = 0;                       /* make the GPU see what was appended */
	virtual void wait_idle() = 0;                  /* spin until GET catches up with PUT */
	virtual void wait(size_t size) = 0;            /* ensure room for `size` more dwords */
};

/* Base for channel writers that drive GET/PUT directly. */
struct nv_channel_direct : public nv_channel {
	uint32_t put; /* the PUT value last made visible to the GPU */

	nv_channel_direct(std::shared_ptr hwchan) : nv_channel(hwchan) {
	}

	/* Spin until the channel's GET reaches our PUT.
	 * FIXME(review): `get` is read uninitialized in the first comparison
	 * (UB); if it happens to equal `put` the loop exits without ever reading
	 * the real GET. Should be initialized, e.g. get = hwchan->rd_get(). */
	virtual void wait_idle() {
		uint32_t get;
		while(get != put)
			get = hwchan->rd_get();
	}
};

/* Pushbuffer writer that hijacks an existing channel and points its
 * pushbuffer ctxdma at the middle of VRAM, where we write commands. */
struct nv_channel_vram : public nv_channel_direct {
	bool taken;           /* true once we have redirected the channel */
	uint32_t orig_ctxdma; /* saved original pushbuffer ctxdma */
	uint32_t orig_getput; /* saved original PUT */
	uint32_t vram_handle;
	uint32_t vram_ctxdma; /* instance of the whole-VRAM ctxdma we found */
	uint32_t base;        /* VRAM offset where our pushbuffer starts */
	uint32_t our_put;     /* local write pointer (not yet visible to GPU) */

	nv_channel_vram(std::shared_ptr hwchan) : nv_channel_direct(hwchan) {
		nv_ramht_entry entry;
		taken = false;
		if(!nv_find_vram_ramht_entry(hwchan->ramht, hwchan->channel, entry))
			throw std::runtime_error("Unable to find a vram ctxdma");
		vram_ctxdma = entry.instance;
		vram_handle = entry.handle;
		/* the middle of vram is hopefully away from anything critical */
		put = hwchan->rd_put();
		our_put = base = hwchan->dev->vram_mappable_size / 2;
	}

	/* Write commands straight into VRAM at our_put. */
	virtual void outp(void* buf, int dwords) {
		os->memcpy_to_phys(hwchan->dev->vram_phys + our_put, (const char*)buf, dwords * 4);
		our_put += dwords * 4;
	}

	/* On first fire: save the channel's state, swap in the VRAM ctxdma and
	 * reset GET/PUT to our base; afterwards just advance PUT. */
	virtual void fire() {
		if(put != our_put) {
			if(!taken) {
				orig_getput = hwchan->rd_put();
				put = base;
				orig_ctxdma = hwchan->rd_dma();
				hwchan->wr_dma(vram_ctxdma);
				/* TODO: what if it is being run now?
				 * TODO: move this logic to hwchan? */
				hwchan->fifoctx->wr32(0, put);
				hwchan->fifoctx->wr32(4, put);
				//dev->wr_get(channel, put);
				hwchan->wr_put(put);
				taken = true;
			}
			put = our_put;
			hwchan->wr_put(put);
		}
	}

	/* No-op: with half of VRAM as pushbuffer, space never runs out. */
	virtual void wait(size_t s) {
	}
};

/* Something that can be made to run for a while (see nv_gl_process). */
struct nv_runner {
	virtual void run() = 0;
};

/* Pushbuffer writer that appends commands into the victim channel's OWN
 * pushbuffer (located through its current ctxdma), letting a cooperating
 * runner consume them periodically. */
struct nv_channel_parasite : public nv_channel_direct {
	std::shared_ptr dmaobj; /* the channel's pushbuffer ctxdma object */
	uint32_t dmactx;
	uint32_t our_put;       /* local write pointer */
	uint32_t orig_getput;   /* PUT at the time we attached / last drained */
	nv_runner* runner;

	nv_channel_parasite(std::shared_ptr hwchan, nv_runner* runner) : nv_channel_direct(hwchan), runner(runner) {
		dmaobj.reset(new nv_object(hwchan->dev->ramin, hwchan->rd_dma()));
		orig_getput = our_put = put = hwchan->rd_put();
	}

	/* Translate our_put through the ctxdma and write to physical memory. */
	virtual void outp(void* buf, int dwords) {
		// std::cout << "put = " << std::hex << our_put << std::endl;
		uint64_t phys = dmaobj->dma_to_phys(our_put);
		os->memcpy_to_phys(phys, buf, dwords * 4);
		our_put += dwords * 4;
	}

	virtual void fire() {
		if(put != our_put) {
			put = our_put;
			hwchan->wr_put(put);
		}
	}

	/* Ensure `dwords` fit before the next 4KiB boundary; if not, flush what
	 * we have, rewind to the original PUT and let the runner (the victim
	 * process) advance the channel to make fresh space. */
	virtual void wait(size_t dwords) {
		assert(dwords < 0x200);
		for(;;) {
			unsigned left = 0x1000 - (our_put & 0xfff);
			if(left >= (dwords * 4))
				break;
			fire();
			wait_idle();
			hwchan->wr_get(orig_getput);
			hwchan->wr_put(orig_getput);
			runner->run();
			wait_idle();
			orig_getput = our_put = put = hwchan->rd_put();
		}
	}
};

/* Channel implementation based on stealing the channel of a spawned GL process */
struct nv_gl_process : public nv_runner {
	pid_t pid;

	/* Fork + exec a GL program (default glxgears) with stdout/stderr
	 * silenced, give it time to create its channel, then SIGSTOP it so we
	 * can take over. */
	nv_gl_process(struct nv_device* dev, const char* name = "glxgears") {
		pid = fork();
		if(!pid) {
			int devnull = open("/dev/null", O_RDWR);
			dup2(devnull, 1);
			dup2(devnull, 2);
			close(devnull);
			execlp(name, name, NULL);
			exit(1); /* exec failed in the child */
		}
		try {
			std::cerr << "Please wait a few seconds while we start the helper GL program..." << std::endl;
			sleep(2);
			kill(pid, SIGSTOP);
			sleep(1);
		} catch(...) {
			kill(pid, SIGKILL);
			throw;
		}
	}

	/* SIGCONT after SIGKILL so a stopped process can actually die. */
	~nv_gl_process() {
		kill(pid, SIGKILL);
		kill(pid, SIGCONT);
	}

	/* Let the helper run briefly (it will consume pushbuffer commands). */
	virtual void run() {
		kill(pid, SIGCONT);
		sched_yield();
		kill(pid, SIGSTOP);
	}
};

/* Build a FIFO method header: subchannel, method offset, payload size. */
#define RING(subc, mthd, size) (((subc) << 13) | ((size) << 18) | (mthd))

#endif /* NVLIB_H_ */