diff options
author | Luca Barbieri <luca@luca-barbieri.com> | 2010-03-29 03:01:01 +0200 |
---|---|---|
committer | Luca Barbieri <luca@luca-barbieri.com> | 2010-03-29 03:01:01 +0200 |
commit | 6dd4ed164f2e0f7a8935c40cdc406ecf23df8d0b (patch) | |
tree | 71f968044764e689af5bd6a79b5aadcf03ca16fe | |
parent | 74947f4067a6d35c4376aedef4048b48bff4c03f (diff) |
lots of changes
-rw-r--r-- | Makefile | 4 | ||||
-rw-r--r-- | channels.cpp | 3 | ||||
-rw-r--r-- | nvexec_raw.cpp | 29 | ||||
-rw-r--r-- | nvlib.h | 346 | ||||
-rw-r--r-- | ramfc.cpp | 14 | ||||
-rw-r--r-- | ramht.cpp | 24 |
6 files changed, 294 insertions, 126 deletions
@@ -1,6 +1,6 @@ -CXXFLAGS = -g -O0 -Wall -fmessage-length=0 -fno-inline-functions -fno-inline +CXXFLAGS = --std=c++0x -g -O0 -Wall -fmessage-length=0 -fno-inline-functions -fno-inline LIBS = -lpciaccess -TARGETS = nvexec_raw ramfc channels ramht pgraph peek poke dump dump-nonzero +TARGETS = nvexec_raw ramht ramin ramfc channels pgraph peek poke dump dump-nonzero all: $(TARGETS) diff --git a/channels.cpp b/channels.cpp index 65714be..b0716ad 100644 --- a/channels.cpp +++ b/channels.cpp @@ -3,8 +3,7 @@ using namespace std; int main(int argc, char** argv) { - std::auto_ptr<nv_device> dev; - dev.reset(nv_device::open_default()); + unique_ptr<nv_device> dev(nv_device::open_default()); for(unsigned i = 0; i < dev->channels; ++i) { if(i) diff --git a/nvexec_raw.cpp b/nvexec_raw.cpp index fccbbfd..19ff229 100644 --- a/nvexec_raw.cpp +++ b/nvexec_raw.cpp @@ -1,33 +1,30 @@ #include "nvlib.h" #include <fstream> +using namespace std; int main(int argc, char** argv) { - std::auto_ptr<nv_device> dev; - std::auto_ptr<nv_gl_process> runner; - std::auto_ptr<nv_channel> chan; - dev.reset(nv_device::open_default()); - - runner.reset(new nv_gl_process(&*dev)); + unique_ptr<nv_device> dev(nv_device::open_default()); + unique_ptr<nv_gl_process> runner(new nv_gl_process(&*dev)); int channel = nv_find_idle_channel(&*dev); if(channel < 0) - throw std::runtime_error("Unable to find idle channel for GL process.\n"); + throw runtime_error("Unable to find idle channel for GL process.\n"); - chan.reset(new nv_channel_vram(&*dev, channel)); + unique_ptr<nv_channel> chan(new nv_channel_vram(&*dev, channel)); //chan.reset(new nv_channel_parasite(&*dev, channel, &*runner)); - std::cerr << "Channel " << chan->channel << std::endl; + cerr << "Channel " << chan->channel << endl; unsigned grclasses[8]; dev->get_grclasses(chan->channel, grclasses); // for(unsigned i = 0; i < 8; ++i) -// std::cout << grclasses[i] << std::endl; +// cout << grclasses[i] << endl; - std::vector<std::pair<unsigned, unsigned> > commands; - std::ifstream fin; - std::istream& in = argc >= 2 ? fin : std::cin; + vector<pair<unsigned, unsigned> > commands; + ifstream fin; + istream& in = argc >= 2 ? fin : cin; if(argc >= 2) fin.open(argv[1]); @@ -44,13 +41,13 @@ int main(int argc, char** argv) break; } // if(subc == 8) { -// std::cerr << "Unable to find subchannel for grclass " << grclass << std::endl; +// cerr << "Unable to find subchannel for grclass " << grclass << endl; // return 1; // } - //std::cout << subc << ' ' << method << ' ' << value << std::endl; + //cout << subc << ' ' << method << ' ' << value << endl; - commands.push_back(std::make_pair(RING(subc, method, 1), value)); + commands.push_back(make_pair(RING(subc, method, 1), value)); } for(unsigned i = 0; i < commands.size(); ++i) @@ -20,6 +20,7 @@ #include <iostream> #include <iomanip> #include <ios> +#include <boost/io/ios_state.hpp> #include <sstream> #include <memory> #include <vector> @@ -30,6 +31,12 @@ #include <nouveau/nouveau_class.h> #define PAGE_SIZE 4096 +inline std::ostream& +hex08(std::ostream& out) +{ + return out << std::hex << std::setw(8) << std::setfill('0'); +} + #define NV_PFIFO_RAMHT 0x2210 enum nouveau_card_type { @@ -236,6 +243,14 @@ unsigned nvidia_grclasses[8 * 6] = { 0, }; +#define NV20_GRCTX_SIZE (3580*4) +#define NV25_GRCTX_SIZE (3529*4) +#define NV2A_GRCTX_SIZE (3500*4) + +#define NV30_31_GRCTX_SIZE (24392) +#define NV34_GRCTX_SIZE (18140) +#define NV35_36_GRCTX_SIZE (22396) + struct nv_ramht_entry { bool valid; @@ -243,6 +258,19 @@ struct nv_ramht_entry unsigned char engine; unsigned handle; unsigned instance; + + static bool channel_handle_less(const nv_ramht_entry& a, const nv_ramht_entry& b) + { + if(a.channel != b.channel) + return a.channel < b.channel; + else + return a.handle < b.handle; + } + + static bool instance_less(const nv_ramht_entry& a, const nv_ramht_entry& b) + { + return a.instance < b.instance; + } }; struct nv_region @@ -258,25 +286,28 @@ struct nv_region : ptr(ptr), size(size) {} - uint32_t rd32(uint32_t off) + uint32_t rd32(uint32_t off) const { return *(volatile uint32_t*)(ptr + off); } - void wr32(uint32_t off, uint32_t value) + void wr32(uint32_t off, uint32_t value) const { *(volatile uint32_t*)(ptr + off) = value; } -}; -struct nv_device : public nv_region -{ - static bool is_dma_class(unsigned word) + int offset_in(const nv_region& container) const { - unsigned type = word & 0xff; - return (type == 0x3d || type == 2 || type == 3); + if(container.ptr > ptr) + return -1; + if((ptr + size) > (container.ptr + container.size)) + return -1; + return ptr - container.ptr; } +}; +struct nv_device : public nv_region +{ struct nv_ramin : public nv_region { struct nv_device* dev; @@ -437,87 +468,140 @@ struct nv_device : public nv_region } }; - struct nv_dma_object : public nv_region + struct nv_object : public nv_region { - struct nv_ramin* ramin; - uint32_t tag; - unsigned limit; - - bool is_linear() + enum nv_object_type { - return (tag & (1 << 13)) != 0; - } + grobj, + dma_paged, + dma_linear + }; - bool is_present() + enum nv_target { - return (tag & (1 << 12)) != 0; - } + vram, + vram_tiled, + pci, + gart + }; - unsigned type() - { - return (tag >> 16) & 3; - } + struct nv_ramin* ramin; + nv_object_type type; + uint32_t tag; + int64_t dma_base; + int64_t dma_limit; + bool dma_present; + nv_target dma_target; - const char* type_str() + static const char* dma_target_str(nv_target dma_target) { - const char* type_strs[4] = {"VRAM", "VRAM_TILED", "PCI", "AGP"}; - return type_strs[this->type()]; + const char* dma_target_strs[4] = {"VRAM", "VRAM_TILED", "PCI", "AGP"}; + return dma_target_strs[dma_target]; } - nv_dma_object(struct nv_ramin* ramin, uint32_t offset) + nv_object(struct nv_ramin* ramin, uint32_t offset) : ramin(ramin) { assert (ramin->dev->card_type < NV_50); ptr = ramin->ptr + offset; - - tag = rd32(0); - limit = rd32(4); - if(!is_present()) - size = 8; - else if(is_linear()) - size = 12; - else - size = (((limit + 0xfff) >> 12) + 2) * 4; + tag = rd32(0); + dma_base = -1; + dma_limit = -1; + unsigned objclass = tag & 0xff; + if((objclass == 0x3d || objclass == 2 || objclass == 3)) { + if(ramin->dev->card_type < NV_50) { + type = (tag & (1 << 13)) ? dma_linear : dma_paged; + dma_limit = rd32(4); + dma_target = (nv_target)((tag >> 16) & 3); + dma_present = !!(tag & (1 << 12)); + if(!dma_present) /* TODO: guess */ + size = 8; + else if(type == dma_linear) { + dma_base = (rd32(8) & ~0xfff) | (tag & 0xff000000); + size = 12; + } else + size = (((dma_limit + 0xfff) >> 12) + 2) * 4; + } else { + /* TODO: nv50 non-linear ctxdmas? flags? */ + unsigned v3 = rd32(12); + dma_limit = rd32(4) | ((uint64_t)(v3 & 0xff000000) << 8); + dma_base = rd32(8) | ((uint64_t)(v3 & 0xff) << 32); + size = 24; + // TODO: this is almost surely incomplete + type = dma_linear; + dma_present = true; + if(tag & 0xf0000) + dma_target = vram; + else + dma_target = gart; + } + } else { + type = grobj; + /*XXX: dodgy hack for now */ + if (ramin->dev->card_type >= NV_50) + size = 24; + else if (ramin->dev->card_type >= NV_40) + size = 32; + else + size = 16; + } } - uint64_t dma_to_phys(uint32_t off) + uint64_t dma_to_linear(uint32_t off) const { uint32_t size; uint64_t addr; + assert(type != grobj); + if (ramin->dev->card_type < NV_50) { - uint32_t hdr = rd32(0); - unsigned type; - size = rd32(4) + 1; + size = dma_limit + 1; if(off >= size) return ~0ULL; - if(!(hdr & (1 << 12))) - throw std::runtime_error("pagetable not present"); - - // std::cout << "FOO " << std::hex << hdr << std::endl; + if(!dma_present) + return ~0ULL; - type = (hdr >> 16) & 3; - if(type == NV_DMA_TARGET_VIDMEM) - addr = ramin->dev->vram_phys; - else if(type == NV_DMA_TARGET_PCI) - addr = 0; - else if(type == NV_DMA_TARGET_AGP) - //throw std::runtime_error("AGP not implemented"); - addr = 0; + if(type == dma_paged) { + uint64_t pte = rd_pte(off >> 12); + addr += (pte & ~0xfff) + (off & 0xfff); + } else if(type == dma_linear) + return dma_base + off; else assert(0); + } else { + assert(type == dma_linear); + return dma_base + off; + } - if(!is_linear()) { - /* not linear */ - uint64_t pte = read_pte(off >> 12); - addr += (pte & ~0xfff) + (off & 0xfff); - } else - addr += (read_pte(0) & ~0xfff) + off; - } else + return addr; + } + + /* TODO: this probably should be redesigned */ + uint64_t dma_to_phys(uint32_t off) const + { + uint64_t addr; + + if (ramin->dev->card_type < NV_50) { + uint64_t addr = dma_to_linear(off); + if(addr == ~0ULL) + return addr; + + if(dma_target == NV_DMA_TARGET_VIDMEM) { + if(addr >= ramin->dev->vram_mappable_size) + return ~0ULL; + else + return addr + ramin->dev->vram_phys; + } else if(dma_target == NV_DMA_TARGET_PCI) + return addr; + else if(dma_target == NV_DMA_TARGET_AGP) + throw std::runtime_error("AGP not implemented"); + else + assert(0); + } else /* TODO: VM... */ assert(0); return addr; @@ -525,14 +609,40 @@ struct nv_device : public nv_region unsigned num_ptes() { + assert(type == dma_paged); return (size >> 2) - 2; } - uint64_t read_pte(unsigned i) + uint64_t rd_pte(unsigned i) const { - assert(ramin->dev->card_type < NV_50); + assert(type == dma_paged); return rd32(8 + i * 4) | (tag & 0xff000000); } + + void wr_pte_present_wr(unsigned i, uint64_t offset) const { + assert(type == dma_paged); + wr32(8 + i * 4, offset | 3); + } + + void print(std::ostream& out) const { + boost::io::ios_all_saver ias(out); + + if(type == grobj) { + out << "GR"; + for(unsigned i = 0; i < size; i += 4) + out << ' ' << hex08 << rd32(i); + } else { + out << dma_target_str(dma_target) << ' ' << hex08 << tag << ' ' << hex08 << dma_limit; + if(type == dma_linear) + out << " -> " << hex08 << dma_base; + } + } + + friend std::ostream& operator <<(std::ostream& out, const nv_object& obj) + { + obj.print(out); + return out; + } }; struct nv_ramfc : public nv_region @@ -633,6 +743,15 @@ struct nv_device : public nv_region typedef nv_users::nv_user nv_user; + struct nv_ramro : public nv_region + { + nv_ramro(nv_ramin* ramin) + { + ptr = ramin->ptr + (ramin->dev->rd32(NV03_PFIFO_RAMRO) << 8); + size = 512; + } + }; + struct pci_device* pci; uint64_t vram_phys; @@ -645,10 +764,13 @@ struct nv_device : public nv_region unsigned channels; int grctx_grclasses; - std::auto_ptr<nv_ramin> ramin; - std::auto_ptr<nv_ramht> ramht; - std::auto_ptr<nv_ramfc> ramfc; - std::auto_ptr<nv_users> users; + unsigned grctx_size; + + std::unique_ptr<nv_ramin> ramin; + std::unique_ptr<nv_ramht> ramht; + std::unique_ptr<nv_ramfc> ramfc; + std::unique_ptr<nv_ramro> ramro; + std::unique_ptr<nv_users> users; nv_device(const char* pciname) { @@ -741,11 +863,18 @@ struct nv_device : public nv_region ramin.reset(new nv_ramin(this)); ramfc.reset(new nv_ramfc(&*ramin)); ramht.reset(new nv_ramht(&*ramin)); + ramro.reset(new nv_ramro(&*ramin)); users.reset(new nv_users(this)); + init_grctx_info(); + grctx_grclasses = -1; - if(os->nv_driver == NV_DRIVER_NOUVEAU) - grctx_grclasses = 0x40; + if(card_type == NV_40) { + // TODO: parse ctxprogs to find out + if(os->nv_driver == NV_DRIVER_NOUVEAU) + grctx_grclasses = 0x40; + } + } static nv_device* open_default() @@ -785,6 +914,13 @@ struct nv_device : public nv_region users->user[channel]->wr32(0x40, value); } + uint32_t get_grctx(unsigned channel) { + if(ramfc->fc_grctx >= 0) + return ramfc->fc[channel]->rd32(ramfc->fc_grctx) << 4; + else + assert(0); + } + void get_grclasses(unsigned channel, unsigned grclasses[8]) { if(os->nv_driver == NV_DRIVER_NVIDIA) { @@ -801,6 +937,47 @@ struct nv_device : public nv_region private: + void init_grctx_info() { + if(card_type == NV_50) + grctx_size = 0x70000; + else if(card_type == NV_40) + grctx_size = 175 * 1024; + else { + switch (chipset) { + case 0x20: + grctx_size = NV20_GRCTX_SIZE; + //ctx_init = nv20_graph_context_init; + //idoffs = 0; + break; + case 0x25: + case 0x28: + grctx_size = NV25_GRCTX_SIZE; + //ctx_init = nv25_graph_context_init; + break; + case 0x2a: + grctx_size = NV2A_GRCTX_SIZE; + //ctx_init = nv2a_graph_context_init; + //idoffs = 0; + break; + case 0x30: + case 0x31: + grctx_size = NV30_31_GRCTX_SIZE; + //ctx_init = nv30_31_graph_context_init; + break; + case 0x34: + grctx_size = NV34_GRCTX_SIZE; + //ctx_init = nv34_graph_context_init; + break; + case 0x35: + case 0x36: + grctx_size = NV35_36_GRCTX_SIZE; + //ctx_init = nv35_36_graph_context_init; + break; + default: + grctx_size = 0; + } + } + } uint64_t mem_fb_amount() { uint32_t boot0; @@ -846,7 +1023,20 @@ typedef nv_device::nv_ramfc nv_ramfc; typedef nv_device::nv_fc nv_fc; typedef nv_device::nv_users nv_users; typedef nv_device::nv_user nv_user; -typedef nv_device::nv_dma_object nv_dma_object; +typedef nv_device::nv_object nv_object; + +std::ostream& operator <<(std::ostream& out, const std::pair<nv_device*, nv_ramht_entry>& deventry) +{ + nv_device* dev = deventry.first; + const nv_ramht_entry& entry = deventry.second; + boost::io::ios_all_saver ias(out); + + out << (unsigned)entry.channel << ':' << hex08 << entry.handle << " @ " << hex08 << entry.instance << ": "; + + nv_object obj(&*dev->ramin, entry.instance); + out << obj; + return out; +} int nv_find_idle_channel(struct nv_device* dev) { @@ -890,7 +1080,7 @@ int nv_find_idle_channel(struct nv_device* dev) bool nv_find_vram_ramht_entry(struct nv_device* dev, int channel, nv_ramht_entry& ret_entry) { - unsigned best_limit = 0; + int64_t best_limit = -1; for(unsigned i = 0; i < dev->ramht->entries; ++i) { nv_ramht_entry entry = dev->ramht->get_at(i); if(!entry.valid) @@ -899,15 +1089,11 @@ bool nv_find_vram_ramht_entry(struct nv_device* dev, int channel, nv_ramht_entry if(channel >= 0 && entry.channel != channel) continue; - uint32_t tag = dev->ramin->rd32(entry.instance); - if(nv_device::is_dma_class(tag)) { - std::auto_ptr<nv_dma_object> dmaobj(new nv_dma_object(&*dev->ramin, entry.instance)); - - if(dmaobj->type() == 0 && dmaobj->is_linear() - && !(dmaobj->read_pte(0) & ~0xfff) && dmaobj->limit > best_limit) { - best_limit = dmaobj->limit; - ret_entry = entry; - } + nv_object obj(&*dev->ramin, entry.instance); + if(obj.type == nv_object::dma_linear && obj.dma_target == nv_object::vram + && obj.dma_base == 0 && obj.dma_limit > best_limit) { + best_limit = obj.dma_limit; + ret_entry = entry; } } return !!best_limit; @@ -1013,7 +1199,7 @@ struct nv_channel_runner struct nv_channel_parasite : public nv_channel_direct { - std::auto_ptr<nv_dma_object> dmaobj; + std::unique_ptr<nv_object> dmaobj; uint32_t dmactx; uint32_t put; uint32_t our_put; @@ -1024,7 +1210,7 @@ struct nv_channel_parasite : public nv_channel_direct : nv_channel_direct(dev, channel), runner(runner) { uint32_t dmactx = dev->ramfc->fc[channel]->rd32(dev->ramfc->fc_fifo) << 4; - dmaobj.reset(new nv_dma_object(&*dev->ramin, dmactx)); + dmaobj.reset(new nv_object(&*dev->ramin, dmactx)); orig_getput = our_put = put = dev->rd_put(channel); } @@ -1,18 +1,20 @@ #include "nvlib.h" +using namespace std; int main(int argc, char** argv) { - std::auto_ptr<nv_device> dev; - dev.reset(nv_device::open_default()); + unique_ptr<nv_device> dev(nv_device::open_default()); for(unsigned i = 0; i < dev->channels; ++i) { - std::cout << "\nChannel " << i ; + if(i) + cout << i; + cout << "Channel " << i ; for(unsigned j = 0; j < dev->ramfc->fc_size; j += 4) { if(!(j & 15)) - std::cout << std::endl; - std::cout << std::hex << std::setw(8) << dev->ramfc->fc[i]->rd32(j) << ' '; + cout << endl; + cout << hex08 << dev->ramfc->fc[i]->rd32(j) << ' '; } - std::cout << std::endl; + cout << endl; } return 0; } @@ -1,33 +1,17 @@ #include "nvlib.h" +#include <boost/io/ios_state.hpp> using namespace std; +using namespace boost; int main(int argc, char** argv) { - std::auto_ptr<nv_device> dev; - dev.reset(nv_device::open_default()); + unique_ptr<nv_device> dev(nv_device::open_default()); for(unsigned i = 0; i < dev->ramht->entries; ++i) { nv_ramht_entry entry = dev->ramht->get_at(i); if(!entry.valid) continue; - printf("[%i] %i:%08x @ %08x:", i, entry.channel, entry.handle, entry.instance); - uint32_t tag = dev->ramin->rd32(entry.instance); - if(!nv_device::is_dma_class(tag)) { - uint32_t v[4]; - for(unsigned j = 0; j < 4; ++j) - v[j] = dev->ramin->rd32(entry.instance + j * 4); - printf(" GR %08x %08x %08x %08x\n", v[0], v[1], v[2], v[3]); - } else { - auto_ptr<nv_dma_object> dmaobj(new nv_dma_object(&*dev->ramin, entry.instance)); - printf(" %s %08x %08x", dmaobj->type_str(), dmaobj->tag, dmaobj->limit); - if(!dmaobj->is_linear()) { - printf("\n"); - unsigned ptes = dmaobj->num_ptes(); - for(unsigned i = 0; i < ptes; ++i) - printf("\t%i -> %08Lx\n", i, dmaobj->read_pte(i)); - } else - printf(" -> %08Lx\n", dmaobj->read_pte(0)); - } + cout << "[" << i << "] " << make_pair(&*dev, entry) << endl; } return 0; } |