diff options
-rw-r--r-- | channels.cpp | 15 | ||||
-rw-r--r-- | nvexec_raw.cpp | 9 | ||||
-rw-r--r-- | nvlib.h | 510 | ||||
-rw-r--r-- | ramfc.cpp | 14 | ||||
-rw-r--r-- | ramin.cpp | 25 |
5 files changed, 347 insertions, 226 deletions
diff --git a/channels.cpp b/channels.cpp index b0716ad..c1160e3 100644 --- a/channels.cpp +++ b/channels.cpp @@ -5,14 +5,19 @@ int main(int argc, char** argv) { unique_ptr<nv_device> dev(nv_device::open_default()); + unsigned chans = 0; for(unsigned i = 0; i < dev->channels; ++i) { - if(i) + shared_ptr<nv_hwchannel> hwchan = dev->hwchannel(i); + if(!hwchan) + continue; + if(chans) printf("\n"); printf("Channel %i\n", i); - printf(" enabled = %i\n", dev->is_channel_enabled(i)); - printf(" dma = %08x\n", dev->ramfc->fc[i]->rd32(dev->ramfc->fc_fifo) << 4); - printf(" get = %08x\n", dev->rd_get(i)); - printf(" put = %08x\n", dev->rd_put(i)); + printf("\tenabled = %i\n", hwchan->enabled()); + printf("\tdma = %08x\n", hwchan->rd_dma()); + printf("\tget = %08x\n", hwchan->rd_get()); + printf("\tput = %08x\n", hwchan->rd_put()); + ++chans; } return 0; } diff --git a/nvexec_raw.cpp b/nvexec_raw.cpp index 550202f..a55bf8d 100644 --- a/nvexec_raw.cpp +++ b/nvexec_raw.cpp @@ -11,13 +11,14 @@ int main(int argc, char** argv) if(channel < 0) throw runtime_error("Unable to find idle channel for GL process.\n"); - unique_ptr<nv_channel> chan(new nv_channel_vram(&*dev, channel)); - //chan.reset(new nv_channel_parasite(&*dev, channel, &*runner)); + std::shared_ptr<nv_hwchannel> hwchan(dev->hwchannel(channel)); + unique_ptr<nv_channel> chan(new nv_channel_vram(hwchan)); + //unique_ptr<nv_channel> chan(new nv_channel_parasite(hwchan, &*runner)); - cerr << "Channel " << chan->channel << endl; + cerr << "Channel " << hwchan->channel << endl; unsigned grclasses[8]; - dev->get_grclasses(chan->channel, grclasses); + hwchan->get_grclasses(grclasses); // for(unsigned i = 0; i < 8; ++i) // cout << grclasses[i] << endl; @@ -272,17 +272,20 @@ struct nv_ramht_entry } }; +struct nv_device; + struct nv_region { + nv_device* dev; char* ptr; size_t size; - nv_region() - : ptr(0), size(0) + nv_region(nv_device* dev) + : dev(dev), ptr(0), size(0) {} - nv_region(char* ptr, size_t size) - : ptr(ptr), size(size) + nv_region(nv_device* dev, char* ptr, size_t size) + : dev(dev), ptr(ptr), size(size) {} uint32_t rd32(uint32_t off) const @@ -309,12 +312,10 @@ struct nv_device : public nv_region { struct nv_ramin : public nv_region { - struct nv_device* dev; - nv_ramin(struct nv_device* dev) - : dev(dev) + : nv_region(dev) { - /* map larger RAMIN aperture on NV40 cards */ + /* map larger RAMIN aperture on NV40-NV50 cards */ ptr = NULL; if (dev->card_type >= NV_40) { int ramin_bar = 2; @@ -338,24 +339,35 @@ struct nv_device : public nv_region struct nv_ramht : public nv_region { - struct nv_ramin* ramin; + std::shared_ptr<nv_ramin> ramin; unsigned bits; unsigned entries; - unsigned search_shift; // TODO: what is this exactly? + //unsigned search_shift; // TODO: what is this exactly? - nv_ramht(struct nv_ramin* ramin) - : ramin(ramin) + nv_ramht(std::shared_ptr<nv_ramin> ramin) + : nv_region(ramin->dev), ramin(ramin) { - uint32_t reg = ramin->dev->rd32(NV_PFIFO_RAMHT); + assert(dev->card_type < NV_50); + uint32_t reg = dev->rd32(NV_PFIFO_RAMHT); bits = ((reg >> 16) & 0xf) + 9; ptr = ramin->ptr + ((reg & 0xffff) << 8); entries = 1 << bits; size = entries * 8; - search_shift = (reg >> 24) + 4; + //search_shift = (reg >> 24) + 4; + } + + nv_ramht(std::shared_ptr<nv_ramin> ramin, uint32_t offset) + : nv_region(ramin->dev), ramin(ramin) + { + assert(dev->card_type >= NV_50); + ptr = ramin->ptr + offset; + bits = 9; + entries = 1 << bits; + size = entries * 8; } - uint32_t hash_handle(int channel, uint32_t handle) + uint32_t hash_handle(int channel, uint32_t handle) const { uint32_t hash = 0; int i; @@ -365,14 +377,14 @@ struct nv_device : public nv_region handle >>= bits; } - if (ramin->dev->card_type < NV_50) + if (dev->card_type < NV_50) hash ^= channel << (bits - 4); hash <<= 3; return hash; } - int find(unsigned channel, uint32_t handle, nv_ramht_entry& entry) + int find(unsigned channel, uint32_t handle, nv_ramht_entry& entry) const { unsigned start = hash_handle(channel, handle); unsigned i = start; @@ -395,22 +407,22 @@ struct nv_device : public nv_region return -1; } - nv_ramht_entry get_at(unsigned i) + nv_ramht_entry get_at(unsigned i) const { nv_ramht_entry entry; uint32_t ctx; entry.handle = rd32(i * 8); ctx = rd32(i * 8 + 4); - if(ramin->dev->card_type < NV_40) { + if(dev->card_type < NV_40) { entry.valid = (ctx & (1 << 31)) != 0; entry.engine = (ctx >> 16) & 3; - entry.channel = (ctx >> 24) & (ramin->dev->channels - 1); + entry.channel = (ctx >> 24) & (dev->channels - 1); entry.instance = (ctx & 0xffff) << 4; - } else if(ramin->dev->card_type < NV_50) { + } else if(dev->card_type < NV_50) { entry.valid = ctx != 0; entry.engine = (ctx >> 20) & 3; - entry.channel = (ctx >> 23) & (ramin->dev->channels - 1);; + entry.channel = (ctx >> 23) & (dev->channels - 1);; entry.instance = (ctx & 0xfffff) << 4; } else { entry.valid = ctx != 0; @@ -427,13 +439,13 @@ struct nv_device : public nv_region return entry; } - void set_at(unsigned i, const nv_ramht_entry& entry) + void set_at(unsigned i, const nv_ramht_entry& entry) const { uint32_t ctx; wr32(i * 8, entry.handle); - if(ramin->dev->card_type < NV_40) + if(dev->card_type < NV_40) ctx = (entry.instance >> 4) | (entry.engine << 16) | (entry.channel << 23); - else if(ramin->dev->card_type < NV_50) + else if(dev->card_type < NV_50) ctx = (entry.instance >> 4) | (entry.engine << 20) | (entry.channel << 24); else if(entry.engine == 2) ctx = (entry.instance << 10) | 2; @@ -442,13 +454,13 @@ struct nv_device : public nv_region wr32(i * 8 + 4, ctx); } - void clear_at(unsigned i) + void clear_at(unsigned i) const { wr32(i * 8, 0); wr32(i * 8 + 4, 0); } - void insert(const nv_ramht_entry& entry) + void insert(const nv_ramht_entry& entry) const { nv_ramht_entry cur; int i = find(entry.channel, entry.handle, cur); @@ -458,7 +470,7 @@ struct nv_device : public nv_region set_at(i, entry); } - void remove(unsigned channel, uint32_t handle) + void remove(unsigned channel, uint32_t handle) const { nv_ramht_entry entry; int i = find(channel, handle, entry); @@ -484,7 +496,7 @@ struct nv_device : public nv_region gart }; - struct nv_ramin* ramin; + std::shared_ptr<nv_ramin> ramin; nv_object_type type; uint32_t tag; int64_t dma_base; @@ -498,10 +510,10 @@ struct nv_device : public nv_region return dma_target_strs[dma_target]; } - nv_object(struct nv_ramin* ramin, uint32_t offset) - : ramin(ramin) + nv_object(std::shared_ptr<nv_ramin> ramin, uint32_t offset) + : nv_region(ramin->dev), ramin(ramin) { - assert (ramin->dev->card_type < NV_50); + assert (dev->card_type < NV_50); ptr = ramin->ptr + offset; @@ -510,7 +522,7 @@ struct nv_device : public nv_region dma_limit = -1; unsigned objclass = tag & 0xff; if((objclass == 0x3d || objclass == 2 || objclass == 3)) { - if(ramin->dev->card_type < NV_50) { + if(dev->card_type < NV_50) { type = (tag & (1 << 13)) ? dma_linear : dma_paged; dma_limit = rd32(4); dma_target = (nv_target)((tag >> 16) & 3); @@ -539,9 +551,9 @@ struct nv_device : public nv_region } else { type = grobj; /*XXX: dodgy hack for now */ - if (ramin->dev->card_type >= NV_50) + if (dev->card_type >= NV_50) size = 24; - else if (ramin->dev->card_type >= NV_40) + else if (dev->card_type >= NV_40) size = 32; else size = 16; @@ -555,7 +567,7 @@ struct nv_device : public nv_region assert(type != grobj); - if (ramin->dev->card_type < NV_50) { + if (dev->card_type < NV_50) { size = dma_limit + 1; if(off >= size) @@ -584,16 +596,16 @@ struct nv_device : public nv_region { uint64_t addr; - if (ramin->dev->card_type < NV_50) { + if (dev->card_type < NV_50) { uint64_t addr = dma_to_linear(off); if(addr == ~0ULL) return addr; if(dma_target == NV_DMA_TARGET_VIDMEM) { - if(addr >= ramin->dev->vram_mappable_size) + if(addr >= dev->vram_mappable_size) return ~0ULL; else - return addr + ramin->dev->vram_phys; + return addr + dev->vram_phys; } else if(dma_target == NV_DMA_TARGET_PCI) return addr; else if(dma_target == NV_DMA_TARGET_AGP) @@ -606,7 +618,7 @@ struct nv_device : public nv_region return addr; } - unsigned num_ptes() + unsigned num_ptes() const { assert(type == dma_paged); return (size >> 2) - 2; @@ -646,62 +658,21 @@ struct nv_device : public nv_region struct nv_ramfc : public nv_region { - struct nv_fc : public nv_region - { - nv_ramfc* ramfc; - - nv_fc(nv_ramfc* ramfc, unsigned channel) - : ramfc(ramfc) - { - ptr = ramfc->ptr + ramfc->fc_size * channel; - size = ramfc->fc_size; - } - }; + std::shared_ptr<nv_ramin> ramin; - struct nv_ramin* ramin; - unsigned fc_size; - unsigned fc_fifo; - int fc_grctx; - struct nv_fc* fc[128]; - - nv_ramfc(struct nv_ramin* ramin) - : ramin(ramin) + nv_ramfc(std::shared_ptr<nv_ramin> ramin) + : nv_region(ramin->dev), ramin(ramin) { - if (ramin->dev->chipset >= 0x40) - fc_size = 128; - else if (ramin->dev->chipset >= 0x17) - fc_size = 64; - else - fc_size = 32; - - if(ramin->dev->card_type >= NV_50) - assert(0); - else if(ramin->dev->card_type >= NV_40) + assert(dev->card_type < NV_50); + if(dev->card_type >= NV_40) ptr = ramin->ptr + 0x20000; else ptr = ramin->ptr + 0x11400; - size = fc_size * ramin->dev->channels; - - if(ramin->dev->card_type < NV_10) - fc_fifo = 8; - else if(ramin->dev->card_type < NV_50) - fc_fifo = 12; - else - fc_fifo = 0x48; - - if(ramin->dev->card_type == NV_40) - fc_grctx = 56; - else - fc_grctx = -1; - - for(unsigned i = 0; i < ramin->dev->channels; ++i) - fc[i] = new nv_fc(this, i); + size = dev->fifoctx_size * dev->channels; } }; - typedef nv_ramfc::nv_fc nv_fc; - struct nv_users : public nv_region { struct nv_user : public nv_region @@ -709,7 +680,7 @@ struct nv_device : public nv_region nv_users* users; nv_user(nv_users* users, unsigned channel) - : users(users) + : nv_region(users->dev), users(users) { ptr = users->ptr + users->user_size * channel; size = users->user_size; @@ -721,6 +692,7 @@ struct nv_device : public nv_region nv_user* user[128]; nv_users(nv_device* dev) + : nv_region(dev) { if (dev->card_type < NV_40) { ptr = dev->ptr + NV03_USER(0); @@ -744,22 +716,154 @@ struct nv_device : public nv_region struct nv_ramro : public nv_region { - nv_ramro(nv_ramin* ramin) + nv_ramro(std::shared_ptr<nv_ramin> ramin) + : nv_region(ramin->dev) { - ptr = ramin->ptr + (ramin->dev->rd32(NV03_PFIFO_RAMRO) << 8); + ptr = ramin->ptr + (dev->rd32(NV03_PFIFO_RAMRO) << 8); size = 512; } }; struct nv20_grctx_table : public nv_region { - nv20_grctx_table(nv_ramin* ramin) + nv20_grctx_table(std::shared_ptr<nv_ramin> ramin) + : nv_region(dev) { - ptr = ramin->ptr + (ramin->dev->rd32(NV20_PGRAPH_CHANNEL_CTX_TABLE) << 4); + ptr = ramin->ptr + (dev->rd32(NV20_PGRAPH_CHANNEL_CTX_TABLE) << 4); size = 4 * 32; } }; + struct nv_grctx : public nv_region + { + nv_grctx(nv_device* dev, unsigned offset) + : nv_region(dev) + { + ptr = dev->ramin->ptr + offset; + size = dev->grctx_size; + } + }; + + struct nv_fifoctx : public nv_region + { + nv_fifoctx(std::shared_ptr<nv_ramin> ramin, uint32_t offset) + : nv_region(ramin->dev) + { + assert(dev->card_type >= NV_50); + ptr = dev->ramin->ptr + offset; + size = dev->fifoctx_size; + } + + nv_fifoctx(std::shared_ptr<nv_ramfc> ramfc, unsigned channel) + : nv_region(ramfc->dev) + { + assert(dev->card_type < NV_50); + ptr = ramfc->ptr + dev->fifoctx_size * channel; + size = dev->fifoctx_size; + } + }; + + struct nv_hwchannel + { + nv_device* dev; + unsigned channel; + + std::shared_ptr<nv_user> user; + std::shared_ptr<nv_fifoctx> fifoctx; + std::shared_ptr<nv_grctx> grctx; + std::shared_ptr<nv_ramht> ramht; + + nv_hwchannel(nv_device* dev, unsigned channel, std::shared_ptr<nv_fifoctx> fifoctx = std::shared_ptr<nv_fifoctx>()) + : dev(dev), channel(channel), fifoctx(fifoctx) + { + assert(channel < dev->channels); + user.reset(dev->users->user[channel]); + + if(!this->fifoctx) + this->fifoctx.reset(dev->fifoctx(channel)); + + if(dev->card_type >= NV_20) { + unsigned offset; + if(dev->card_type >= NV_50) { + if(dev->chipset < 0x60) + offset = fifoctx->offset_in(*dev->ramin) + 0x200; + else + offset = fifoctx->rd32(0x98) << 12; + } else if(dev->card_type == NV_40) { + assert(dev->fifoctx_grctx >= 0); + offset = fifoctx->rd32(56) << 4; + } else + offset = dev->grctx_table->rd32(channel * 4) << 4; + grctx.reset(new nv_grctx(dev, offset)); + } + + if(dev->card_type < NV_50) + ramht = dev->ramht; + else + ramht.reset(new nv_ramht(dev->ramin, (fifoctx->rd32(0x80) & 0xffffff) << 4)); + } + + bool enabled() const + { + return dev->is_channel_enabled(channel); + } + + bool get_grclasses(unsigned grclasses[8]) const + { + if(os->nv_driver == NV_DRIVER_NVIDIA) { + memcpy(grclasses, nvidia_grclasses + 8 * dev->card_type, 8 * sizeof(unsigned)); + } else { + if(!dev->grctx_grclasses) + return false; + if(!grctx) + return false; + for(unsigned i = 0; i < 8; ++i) + grclasses[i] = grctx->rd32(dev->grctx_grclasses + i * 4) & 0xffff; + } + return true; + } + + // XXX: these need to be revisited, since we cannot read/write fifoctx while the context is running! + // TODO: we should read/write from pfifo directly if the channel is running + + uint32_t rd_get() const + { + return user->rd32(0x44); + } + + void wr_get(uint32_t value) const + { + user->wr32(0x44, value); + } + + uint32_t rd_put() const + { + /* user put always reads as 0 pre-nv40 */ + if(dev->card_type < NV_40) + return fifoctx->rd32(0); + else + return user->rd32(0x40); + } + + void wr_put(uint32_t value) const + { + user->wr32(0x40, value); + } + + uint32_t rd_dma() const + { + unsigned dma = fifoctx->rd32(dev->fifoctx_fifo) << 4; + if(dev->card_type < NV_40) + dma &= 0xfffff; + return dma; + } + + void wr_dma(uint32_t v) + { + fifoctx->wr32(dev->fifoctx_fifo, v >> 4); + } + }; + struct pci_device* pci; uint64_t vram_phys; @@ -774,14 +878,19 @@ struct nv_device : public nv_region unsigned grctx_size; - std::unique_ptr<nv_ramin> ramin; - std::unique_ptr<nv_ramht> ramht; - std::unique_ptr<nv_ramfc> ramfc; - std::unique_ptr<nv_ramro> ramro; - std::unique_ptr<nv_users> users; - std::unique_ptr<nv20_grctx_table> grctx_table; + unsigned fifoctx_fifo; + int fifoctx_grctx; + unsigned fifoctx_size; + + std::shared_ptr<nv_ramin> ramin; + std::shared_ptr<nv_ramht> ramht; + std::shared_ptr<nv_ramfc> ramfc; + std::shared_ptr<nv_ramro> ramro; + std::shared_ptr<nv_users> users; + std::shared_ptr<nv20_grctx_table> grctx_table; nv_device(const char* pciname) + : nv_region(this) { if(pciname) { unsigned domain, bus, devid, func; @@ -869,14 +978,37 @@ struct nv_device : public nv_region if(vram_mappable_size < pci->regions[1].size) vram_mappable_size = pci->regions[1].size; + if (chipset >= 0x50) + fifoctx_size = 256; + else if (chipset >= 0x40) + fifoctx_size = 128; + else if (chipset >= 0x17) + fifoctx_size = 64; + else + fifoctx_size = 32; + + if(dev->card_type < NV_10) + fifoctx_fifo = 8; + else if(dev->card_type < NV_50) + fifoctx_fifo = 12; + else + fifoctx_fifo = 0x48; + ramin.reset(new nv_ramin(this)); - ramfc.reset(new nv_ramfc(&*ramin)); - ramht.reset(new nv_ramht(&*ramin)); - ramro.reset(new nv_ramro(&*ramin)); users.reset(new nv_users(this)); + if(card_type < NV_50) { + ramfc.reset(new nv_ramfc(ramin)); + ramht.reset(new nv_ramht(ramin)); + ramro.reset(new nv_ramro(ramin)); + } + + if(dev->card_type == NV_40) + fifoctx_grctx = 56; + else + fifoctx_grctx = -1; if(card_type == NV_20 || card_type == NV_30) - grctx_table.reset(new nv20_grctx_table(&*ramin)); + grctx_table.reset(new nv20_grctx_table(ramin)); init_grctx_info(); grctx_grclasses = -1; @@ -894,7 +1026,7 @@ struct nv_device : public nv_region return new nv_device(getenv("NV_DEVICE")); } - bool is_channel_enabled(unsigned channel) + bool is_channel_enabled(unsigned channel) const { if(card_type < NV_50) return !!(rd32(NV04_PFIFO_MODE) & (1 << channel)); @@ -902,57 +1034,31 @@ struct nv_device : public nv_region return !!(rd32(NV50_PFIFO_CTX_TABLE(channel)) & NV50_PFIFO_CTX_TABLE_CHANNEL_ENABLED); } - uint32_t rd_get(unsigned channel) - { - return users->user[channel]->rd32(0x44); - } - - void wr_get(unsigned channel, uint32_t value) - { - users->user[channel]->wr32(0x44, value); - } - - uint32_t rd_put(unsigned channel) - { - /* user put always reads as 0 pre-nv40 */ - if(card_type < NV_40) - return ramfc->fc[channel]->rd32(0); // TODO: what if the channel is executing right now? + std::shared_ptr<nv_hwchannel> hwchannel(unsigned channel) const { + std::shared_ptr<nv_fifoctx> fifoctx(this->fifoctx(channel)); + if(fifoctx) + return std::shared_ptr<nv_hwchannel>(new nv_hwchannel(const_cast<nv_device*>(this), channel, fifoctx)); else - return users->user[channel]->rd32(0x40); + return std::shared_ptr<nv_hwchannel>(); } - void wr_put(unsigned channel, uint32_t value) + nv_fifoctx* fifoctx(unsigned channel) const { - users->user[channel]->wr32(0x40, value); - } - - uint32_t get_grctx(unsigned channel) { - // TODO: nv50 is at "hdr" in channel-private RAMIN - if(ramfc->fc_grctx >= 0) /* NV40 */ - return ramfc->fc[channel]->rd32(ramfc->fc_grctx) << 4; - else if(grctx_table) /* NV20-NV30 */ - return grctx_table->rd32(channel * 4) << 4; - else - return 0; - } - - bool get_grclasses(unsigned channel, unsigned grclasses[8]) - { - if(os->nv_driver == NV_DRIVER_NVIDIA) { - memcpy(grclasses, nvidia_grclasses + 8 * card_type, 8 * sizeof(unsigned)); - } else { - if(!grctx_grclasses) - return false; - uint32_t grctx = get_grctx(channel); - if(!grctx) - return false; - for(unsigned i = 0; i < 8; ++i) - grclasses[i] = ramin->rd32(grctx + grctx_grclasses + i * 4) & 0xffff; + if(card_type < NV_50) + return new nv_fifoctx(ramfc, channel); + else { + uint32_t v = rd32(NV50_PFIFO_CTX_TABLE(channel)); + if(!(v & NV50_PFIFO_CTX_TABLE_CHANNEL_ENABLED)) + return 0; + uint32_t fc; + if(chipset == 0x50) + fc = v << 12; + else + fc = v << 8; + return new nv_fifoctx(ramin, fc); } - return true; } - private: void init_grctx_info() { if(card_type == NV_50) @@ -1037,7 +1143,9 @@ private: typedef nv_device::nv_ramht nv_ramht; typedef nv_device::nv_ramin nv_ramin; typedef nv_device::nv_ramfc nv_ramfc; -typedef nv_device::nv_fc nv_fc; +typedef nv_device::nv_grctx nv_grctx; +typedef nv_device::nv_fifoctx nv_fifoctx; +typedef nv_device::nv_hwchannel nv_hwchannel; typedef nv_device::nv_users nv_users; typedef nv_device::nv_user nv_user; typedef nv_device::nv_object nv_object; @@ -1050,14 +1158,14 @@ std::ostream& operator <<(std::ostream& out, const std::pair<nv_device*, nv_ramh out << (unsigned)entry.channel << ':' << hex08 << entry.handle << " @ " << hex08 << entry.instance << ": "; - nv_object obj(&*dev->ramin, entry.instance); + nv_object obj(dev->ramin, entry.instance); out << obj; return out; } int nv_find_idle_channel(struct nv_device* dev) { - bool enabled[128]; + std::shared_ptr<nv_hwchannel> hwchan[128]; unsigned gets1[128]; unsigned puts1[128]; unsigned gets2[128]; @@ -1065,48 +1173,51 @@ int nv_find_idle_channel(struct nv_device* dev) int idle = -1; for(unsigned i = 0; i < dev->channels; ++i) { + bool enabled; if(i == 0 || i == 30 || i == 127) - enabled[i] = 0; + enabled = false; else - enabled[i] = dev->is_channel_enabled(i); - if(enabled[i]) { - gets1[i] = dev->rd_get(i); - puts1[i] = dev->rd_put(i); + enabled = dev->is_channel_enabled(i); + if(enabled) { + hwchan[i] = dev->hwchannel(i); + gets1[i] = hwchan[i]->rd_get(); + puts1[i] = hwchan[i]->rd_put(); } } sleep(1); for(unsigned i = 0; i < dev->channels; ++i) { - if(enabled[i]) { - enabled[i] = dev->is_channel_enabled(i); - if(enabled[i]) { - gets2[i] = dev->rd_get(i); - puts2[i] = dev->rd_put(i); + if(hwchan[i]) { + if(!dev->is_channel_enabled(i)) + hwchan[i].reset(); + if(hwchan[i]) { + gets2[i] = hwchan[i]->rd_get(); + puts2[i] = hwchan[i]->rd_put(); } } } for(unsigned i = 0; i < dev->channels; ++i) { - if(enabled[i] && gets1[i] && gets1[i] == puts1[i] && gets2[i] == puts2[i] && gets1[i] == gets2[i]) { + if(hwchan[i] && gets1[i] && gets1[i] == puts1[i] && gets2[i] == puts2[i] && gets1[i] == gets2[i]) { idle = i; } } return idle; } -bool nv_find_vram_ramht_entry(struct nv_device* dev, int channel, nv_ramht_entry& ret_entry) +bool nv_find_vram_ramht_entry(std::shared_ptr<nv_ramht> ramht, int channel, nv_ramht_entry& ret_entry) { int64_t best_limit = -1; - for(unsigned i = 0; i < dev->ramht->entries; ++i) { - nv_ramht_entry entry = dev->ramht->get_at(i); + for(unsigned i = 0; i < ramht->entries; ++i) { + nv_ramht_entry entry = ramht->get_at(i); if(!entry.valid) continue; if(channel >= 0 && entry.channel != channel) continue; - nv_object obj(&*dev->ramin, entry.instance); + nv_object obj(ramht->dev->ramin, entry.instance); if(obj.type == nv_object::dma_linear && obj.dma_target == nv_object::vram && obj.dma_base == 0 && obj.dma_limit > best_limit) { best_limit = obj.dma_limit; @@ -1117,11 +1228,10 @@ bool nv_find_vram_ramht_entry(struct nv_device* dev, int channel, nv_ramht_entry } struct nv_channel { - struct nv_device* dev; - int channel; + std::shared_ptr<nv_hwchannel> hwchan; - nv_channel(struct nv_device* dev, int channel) - : dev(dev), channel(channel) + nv_channel(std::shared_ptr<nv_hwchannel> hwchan) + : hwchan(hwchan) {} void out(uint32_t v) @@ -1139,8 +1249,8 @@ struct nv_channel_direct : public nv_channel { uint32_t put; - nv_channel_direct(struct nv_device* dev, int channel) - : nv_channel(dev, channel) + nv_channel_direct(std::shared_ptr<nv_hwchannel> hwchan) + : nv_channel(hwchan) { } @@ -1148,7 +1258,7 @@ struct nv_channel_direct : public nv_channel { uint32_t get; while(get != put) - get = dev->rd_get(channel); + get = hwchan->rd_get(); } }; @@ -1165,25 +1275,25 @@ struct nv_channel_vram : public nv_channel_direct uint32_t base; uint32_t our_put; - nv_channel_vram(struct nv_device* dev, int channel) - : nv_channel_direct(dev, channel) + nv_channel_vram(std::shared_ptr<nv_hwchannel> hwchan) + : nv_channel_direct(hwchan) { nv_ramht_entry entry; taken = false; - if(!nv_find_vram_ramht_entry(dev, channel, entry)) + if(!nv_find_vram_ramht_entry(hwchan->ramht, hwchan->channel, entry)) throw std::runtime_error("Unable to find a vram ctxdma"); vram_ctxdma = entry.instance; vram_handle = entry.handle; /* the middle of vram is hopefully away from anything critical */ - put = dev->rd_put(channel); - our_put = base = dev->vram_mappable_size / 2; + put = hwchan->rd_put(); + our_put = base = hwchan->dev->vram_mappable_size / 2; } virtual void outp(void* buf, int dwords) { - os->memcpy_to_phys(dev->vram_phys + our_put, (const char*)buf, dwords * 4); + os->memcpy_to_phys(hwchan->dev->vram_phys + our_put, (const char*)buf, dwords * 4); our_put += dwords * 4; } @@ -1191,22 +1301,22 @@ struct nv_channel_vram : public nv_channel_direct { if(put != our_put) { if(!taken) { - orig_getput = dev->rd_put(channel); + orig_getput = hwchan->rd_put(); put = base; - orig_ctxdma = dev->ramfc->fc[channel]->rd32(dev->ramfc->fc_fifo) << 4; - if(dev->card_type < NV_40) - orig_ctxdma &= 0xfffff; - dev->ramfc->fc[channel]->wr32(dev->ramfc->fc_fifo, vram_ctxdma >> 4); + orig_ctxdma = hwchan->rd_dma(); + hwchan->wr_dma(vram_ctxdma); - dev->ramfc->fc[channel]->wr32(0, put); - dev->ramfc->fc[channel]->wr32(4, put); + /* TODO: what if it is being run now? + * TODO: move this logic to hwchan? */ + hwchan->fifoctx->wr32(0, put); + hwchan->fifoctx->wr32(4, put); //dev->wr_get(channel, put); - dev->wr_put(channel, put); + hwchan->wr_put(put); taken = true; } put = our_put; - dev->wr_put(channel, put); + hwchan->wr_put(put); } } @@ -1215,25 +1325,24 @@ struct nv_channel_vram : public nv_channel_direct } }; -struct nv_channel_runner +struct nv_runner { virtual void run() = 0; }; struct nv_channel_parasite : public nv_channel_direct { - std::unique_ptr<nv_object> dmaobj; + std::shared_ptr<nv_object> dmaobj; uint32_t dmactx; uint32_t our_put; uint32_t orig_getput; - nv_channel_runner* runner; + nv_runner* runner; - nv_channel_parasite(struct nv_device* dev, int channel, nv_channel_runner* runner) - : nv_channel_direct(dev, channel), runner(runner) + nv_channel_parasite(std::shared_ptr<nv_hwchannel> hwchan, nv_runner* runner) + : nv_channel_direct(hwchan), runner(runner) { - uint32_t dmactx = dev->ramfc->fc[channel]->rd32(dev->ramfc->fc_fifo) << 4; - dmaobj.reset(new nv_object(&*dev->ramin, dmactx)); - orig_getput = our_put = put = dev->rd_put(channel); + dmaobj.reset(new nv_object(hwchan->dev->ramin, hwchan->rd_dma())); + orig_getput = our_put = put = hwchan->rd_put(); } virtual void outp(void* buf, int dwords) @@ -1248,7 +1357,7 @@ struct nv_channel_parasite : public nv_channel_direct { if(put != our_put) { put = our_put; - dev->wr_put(channel, put); + hwchan->wr_put(put); } } @@ -1264,18 +1373,19 @@ struct nv_channel_parasite : public nv_channel_direct fire(); wait_idle(); - dev->wr_get(channel, orig_getput); - dev->wr_put(channel, orig_getput); + hwchan->wr_get(orig_getput); + hwchan->wr_put(orig_getput); runner->run(); + wait_idle(); - orig_getput = our_put = put = dev->rd_put(channel); + orig_getput = our_put = put = hwchan->rd_put(); } } }; /* Channel implementation based on stealing the channel of a spawned GL process */ -struct nv_gl_process : public nv_channel_runner +struct nv_gl_process : public nv_runner { pid_t pid; @@ -5,16 +5,22 @@ int main(int argc, char** argv) { unique_ptr<nv_device> dev(nv_device::open_default()); + unsigned chans = 0; + for(unsigned i = 0; i < dev->channels; ++i) { - if(i) + shared_ptr<nv_hwchannel> hwchan = dev->hwchannel(i); + if(!hwchan) + continue; + if(chans) cout << '\n'; - cout << "Channel " << i ; - for(unsigned j = 0; j < dev->ramfc->fc_size; j += 4) { + cout << "Channel " << dec << i << " at " << hex08 << hwchan->fifoctx->offset_in(*dev->ramin); + for(unsigned j = 0; j < dev->fifoctx_size; j += 4) { if(!(j & 15)) cout << endl; - cout << hex08 << dev->ramfc->fc[i]->rd32(j) << ' '; + cout << hex08 << hwchan->fifoctx->rd32(j) << ' '; } cout << endl; + ++chans; } return 0; } @@ -16,26 +16,25 @@ int main(int argc, char** argv) ostringstream ss; //ss << "RAMHT[" << dec << i << "] " << make_pair(&*dev, entry); ss << (unsigned)entry.channel << ":" << hex08 << entry.handle; - nv_object obj(&*dev->ramin, entry.instance); + nv_object obj(dev->ramin, entry.instance); objects.push_back(make_tuple(entry.instance, entry.instance + obj.size, ss.str())); } for(unsigned i = 0; i < dev->channels; ++i) { - unsigned grctx = dev->get_grctx(i); - if(!grctx) + shared_ptr<nv_hwchannel> hwchan = dev->hwchannel(i); + if(!hwchan) continue; - ostringstream ss; - ss << "GRCTX(" << i << ")"; - objects.push_back(make_tuple(grctx, grctx + dev->grctx_size, ss.str())); - } - - for(unsigned i = 0; i < dev->channels; ++i) { - int fc_offset = dev->ramfc->fc[i]->offset_in(*dev->ramin); - if(fc_offset >= 0) { + if(hwchan->grctx) { + unsigned offset = hwchan->grctx->offset_in(*dev->ramin); ostringstream ss; - ss << "RAMFC(" << i << ")"; - objects.push_back(make_tuple((unsigned)fc_offset, (unsigned)(fc_offset + dev->ramfc->fc[i]->size), ss.str())); + ss << "GRCTX(" << i << ")"; + objects.push_back(make_tuple(offset, offset + hwchan->grctx->size, ss.str())); } + + ostringstream ss; + ss << "FIFOCTX(" << i << ")"; + unsigned offset = hwchan->fifoctx->offset_in(*dev->ramin); + objects.push_back(make_tuple(offset, offset + hwchan->fifoctx->size, ss.str())); } int ramht_offset = dev->ramht->offset_in(*dev->ramin); |