summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuca Barbieri <luca@luca-barbieri.com>2010-03-29 03:01:01 +0200
committerLuca Barbieri <luca@luca-barbieri.com>2010-03-29 03:01:01 +0200
commit6dd4ed164f2e0f7a8935c40cdc406ecf23df8d0b (patch)
tree71f968044764e689af5bd6a79b5aadcf03ca16fe
parent74947f4067a6d35c4376aedef4048b48bff4c03f (diff)
lots of changes
-rw-r--r--Makefile4
-rw-r--r--channels.cpp3
-rw-r--r--nvexec_raw.cpp29
-rw-r--r--nvlib.h346
-rw-r--r--ramfc.cpp14
-rw-r--r--ramht.cpp24
6 files changed, 294 insertions, 126 deletions
diff --git a/Makefile b/Makefile
index 5d19a46..2c74dd5 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
-CXXFLAGS = -g -O0 -Wall -fmessage-length=0 -fno-inline-functions -fno-inline
+CXXFLAGS = --std=c++0x -g -O0 -Wall -fmessage-length=0 -fno-inline-functions -fno-inline
LIBS = -lpciaccess
-TARGETS = nvexec_raw ramfc channels ramht pgraph peek poke dump dump-nonzero
+TARGETS = nvexec_raw ramht ramin ramfc channels pgraph peek poke dump dump-nonzero
all: $(TARGETS)
diff --git a/channels.cpp b/channels.cpp
index 65714be..b0716ad 100644
--- a/channels.cpp
+++ b/channels.cpp
@@ -3,8 +3,7 @@ using namespace std;
int main(int argc, char** argv)
{
- std::auto_ptr<nv_device> dev;
- dev.reset(nv_device::open_default());
+ unique_ptr<nv_device> dev(nv_device::open_default());
for(unsigned i = 0; i < dev->channels; ++i) {
if(i)
diff --git a/nvexec_raw.cpp b/nvexec_raw.cpp
index fccbbfd..19ff229 100644
--- a/nvexec_raw.cpp
+++ b/nvexec_raw.cpp
@@ -1,33 +1,30 @@
#include "nvlib.h"
#include <fstream>
+using namespace std;
int main(int argc, char** argv)
{
- std::auto_ptr<nv_device> dev;
- std::auto_ptr<nv_gl_process> runner;
- std::auto_ptr<nv_channel> chan;
- dev.reset(nv_device::open_default());
-
- runner.reset(new nv_gl_process(&*dev));
+ unique_ptr<nv_device> dev(nv_device::open_default());
+ unique_ptr<nv_gl_process> runner(new nv_gl_process(&*dev));
int channel = nv_find_idle_channel(&*dev);
if(channel < 0)
- throw std::runtime_error("Unable to find idle channel for GL process.\n");
+ throw runtime_error("Unable to find idle channel for GL process.\n");
- chan.reset(new nv_channel_vram(&*dev, channel));
+ unique_ptr<nv_channel> chan(new nv_channel_vram(&*dev, channel));
//chan.reset(new nv_channel_parasite(&*dev, channel, &*runner));
- std::cerr << "Channel " << chan->channel << std::endl;
+ cerr << "Channel " << chan->channel << endl;
unsigned grclasses[8];
dev->get_grclasses(chan->channel, grclasses);
// for(unsigned i = 0; i < 8; ++i)
-// std::cout << grclasses[i] << std::endl;
+// cout << grclasses[i] << endl;
- std::vector<std::pair<unsigned, unsigned> > commands;
- std::ifstream fin;
- std::istream& in = argc >= 2 ? fin : std::cin;
+ vector<pair<unsigned, unsigned> > commands;
+ ifstream fin;
+ istream& in = argc >= 2 ? fin : cin;
if(argc >= 2)
fin.open(argv[1]);
@@ -44,13 +41,13 @@ int main(int argc, char** argv)
break;
}
// if(subc == 8) {
-// std::cerr << "Unable to find subchannel for grclass " << grclass << std::endl;
+// cerr << "Unable to find subchannel for grclass " << grclass << endl;
// return 1;
// }
- //std::cout << subc << ' ' << method << ' ' << value << std::endl;
+ //cout << subc << ' ' << method << ' ' << value << endl;
- commands.push_back(std::make_pair(RING(subc, method, 1), value));
+ commands.push_back(make_pair(RING(subc, method, 1), value));
}
for(unsigned i = 0; i < commands.size(); ++i)
diff --git a/nvlib.h b/nvlib.h
index 4ca058c..dc62ce7 100644
--- a/nvlib.h
+++ b/nvlib.h
@@ -20,6 +20,7 @@
#include <iostream>
#include <iomanip>
#include <ios>
+#include <boost/io/ios_state.hpp>
#include <sstream>
#include <memory>
#include <vector>
@@ -30,6 +31,12 @@
#include <nouveau/nouveau_class.h>
#define PAGE_SIZE 4096
+inline std::ostream&
+hex08(std::ostream& out)
+{
+ return out << std::hex << std::setw(8) << std::setfill('0');
+}
+
#define NV_PFIFO_RAMHT 0x2210
enum nouveau_card_type {
@@ -236,6 +243,14 @@ unsigned nvidia_grclasses[8 * 6] = {
0,
};
+#define NV20_GRCTX_SIZE (3580*4)
+#define NV25_GRCTX_SIZE (3529*4)
+#define NV2A_GRCTX_SIZE (3500*4)
+
+#define NV30_31_GRCTX_SIZE (24392)
+#define NV34_GRCTX_SIZE (18140)
+#define NV35_36_GRCTX_SIZE (22396)
+
struct nv_ramht_entry
{
bool valid;
@@ -243,6 +258,19 @@ struct nv_ramht_entry
unsigned char engine;
unsigned handle;
unsigned instance;
+
+ static bool channel_handle_less(const nv_ramht_entry& a, const nv_ramht_entry& b)
+ {
+ if(a.channel != b.channel)
+ return a.channel < b.channel;
+ else
+ return a.handle < b.handle;
+ }
+
+ static bool instance_less(const nv_ramht_entry& a, const nv_ramht_entry& b)
+ {
+ return a.instance < b.instance;
+ }
};
struct nv_region
@@ -258,25 +286,28 @@ struct nv_region
: ptr(ptr), size(size)
{}
- uint32_t rd32(uint32_t off)
+ uint32_t rd32(uint32_t off) const
{
return *(volatile uint32_t*)(ptr + off);
}
- void wr32(uint32_t off, uint32_t value)
+ void wr32(uint32_t off, uint32_t value) const
{
*(volatile uint32_t*)(ptr + off) = value;
}
-};
-struct nv_device : public nv_region
-{
- static bool is_dma_class(unsigned word)
+ int offset_in(const nv_region& container) const
{
- unsigned type = word & 0xff;
- return (type == 0x3d || type == 2 || type == 3);
+ if(container.ptr > ptr)
+ return -1;
+ if((ptr + size) > (container.ptr + container.size))
+ return -1;
+ return ptr - container.ptr;
}
+};
+struct nv_device : public nv_region
+{
struct nv_ramin : public nv_region
{
struct nv_device* dev;
@@ -437,87 +468,140 @@ struct nv_device : public nv_region
}
};
- struct nv_dma_object : public nv_region
+ struct nv_object : public nv_region
{
- struct nv_ramin* ramin;
- uint32_t tag;
- unsigned limit;
-
- bool is_linear()
+ enum nv_object_type
{
- return (tag & (1 << 13)) != 0;
- }
+ grobj,
+ dma_paged,
+ dma_linear
+ };
- bool is_present()
+ enum nv_target
{
- return (tag & (1 << 12)) != 0;
- }
+ vram,
+ vram_tiled,
+ pci,
+ gart
+ };
- unsigned type()
- {
- return (tag >> 16) & 3;
- }
+ struct nv_ramin* ramin;
+ nv_object_type type;
+ uint32_t tag;
+ int64_t dma_base;
+ int64_t dma_limit;
+ bool dma_present;
+ nv_target dma_target;
- const char* type_str()
+ static const char* dma_target_str(nv_target dma_target)
{
- const char* type_strs[4] = {"VRAM", "VRAM_TILED", "PCI", "AGP"};
- return type_strs[this->type()];
+ const char* dma_target_strs[4] = {"VRAM", "VRAM_TILED", "PCI", "AGP"};
+ return dma_target_strs[dma_target];
}
- nv_dma_object(struct nv_ramin* ramin, uint32_t offset)
+ nv_object(struct nv_ramin* ramin, uint32_t offset)
: ramin(ramin)
{
assert (ramin->dev->card_type < NV_50);
ptr = ramin->ptr + offset;
-
- tag = rd32(0);
- limit = rd32(4);
- if(!is_present())
- size = 8;
- else if(is_linear())
- size = 12;
- else
- size = (((limit + 0xfff) >> 12) + 2) * 4;
+ tag = rd32(0);
+ dma_base = -1;
+ dma_limit = -1;
+ unsigned objclass = tag & 0xff;
+ if((objclass == 0x3d || objclass == 2 || objclass == 3)) {
+ if(ramin->dev->card_type < NV_50) {
+ type = (tag & (1 << 13)) ? dma_linear : dma_paged;
+ dma_limit = rd32(4);
+ dma_target = (nv_target)((tag >> 16) & 3);
+ dma_present = !!(tag & (1 << 12));
+ if(!dma_present) /* TODO: guess */
+ size = 8;
+ else if(type == dma_linear) {
+ dma_base = (rd32(8) & ~0xfff) | (tag & 0xff000000);
+ size = 12;
+ } else
+ size = (((dma_limit + 0xfff) >> 12) + 2) * 4;
+ } else {
+ /* TODO: nv50 non-linear ctxdmas? flags? */
+ unsigned v3 = rd32(12);
+ dma_limit = rd32(4) | ((uint64_t)(v3 & 0xff000000) << 8);
+ dma_base = rd32(8) | ((uint64_t)(v3 & 0xff) << 32);
+ size = 24;
+ // TODO: this is almost surely incomplete
+ type = dma_linear;
+ dma_present = true;
+ if(tag & 0xf0000)
+ dma_target = vram;
+ else
+ dma_target = gart;
+ }
+ } else {
+ type = grobj;
+ /*XXX: dodgy hack for now */
+ if (ramin->dev->card_type >= NV_50)
+ size = 24;
+ else if (ramin->dev->card_type >= NV_40)
+ size = 32;
+ else
+ size = 16;
+ }
}
- uint64_t dma_to_phys(uint32_t off)
+ uint64_t dma_to_linear(uint32_t off) const
{
uint32_t size;
uint64_t addr;
+ assert(type != grobj);
+
if (ramin->dev->card_type < NV_50) {
- uint32_t hdr = rd32(0);
- unsigned type;
- size = rd32(4) + 1;
+ size = dma_limit + 1;
if(off >= size)
return ~0ULL;
- if(!(hdr & (1 << 12)))
- throw std::runtime_error("pagetable not present");
-
- // std::cout << "FOO " << std::hex << hdr << std::endl;
+ if(!dma_present)
+ return ~0ULL;
- type = (hdr >> 16) & 3;
- if(type == NV_DMA_TARGET_VIDMEM)
- addr = ramin->dev->vram_phys;
- else if(type == NV_DMA_TARGET_PCI)
- addr = 0;
- else if(type == NV_DMA_TARGET_AGP)
- //throw std::runtime_error("AGP not implemented");
- addr = 0;
+ if(type == dma_paged) {
+ uint64_t pte = rd_pte(off >> 12);
+ addr += (pte & ~0xfff) + (off & 0xfff);
+ } else if(type == dma_linear)
+ return dma_base + off;
else
assert(0);
+ } else {
+ assert(type == dma_linear);
+ return dma_base + off;
+ }
- if(!is_linear()) {
- /* not linear */
- uint64_t pte = read_pte(off >> 12);
- addr += (pte & ~0xfff) + (off & 0xfff);
- } else
- addr += (read_pte(0) & ~0xfff) + off;
- } else
+ return addr;
+ }
+
+ /* TODO: this probably should be redesigned */
+ uint64_t dma_to_phys(uint32_t off) const
+ {
+ uint64_t addr;
+
+ if (ramin->dev->card_type < NV_50) {
+ uint64_t addr = dma_to_linear(off);
+ if(addr == ~0ULL)
+ return addr;
+
+ if(dma_target == NV_DMA_TARGET_VIDMEM) {
+ if(addr >= ramin->dev->vram_mappable_size)
+ return ~0ULL;
+ else
+ return addr + ramin->dev->vram_phys;
+ } else if(dma_target == NV_DMA_TARGET_PCI)
+ return addr;
+ else if(dma_target == NV_DMA_TARGET_AGP)
+ throw std::runtime_error("AGP not implemented");
+ else
+ assert(0);
+ } else /* TODO: VM... */
assert(0);
return addr;
@@ -525,14 +609,40 @@ struct nv_device : public nv_region
unsigned num_ptes()
{
+ assert(type == dma_paged);
return (size >> 2) - 2;
}
- uint64_t read_pte(unsigned i)
+ uint64_t rd_pte(unsigned i) const
{
- assert(ramin->dev->card_type < NV_50);
+ assert(type == dma_paged);
return rd32(8 + i * 4) | (tag & 0xff000000);
}
+
+ void wr_pte_present_wr(unsigned i, uint64_t offset) const {
+ assert(type == dma_paged);
+ wr32(8 + i * 4, offset | 3);
+ }
+
+ void print(std::ostream& out) const {
+ boost::io::ios_all_saver ias(out);
+
+ if(type == grobj) {
+ out << "GR";
+ for(unsigned i = 0; i < size; i += 4)
+ out << ' ' << hex08 << rd32(i);
+ } else {
+ out << dma_target_str(dma_target) << ' ' << hex08 << tag << ' ' << hex08 << dma_limit;
+ if(type == dma_linear)
+ out << " -> " << hex08 << dma_base;
+ }
+ }
+
+ friend std::ostream& operator <<(std::ostream& out, const nv_object& obj)
+ {
+ obj.print(out);
+ return out;
+ }
};
struct nv_ramfc : public nv_region
@@ -633,6 +743,15 @@ struct nv_device : public nv_region
typedef nv_users::nv_user nv_user;
+ struct nv_ramro : public nv_region
+ {
+ nv_ramro(nv_ramin* ramin)
+ {
+ ptr = ramin->ptr + (ramin->dev->rd32(NV03_PFIFO_RAMRO) << 8);
+ size = 512;
+ }
+ };
+
struct pci_device* pci;
uint64_t vram_phys;
@@ -645,10 +764,13 @@ struct nv_device : public nv_region
unsigned channels;
int grctx_grclasses;
- std::auto_ptr<nv_ramin> ramin;
- std::auto_ptr<nv_ramht> ramht;
- std::auto_ptr<nv_ramfc> ramfc;
- std::auto_ptr<nv_users> users;
+ unsigned grctx_size;
+
+ std::unique_ptr<nv_ramin> ramin;
+ std::unique_ptr<nv_ramht> ramht;
+ std::unique_ptr<nv_ramfc> ramfc;
+ std::unique_ptr<nv_ramro> ramro;
+ std::unique_ptr<nv_users> users;
nv_device(const char* pciname)
{
@@ -741,11 +863,18 @@ struct nv_device : public nv_region
ramin.reset(new nv_ramin(this));
ramfc.reset(new nv_ramfc(&*ramin));
ramht.reset(new nv_ramht(&*ramin));
+ ramro.reset(new nv_ramro(&*ramin));
users.reset(new nv_users(this));
+ init_grctx_info();
+
grctx_grclasses = -1;
- if(os->nv_driver == NV_DRIVER_NOUVEAU)
- grctx_grclasses = 0x40;
+ if(card_type == NV_40) {
+ // TODO: parse ctxprogs to find out
+ if(os->nv_driver == NV_DRIVER_NOUVEAU)
+ grctx_grclasses = 0x40;
+ }
+
}
static nv_device* open_default()
@@ -785,6 +914,13 @@ struct nv_device : public nv_region
users->user[channel]->wr32(0x40, value);
}
+ uint32_t get_grctx(unsigned channel) {
+ if(ramfc->fc_grctx >= 0)
+ return ramfc->fc[channel]->rd32(ramfc->fc_grctx) << 4;
+ else
+ assert(0);
+ }
+
void get_grclasses(unsigned channel, unsigned grclasses[8])
{
if(os->nv_driver == NV_DRIVER_NVIDIA) {
@@ -801,6 +937,47 @@ struct nv_device : public nv_region
private:
+ void init_grctx_info() {
+ if(card_type == NV_50)
+ grctx_size = 0x70000;
+ else if(card_type == NV_40)
+ grctx_size = 175 * 1024;
+ else {
+ switch (chipset) {
+ case 0x20:
+ grctx_size = NV20_GRCTX_SIZE;
+ //ctx_init = nv20_graph_context_init;
+ //idoffs = 0;
+ break;
+ case 0x25:
+ case 0x28:
+ grctx_size = NV25_GRCTX_SIZE;
+ //ctx_init = nv25_graph_context_init;
+ break;
+ case 0x2a:
+ grctx_size = NV2A_GRCTX_SIZE;
+ //ctx_init = nv2a_graph_context_init;
+ //idoffs = 0;
+ break;
+ case 0x30:
+ case 0x31:
+ grctx_size = NV30_31_GRCTX_SIZE;
+ //ctx_init = nv30_31_graph_context_init;
+ break;
+ case 0x34:
+ grctx_size = NV34_GRCTX_SIZE;
+ //ctx_init = nv34_graph_context_init;
+ break;
+ case 0x35:
+ case 0x36:
+ grctx_size = NV35_36_GRCTX_SIZE;
+ //ctx_init = nv35_36_graph_context_init;
+ break;
+ default:
+ grctx_size = 0;
+ }
+ }
+ }
uint64_t mem_fb_amount()
{
uint32_t boot0;
@@ -846,7 +1023,20 @@ typedef nv_device::nv_ramfc nv_ramfc;
typedef nv_device::nv_fc nv_fc;
typedef nv_device::nv_users nv_users;
typedef nv_device::nv_user nv_user;
-typedef nv_device::nv_dma_object nv_dma_object;
+typedef nv_device::nv_object nv_object;
+
+std::ostream& operator <<(std::ostream& out, const std::pair<nv_device*, nv_ramht_entry>& deventry)
+{
+ nv_device* dev = deventry.first;
+ const nv_ramht_entry& entry = deventry.second;
+ boost::io::ios_all_saver ias(out);
+
+ out << (unsigned)entry.channel << ':' << hex08 << entry.handle << " @ " << hex08 << entry.instance << ": ";
+
+ nv_object obj(&*dev->ramin, entry.instance);
+ out << obj;
+ return out;
+}
int nv_find_idle_channel(struct nv_device* dev)
{
@@ -890,7 +1080,7 @@ int nv_find_idle_channel(struct nv_device* dev)
bool nv_find_vram_ramht_entry(struct nv_device* dev, int channel, nv_ramht_entry& ret_entry)
{
- unsigned best_limit = 0;
+ int64_t best_limit = -1;
for(unsigned i = 0; i < dev->ramht->entries; ++i) {
nv_ramht_entry entry = dev->ramht->get_at(i);
if(!entry.valid)
@@ -899,15 +1089,11 @@ bool nv_find_vram_ramht_entry(struct nv_device* dev, int channel, nv_ramht_entry
if(channel >= 0 && entry.channel != channel)
continue;
- uint32_t tag = dev->ramin->rd32(entry.instance);
- if(nv_device::is_dma_class(tag)) {
- std::auto_ptr<nv_dma_object> dmaobj(new nv_dma_object(&*dev->ramin, entry.instance));
-
- if(dmaobj->type() == 0 && dmaobj->is_linear()
- && !(dmaobj->read_pte(0) & ~0xfff) && dmaobj->limit > best_limit) {
- best_limit = dmaobj->limit;
- ret_entry = entry;
- }
+ nv_object obj(&*dev->ramin, entry.instance);
+ if(obj.type == nv_object::dma_linear && obj.dma_target == nv_object::vram
+ && obj.dma_base == 0 && obj.dma_limit > best_limit) {
+ best_limit = obj.dma_limit;
+ ret_entry = entry;
}
}
return !!best_limit;
@@ -1013,7 +1199,7 @@ struct nv_channel_runner
struct nv_channel_parasite : public nv_channel_direct
{
- std::auto_ptr<nv_dma_object> dmaobj;
+ std::unique_ptr<nv_object> dmaobj;
uint32_t dmactx;
uint32_t put;
uint32_t our_put;
@@ -1024,7 +1210,7 @@ struct nv_channel_parasite : public nv_channel_direct
: nv_channel_direct(dev, channel), runner(runner)
{
uint32_t dmactx = dev->ramfc->fc[channel]->rd32(dev->ramfc->fc_fifo) << 4;
- dmaobj.reset(new nv_dma_object(&*dev->ramin, dmactx));
+ dmaobj.reset(new nv_object(&*dev->ramin, dmactx));
orig_getput = our_put = put = dev->rd_put(channel);
}
diff --git a/ramfc.cpp b/ramfc.cpp
index be83816..0ccdd1a 100644
--- a/ramfc.cpp
+++ b/ramfc.cpp
@@ -1,18 +1,20 @@
#include "nvlib.h"
+using namespace std;
int main(int argc, char** argv)
{
- std::auto_ptr<nv_device> dev;
- dev.reset(nv_device::open_default());
+ unique_ptr<nv_device> dev(nv_device::open_default());
for(unsigned i = 0; i < dev->channels; ++i) {
- std::cout << "\nChannel " << i ;
+ if(i)
+ cout << i;
+ cout << "Channel " << i ;
for(unsigned j = 0; j < dev->ramfc->fc_size; j += 4) {
if(!(j & 15))
- std::cout << std::endl;
- std::cout << std::hex << std::setw(8) << dev->ramfc->fc[i]->rd32(j) << ' ';
+ cout << endl;
+ cout << hex08 << dev->ramfc->fc[i]->rd32(j) << ' ';
}
- std::cout << std::endl;
+ cout << endl;
}
return 0;
}
diff --git a/ramht.cpp b/ramht.cpp
index a729692..cc75036 100644
--- a/ramht.cpp
+++ b/ramht.cpp
@@ -1,33 +1,17 @@
#include "nvlib.h"
+#include <boost/io/ios_state.hpp>
using namespace std;
+using namespace boost;
int main(int argc, char** argv)
{
- std::auto_ptr<nv_device> dev;
- dev.reset(nv_device::open_default());
+ unique_ptr<nv_device> dev(nv_device::open_default());
for(unsigned i = 0; i < dev->ramht->entries; ++i) {
nv_ramht_entry entry = dev->ramht->get_at(i);
if(!entry.valid)
continue;
- printf("[%i] %i:%08x @ %08x:", i, entry.channel, entry.handle, entry.instance);
- uint32_t tag = dev->ramin->rd32(entry.instance);
- if(!nv_device::is_dma_class(tag)) {
- uint32_t v[4];
- for(unsigned j = 0; j < 4; ++j)
- v[j] = dev->ramin->rd32(entry.instance + j * 4);
- printf(" GR %08x %08x %08x %08x\n", v[0], v[1], v[2], v[3]);
- } else {
- auto_ptr<nv_dma_object> dmaobj(new nv_dma_object(&*dev->ramin, entry.instance));
- printf(" %s %08x %08x", dmaobj->type_str(), dmaobj->tag, dmaobj->limit);
- if(!dmaobj->is_linear()) {
- printf("\n");
- unsigned ptes = dmaobj->num_ptes();
- for(unsigned i = 0; i < ptes; ++i)
- printf("\t%i -> %08Lx\n", i, dmaobj->read_pte(i));
- } else
- printf(" -> %08Lx\n", dmaobj->read_pte(0));
- }
+ cout << "[" << i << "] " << make_pair(&*dev, entry) << endl;
}
return 0;
}