1 files changed, 310 insertions, 200 deletions
diff --git a/nvlib.h b/nvlib.h
index aa9c8d5..556ab8e 100644
--- a/nvlib.h
+++ b/nvlib.h
@@ -272,17 +272,20 @@ struct nv_ramht_entry
 	}
 };
 
+struct nv_device;
+
 struct nv_region
 {
+	nv_device* dev;
 	char* ptr;
 	size_t size;
 
-	nv_region()
-	: ptr(0), size(0)
+	nv_region(nv_device* dev)
+	: dev(dev), ptr(0), size(0)
 	{}
 
-	nv_region(char* ptr, size_t size)
-	: ptr(ptr), size(size)
+	nv_region(nv_device* dev, char* ptr, size_t size)
+	: dev(dev), ptr(ptr), size(size)
 	{}
 
 	uint32_t rd32(uint32_t off) const
@@ -309,12 +312,10 @@ struct nv_device : public nv_region
 {
 	struct nv_ramin : public nv_region
 	{
-		struct nv_device* dev;
-
 		nv_ramin(struct nv_device* dev)
-		: dev(dev)
+			: nv_region(dev)
 		{
-			/* map larger RAMIN aperture on NV40 cards */
+			/* map larger RAMIN aperture on NV40-NV50 cards */
 			ptr  = NULL;
 			if (dev->card_type >= NV_40) {
 				int ramin_bar = 2;
@@ -338,24 +339,35 @@ struct nv_device : public nv_region
 
 	struct nv_ramht : public nv_region
 	{
-		struct nv_ramin* ramin;
+		std::shared_ptr<nv_ramin> ramin;
 
 		unsigned bits;
 		unsigned entries;
-		unsigned search_shift; // TODO: what is this exactly?
+		//unsigned search_shift; // TODO: what is this exactly?
 	
-		nv_ramht(struct nv_ramin* ramin)
-		: ramin(ramin)
+		nv_ramht(std::shared_ptr<nv_ramin> ramin)
+			: nv_region(ramin->dev), ramin(ramin)
 		{
-			uint32_t reg = ramin->dev->rd32(NV_PFIFO_RAMHT);
+			assert(dev->card_type < NV_50);
+			uint32_t reg = dev->rd32(NV_PFIFO_RAMHT);
 			bits = ((reg >> 16) & 0xf) + 9;
 			ptr = ramin->ptr + ((reg & 0xffff) << 8);
 			entries = 1 << bits;
 			size = entries * 8;
-			search_shift = (reg >> 24) + 4;
+			//search_shift = (reg >> 24) + 4;
+		}
+
+		nv_ramht(std::shared_ptr<nv_ramin> ramin, uint32_t offset)
+			: nv_region(ramin->dev), ramin(ramin)
+		{
+			assert(dev->card_type >= NV_50);
+			ptr = ramin->ptr + offset;
+			bits = 9;
+			entries = 1 << bits;
+			size = entries * 8;
 		}
 
-		uint32_t hash_handle(int channel, uint32_t handle)
+		uint32_t hash_handle(int channel, uint32_t handle) const
 		{
 			uint32_t hash = 0;
 			int i;
@@ -365,14 +377,14 @@ struct nv_device : public nv_region
 				handle >>= bits;
 			}
 
-			if (ramin->dev->card_type < NV_50)
+			if (dev->card_type < NV_50)
 				hash ^= channel << (bits - 4);
 			hash <<= 3;
 
 			return hash;
 		}
 
-		int find(unsigned channel, uint32_t handle, nv_ramht_entry& entry)
+		int find(unsigned channel, uint32_t handle, nv_ramht_entry& entry) const
 		{
 			unsigned start = hash_handle(channel, handle);
 			unsigned i = start;
@@ -395,22 +407,22 @@ struct nv_device : public nv_region
 			return -1;
 		}
 
-		nv_ramht_entry get_at(unsigned i)
+		nv_ramht_entry get_at(unsigned i) const
 		{
 			nv_ramht_entry entry;
 			uint32_t ctx;
 			entry.handle = rd32(i * 8);
 			ctx = rd32(i * 8 + 4);
 
-			if(ramin->dev->card_type < NV_40) {
+			if(dev->card_type < NV_40) {
 				entry.valid = (ctx & (1 << 31)) != 0;
 				entry.engine = (ctx >> 16) & 3;
-				entry.channel = (ctx >> 24) & (ramin->dev->channels - 1);
+				entry.channel = (ctx >> 24) & (dev->channels - 1);
 				entry.instance = (ctx & 0xffff) << 4;
-			} else if(ramin->dev->card_type < NV_50) {
+			} else if(dev->card_type < NV_50) {
 				entry.valid = ctx != 0;
 				entry.engine = (ctx >> 20) & 3;
-				entry.channel = (ctx >> 23) & (ramin->dev->channels - 1);;
+				entry.channel = (ctx >> 23) & (dev->channels - 1);;
 				entry.instance = (ctx & 0xfffff) << 4;
 			} else {
 				entry.valid = ctx != 0;
@@ -427,13 +439,13 @@ struct nv_device : public nv_region
 			return entry;
 		}
 	
-		void set_at(unsigned i, const nv_ramht_entry& entry)
+		void set_at(unsigned i, const nv_ramht_entry& entry) const
 		{
 			uint32_t ctx;
 			wr32(i * 8, entry.handle);
-			if(ramin->dev->card_type < NV_40)
+			if(dev->card_type < NV_40)
 				ctx = (entry.instance >> 4) | (entry.engine << 16) | (entry.channel << 23);
-			else if(ramin->dev->card_type < NV_50)
+			else if(dev->card_type < NV_50)
 				ctx = (entry.instance >> 4) | (entry.engine << 20) | (entry.channel << 24);
 			else if(entry.engine == 2)
 				ctx = (entry.instance << 10) | 2;
@@ -442,13 +454,13 @@ struct nv_device : public nv_region
 			wr32(i * 8 + 4, ctx);
 		}
 	
-		void clear_at(unsigned i)
+		void clear_at(unsigned i) const
 		{
 			wr32(i * 8, 0);
 			wr32(i * 8 + 4, 0);
 		}
 
-		void insert(const nv_ramht_entry& entry)
+		void insert(const nv_ramht_entry& entry) const
 		{
 			nv_ramht_entry cur;
 			int i = find(entry.channel, entry.handle, cur);
@@ -458,7 +470,7 @@ struct nv_device : public nv_region
 			set_at(i, entry);
 		}
 
-		void remove(unsigned channel, uint32_t handle)
+		void remove(unsigned channel, uint32_t handle) const
 		{
 			nv_ramht_entry entry;
 			int i = find(channel, handle, entry);
@@ -484,7 +496,7 @@ struct nv_device : public nv_region
 			gart
 		};
 
-		struct nv_ramin* ramin;
+		std::shared_ptr<nv_ramin> ramin;
 		nv_object_type type;
 		uint32_t tag;
 		int64_t dma_base;
@@ -498,10 +510,10 @@ struct nv_device : public nv_region
 			return dma_target_strs[dma_target];
 		}
 
-		nv_object(struct nv_ramin* ramin, uint32_t offset)
-		: ramin(ramin)
+		nv_object(std::shared_ptr<nv_ramin> ramin, uint32_t offset)
+			: nv_region(ramin->dev), ramin(ramin)
 		{
-			assert (ramin->dev->card_type < NV_50);
+			assert (dev->card_type < NV_50);
 			
 			ptr = ramin->ptr + offset;
 
@@ -510,7 +522,7 @@ struct nv_device : public nv_region
 			dma_limit = -1;
 			unsigned objclass = tag & 0xff;
 			if((objclass == 0x3d || objclass == 2 || objclass == 3)) {
-				if(ramin->dev->card_type < NV_50) {
+				if(dev->card_type < NV_50) {
 					type = (tag & (1 << 13)) ? dma_linear : dma_paged;
 					dma_limit = rd32(4);
 					dma_target = (nv_target)((tag >> 16) & 3);
@@ -539,9 +551,9 @@ struct nv_device : public nv_region
 			} else {
 				type = grobj;
 				/*XXX: dodgy hack for now */
-				if (ramin->dev->card_type >= NV_50)
+				if (dev->card_type >= NV_50)
 					size = 24;
-				else if (ramin->dev->card_type >= NV_40)
+				else if (dev->card_type >= NV_40)
 					size = 32;
 				else
 					size = 16;
@@ -555,7 +567,7 @@ struct nv_device : public nv_region
 
 			assert(type != grobj);
 
-			if (ramin->dev->card_type < NV_50) {
+			if (dev->card_type < NV_50) {
 				size = dma_limit + 1;
 
 				if(off >= size)
@@ -584,16 +596,16 @@ struct nv_device : public nv_region
 		{
 			uint64_t addr;
 
-			if (ramin->dev->card_type < NV_50) {
+			if (dev->card_type < NV_50) {
 				uint64_t addr = dma_to_linear(off);
 				if(addr == ~0ULL)
 					return addr;
 
 				if(dma_target == NV_DMA_TARGET_VIDMEM) {
-					if(addr >= ramin->dev->vram_mappable_size)
+					if(addr >= dev->vram_mappable_size)
 						return ~0ULL;
 					else
-						return addr + ramin->dev->vram_phys;
+						return addr + dev->vram_phys;
 				} else if(dma_target == NV_DMA_TARGET_PCI)
 					return addr;
 				else if(dma_target == NV_DMA_TARGET_AGP)
@@ -606,7 +618,7 @@ struct nv_device : public nv_region
 			return addr;
 		}
 
-		unsigned num_ptes()
+		unsigned num_ptes() const
 		{
 			assert(type == dma_paged);
 			return (size >> 2) - 2;
@@ -646,62 +658,21 @@ struct nv_device : public nv_region
 
 	struct nv_ramfc : public nv_region
 	{
-		struct nv_fc : public nv_region
-		{
-			nv_ramfc* ramfc;
-			
-			nv_fc(nv_ramfc* ramfc, unsigned channel)
-			: ramfc(ramfc)
-			{
-				ptr = ramfc->ptr + ramfc->fc_size * channel;
-				size = ramfc->fc_size;
-			}
-		};
+		std::shared_ptr<nv_ramin> ramin; /* RAMIN mapping this region is carved out of */
 
-		struct nv_ramin* ramin;
-		unsigned fc_size;
-		unsigned fc_fifo;
-		int fc_grctx;
-		struct nv_fc* fc[128];
-	
-		nv_ramfc(struct nv_ramin* ramin)
-		: ramin(ramin)
+		nv_ramfc(std::shared_ptr<nv_ramin> ramin) /* only valid before NV_50 (asserted below) */
+			: nv_region(ramin->dev), ramin(ramin)
 		{
-			if (ramin->dev->chipset >= 0x40)
-				fc_size = 128;
-			else if (ramin->dev->chipset >= 0x17)
-				fc_size = 64;
-			else
-				fc_size = 32;
-
-			if(ramin->dev->card_type >= NV_50)
-				assert(0);
-			else if(ramin->dev->card_type >= NV_40)
+			assert(dev->card_type < NV_50);
+			if(dev->card_type >= NV_40) /* RAMFC sits at RAMIN + 0x20000 from NV_40 on, at RAMIN + 0x11400 before */
 				ptr = ramin->ptr + 0x20000;
 			else
 				ptr = ramin->ptr + 0x11400;
 
-			size = fc_size * ramin->dev->channels;
-
-			if(ramin->dev->card_type < NV_10)
-				fc_fifo = 8;
-			else if(ramin->dev->card_type < NV_50)
-				fc_fifo = 12;
-			else
-				fc_fifo = 0x48;
-
-			if(ramin->dev->card_type == NV_40)
-				fc_grctx = 56;
-			else
-				fc_grctx = -1;
-
-			for(unsigned i = 0; i < ramin->dev->channels; ++i)
-				fc[i] = new nv_fc(this, i);
+			size = dev->fifoctx_size * dev->channels; /* one fifoctx_size slot per channel */
 		}
 	};
 
-	typedef nv_ramfc::nv_fc nv_fc;
-
 	struct nv_users : public nv_region
 	{
 		struct nv_user : public nv_region
@@ -709,7 +680,7 @@ struct nv_device : public nv_region
 			nv_users* users;
 			
 			nv_user(nv_users* users, unsigned channel)
-			: users(users)
+				: nv_region(users->dev), users(users)
 			{
 				ptr = users->ptr + users->user_size * channel;
 				size = users->user_size;
@@ -721,6 +692,7 @@ struct nv_device : public nv_region
 		nv_user* user[128];
 
 		nv_users(nv_device* dev)
+			: nv_region(dev)
 		{
 			if (dev->card_type < NV_40) {
 				ptr = dev->ptr + NV03_USER(0);
@@ -744,22 +716,154 @@ struct nv_device : public nv_region
 
 	struct nv_ramro : public nv_region
 	{
-		nv_ramro(nv_ramin* ramin)
+		nv_ramro(std::shared_ptr<nv_ramin> ramin) /* 512-byte region inside RAMIN */
+			: nv_region(ramin->dev)
 		{
-			ptr = ramin->ptr + (ramin->dev->rd32(NV03_PFIFO_RAMRO) << 8);
+			ptr = ramin->ptr + (dev->rd32(NV03_PFIFO_RAMRO) << 8); /* byte offset = NV03_PFIFO_RAMRO register value << 8 */
 			size = 512;
 		}
 	};
 
 	struct nv20_grctx_table : public nv_region
 	{
-		nv20_grctx_table(nv_ramin* ramin)
+		nv20_grctx_table(std::shared_ptr<nv_ramin> ramin) /* table of 32 4-byte entries inside RAMIN */
+			: nv_region(ramin->dev) /* must come from ramin: the inherited dev member is not initialized yet at this point */
 		{
-			ptr = ramin->ptr + (ramin->dev->rd32(NV20_PGRAPH_CHANNEL_CTX_TABLE) << 4);
+			ptr = ramin->ptr + (dev->rd32(NV20_PGRAPH_CHANNEL_CTX_TABLE) << 4); /* byte offset = register value << 4 */
 			size = 4 * 32;
 		}
 	};
 
+	struct nv_grctx : public nv_region /* graphics context: dev->grctx_size bytes inside RAMIN */
+	{
+		nv_grctx(nv_device* dev, unsigned offset)
+			: nv_region(dev)
+		{
+			ptr = dev->ramin->ptr + offset; /* offset is in bytes from the start of the RAMIN mapping */
+			size = dev->grctx_size;
+		}
+	};
+
+	struct nv_fifoctx : public nv_region /* FIFO context of one channel, dev->fifoctx_size bytes */
+	{
+		nv_fifoctx(std::shared_ptr<nv_ramin> ramin, uint32_t offset) /* NV_50 and later: context at a byte offset in RAMIN */
+			: nv_region(ramin->dev)
+		{
+			assert(dev->card_type >= NV_50);
+			ptr = dev->ramin->ptr + offset; /* NOTE(review): uses dev->ramin, not the ramin argument - presumably the same object; confirm */
+			size = dev->fifoctx_size;
+		}
+
+		nv_fifoctx(std::shared_ptr<nv_ramfc> ramfc, unsigned channel) /* before NV_50: the channel-th slot of RAMFC */
+			: nv_region(ramfc->dev)
+		{
+			assert(dev->card_type < NV_50);
+			ptr = ramfc->ptr + dev->fifoctx_size * channel;
+			size = dev->fifoctx_size;
+		}
+	};
+
+	struct nv_hwchannel /* one hardware channel: its USER area, FIFO context, graphics context and RAMHT */
+	{
+		nv_device* dev;
+		unsigned channel;
+		/* grctx stays empty on cards older than NV_20 */
+		std::shared_ptr<nv_user> user;
+		std::shared_ptr<nv_fifoctx> fifoctx;
+		std::shared_ptr<nv_grctx> grctx;
+		std::shared_ptr<nv_ramht> ramht;
+		/* fifoctx is optional: when empty it is looked up with dev->fifoctx(channel); throws std::runtime_error if none exists */
+		nv_hwchannel(nv_device* dev, unsigned channel, std::shared_ptr<nv_fifoctx> fifoctx = std::shared_ptr<nv_fifoctx>())
+			: dev(dev), channel(channel), fifoctx(fifoctx)
+		{
+			assert(channel < dev->channels);
+			/* user[] holds raw pointers kept by dev->users: alias that owner instead of adopting (and later deleting) the entry */
+			user = std::shared_ptr<nv_user>(dev->users, dev->users->user[channel]);
+			if(!fifoctx) /* the parameter shadows the member in this body, so fill in both */
+				this->fifoctx = fifoctx = std::shared_ptr<nv_fifoctx>(dev->fifoctx(channel));
+			if(!fifoctx) throw std::runtime_error("channel has no fifo context");
+			if(dev->card_type >= NV_20) {
+				unsigned offset;
+				if(dev->card_type >= NV_50) {
+					if(dev->chipset < 0x60)
+						offset = fifoctx->offset_in(*dev->ramin) + 0x200;
+					else
+						offset = fifoctx->rd32(0x98) << 12;
+				} else if(dev->card_type == NV_40) {
+					assert(dev->fifoctx_grctx >= 0);
+					offset = fifoctx->rd32(dev->fifoctx_grctx) << 4;
+				} else
+					offset = dev->grctx_table->rd32(channel * 4) << 4;
+				grctx.reset(new nv_grctx(dev, offset));
+			}
+
+			if(dev->card_type < NV_50)
+				ramht = dev->ramht;
+			else
+				ramht.reset(new nv_ramht(dev->ramin, (fifoctx->rd32(0x80) & 0xffffff) << 4));
+		}
+		/* forwards to dev->is_channel_enabled(channel) */
+		bool enabled() const
+		{
+			return dev->is_channel_enabled(channel);
+		}
+		/* fills grclasses[8]; returns false when the classes cannot be read from the graphics context */
+		bool get_grclasses(unsigned grclasses[8]) const
+		{
+			if(os->nv_driver == NV_DRIVER_NVIDIA) {
+				memcpy(grclasses, nvidia_grclasses + 8 * dev->card_type, 8 * sizeof(unsigned));
+			} else {
+				if(!dev->grctx_grclasses)
+					return false;
+				if(!grctx)
+					return false;
+				for(unsigned i = 0; i < 8; ++i)
+					grclasses[i] = grctx->rd32(dev->grctx_grclasses + i * 4) & 0xffff;
+			}
+			return true;
+		}
+
+		// XXX: these need to be revisited, since we cannot read/write fifoctx while the context is running!
+		// TODO: we should read/write from pfifo directly if the channel is running
+
+		uint32_t rd_get() const
+		{
+			return user->rd32(0x44);
+		}
+
+		void wr_get(uint32_t value) const
+		{
+			user->wr32(0x44, value);
+		}
+
+		uint32_t rd_put() const
+		{
+			/* user put always reads as 0 pre-nv40 */
+			if(dev->card_type < NV_40)
+				return fifoctx->rd32(0);
+			else
+				return user->rd32(0x40);
+		}
+
+		void wr_put(uint32_t value) const
+		{
+			user->wr32(0x40, value);
+		}
+		/* DMA object instance read from FIFO context word dev->fifoctx_fifo, which stores it >> 4 */
+		uint32_t rd_dma() const
+		{
+			unsigned dma = fifoctx->rd32(dev->fifoctx_fifo) << 4;
+			if(dev->card_type < NV_40)
+				dma &= 0xfffff;
+			return dma;
+		}
+
+		void wr_dma(uint32_t v) const
+		{
+			fifoctx->wr32(dev->fifoctx_fifo, v >> 4);
+		}
+	};
+
 	struct pci_device* pci;
 
 	uint64_t vram_phys;
@@ -774,14 +878,19 @@ struct nv_device : public nv_region
 	
 	unsigned grctx_size;
 
-	std::unique_ptr<nv_ramin> ramin;
-	std::unique_ptr<nv_ramht> ramht;
-	std::unique_ptr<nv_ramfc> ramfc;
-	std::unique_ptr<nv_ramro> ramro;
-	std::unique_ptr<nv_users> users;
-	std::unique_ptr<nv20_grctx_table> grctx_table;
+	unsigned fifoctx_fifo;
+	int fifoctx_grctx;
+	unsigned fifoctx_size;
+
+	std::shared_ptr<nv_ramin> ramin;
+	std::shared_ptr<nv_ramht> ramht;
+	std::shared_ptr<nv_ramfc> ramfc;
+	std::shared_ptr<nv_ramro> ramro;
+	std::shared_ptr<nv_users> users;
+	std::shared_ptr<nv20_grctx_table> grctx_table;
 	
 	nv_device(const char* pciname)
+	: nv_region(this)
 	{
 		if(pciname) {
 			unsigned domain, bus, devid, func;
@@ -869,14 +978,37 @@ struct nv_device : public nv_region
 		if(vram_mappable_size < pci->regions[1].size)
 			vram_mappable_size = pci->regions[1].size;
 
+		if (chipset >= 0x50)
+			fifoctx_size = 256;
+		else if (chipset >= 0x40)
+			fifoctx_size = 128;
+		else if (chipset >= 0x17)
+			fifoctx_size = 64;
+		else
+			fifoctx_size = 32;
+
+		if(dev->card_type < NV_10)
+			fifoctx_fifo = 8;
+		else if(dev->card_type < NV_50)
+			fifoctx_fifo = 12;
+		else
+			fifoctx_fifo = 0x48;
+
 		ramin.reset(new nv_ramin(this));
-		ramfc.reset(new nv_ramfc(&*ramin));
-		ramht.reset(new nv_ramht(&*ramin));
-		ramro.reset(new nv_ramro(&*ramin));
 		users.reset(new nv_users(this));
+		if(card_type < NV_50) {
+			ramfc.reset(new nv_ramfc(ramin));
+			ramht.reset(new nv_ramht(ramin));
+			ramro.reset(new nv_ramro(ramin));
+		}
+
+		if(dev->card_type == NV_40)
+			fifoctx_grctx = 56;
+		else
+			fifoctx_grctx = -1;
 
 		if(card_type == NV_20 || card_type == NV_30)
-			grctx_table.reset(new nv20_grctx_table(&*ramin));
+			grctx_table.reset(new nv20_grctx_table(ramin));
 		init_grctx_info();
 
 		grctx_grclasses = -1;
@@ -894,7 +1026,7 @@ struct nv_device : public nv_region
 		return new nv_device(getenv("NV_DEVICE"));
 	}
 
-	bool is_channel_enabled(unsigned channel)
+	bool is_channel_enabled(unsigned channel) const
 	{
 		if(card_type < NV_50)
 			return !!(rd32(NV04_PFIFO_MODE) & (1 << channel));
@@ -902,57 +1034,31 @@ struct nv_device : public nv_region
 			return !!(rd32(NV50_PFIFO_CTX_TABLE(channel)) & NV50_PFIFO_CTX_TABLE_CHANNEL_ENABLED);
 	}
 
-	uint32_t rd_get(unsigned channel)
-	{
-		return users->user[channel]->rd32(0x44);
-	}
-
-	void wr_get(unsigned channel, uint32_t value)
-	{
-		users->user[channel]->wr32(0x44, value);
-	}
-
-	uint32_t rd_put(unsigned channel)
-	{
-		/* user put always reads as 0 pre-nv40 */
-		if(card_type < NV_40)
-			return ramfc->fc[channel]->rd32(0); // TODO: what if the channel is executing right now?
+	std::shared_ptr<nv_hwchannel> hwchannel(unsigned channel) const {
+		std::shared_ptr<nv_fifoctx> fifoctx(this->fifoctx(channel));
+		if(fifoctx)
+			return std::shared_ptr<nv_hwchannel>(new nv_hwchannel(const_cast<nv_device*>(this), channel, fifoctx));
 		else
-			return users->user[channel]->rd32(0x40);
+			return std::shared_ptr<nv_hwchannel>();
 	}
 
-	void wr_put(unsigned channel, uint32_t value)
+	nv_fifoctx* fifoctx(unsigned channel) const
 	{
-		users->user[channel]->wr32(0x40, value);
-	}
-
-	uint32_t get_grctx(unsigned channel) {
-		// TODO: nv50 is at "hdr" in channel-private RAMIN
-		if(ramfc->fc_grctx >= 0) /* NV40 */
-			return ramfc->fc[channel]->rd32(ramfc->fc_grctx) << 4;
-		else if(grctx_table) /* NV20-NV30 */
-			return grctx_table->rd32(channel * 4) << 4;
-		else
-			return 0;
-	}
-
-	bool get_grclasses(unsigned channel, unsigned grclasses[8])
-	{
-		if(os->nv_driver == NV_DRIVER_NVIDIA) {
-			memcpy(grclasses, nvidia_grclasses + 8 * card_type, 8 * sizeof(unsigned));
-		} else {
-			if(!grctx_grclasses)
-				return false;
-			uint32_t grctx = get_grctx(channel);
-			if(!grctx)
-				return false;
-			for(unsigned i = 0; i < 8; ++i)
-				grclasses[i] = ramin->rd32(grctx + grctx_grclasses + i * 4) & 0xffff;
+		if(card_type < NV_50)
+			return new nv_fifoctx(ramfc, channel);
+		else {
+			uint32_t v = rd32(NV50_PFIFO_CTX_TABLE(channel));
+			if(!(v & NV50_PFIFO_CTX_TABLE_CHANNEL_ENABLED))
+				return 0;
+			uint32_t fc;
+			if(chipset == 0x50)
+				fc = v << 12;
+			else
+				fc = v << 8;
+			return new nv_fifoctx(ramin, fc);
 		}
-		return true;
 	}
 
-
 private:
 	void init_grctx_info() {
 		if(card_type == NV_50)
@@ -1037,7 +1143,9 @@ private:
 typedef nv_device::nv_ramht nv_ramht;
 typedef nv_device::nv_ramin nv_ramin;
 typedef nv_device::nv_ramfc nv_ramfc;
-typedef nv_device::nv_fc nv_fc;
+typedef nv_device::nv_grctx nv_grctx;
+typedef nv_device::nv_fifoctx nv_fifoctx;
+typedef nv_device::nv_hwchannel nv_hwchannel;
 typedef nv_device::nv_users nv_users;
 typedef nv_device::nv_user nv_user;
 typedef nv_device::nv_object nv_object;
@@ -1050,14 +1158,14 @@ std::ostream& operator <<(std::ostream& out, const std::pair<nv_device*, nv_ramh
 
 	out << (unsigned)entry.channel << ':' << hex08 << entry.handle << " @ " << hex08 << entry.instance << ": ";
 
-	nv_object obj(&*dev->ramin, entry.instance);
+	nv_object obj(dev->ramin, entry.instance);
 	out << obj;
 	return out;
 }
 
 int nv_find_idle_channel(struct nv_device* dev)
 {
-	bool enabled[128];
+	std::shared_ptr<nv_hwchannel> hwchan[128];
 	unsigned gets1[128];
 	unsigned puts1[128];
 	unsigned gets2[128];
@@ -1065,48 +1173,51 @@ int nv_find_idle_channel(struct nv_device* dev)
 	int idle = -1;
 
 	for(unsigned  i = 0; i < dev->channels; ++i) {
+		bool enabled;
 		if(i == 0 || i == 30 || i == 127)
-			enabled[i] = 0;
+			enabled = false;
 		else
-			enabled[i] = dev->is_channel_enabled(i);
-		if(enabled[i]) {
-			gets1[i] = dev->rd_get(i);
-			puts1[i] = dev->rd_put(i);
+			enabled = dev->is_channel_enabled(i);
+		if(enabled) {
+			hwchan[i] = dev->hwchannel(i);
+			gets1[i] = hwchan[i]->rd_get();
+			puts1[i] = hwchan[i]->rd_put();
 		}
 	}
 
 	sleep(1);
 
 	for(unsigned i = 0; i < dev->channels; ++i) {
-		if(enabled[i]) {
-			enabled[i] = dev->is_channel_enabled(i);
-			if(enabled[i]) {
-				gets2[i] = dev->rd_get(i);
-				puts2[i] = dev->rd_put(i);
+		if(hwchan[i]) {
+			if(!dev->is_channel_enabled(i))
+				hwchan[i].reset();
+			if(hwchan[i]) {
+				gets2[i] = hwchan[i]->rd_get();
+				puts2[i] = hwchan[i]->rd_put();
 			}
 		}
 	}
 
 	for(unsigned i = 0; i < dev->channels; ++i) {
-		if(enabled[i] && gets1[i] && gets1[i] == puts1[i] && gets2[i] == puts2[i] && gets1[i] == gets2[i]) {
+		if(hwchan[i] && gets1[i] && gets1[i] == puts1[i] && gets2[i] == puts2[i] && gets1[i] == gets2[i]) {
 			idle = i;
 		}
 	}
 	return idle;
 }
 
-bool nv_find_vram_ramht_entry(struct nv_device* dev, int channel, nv_ramht_entry& ret_entry)
+bool nv_find_vram_ramht_entry(std::shared_ptr<nv_ramht> ramht, int channel, nv_ramht_entry& ret_entry)
 {
 	int64_t best_limit = -1;
-	for(unsigned i = 0; i < dev->ramht->entries; ++i) {
-                nv_ramht_entry entry = dev->ramht->get_at(i);
+	for(unsigned i = 0; i < ramht->entries; ++i) {
+                nv_ramht_entry entry = ramht->get_at(i);
                 if(!entry.valid)
                         continue;
 
 		if(channel >= 0 && entry.channel != channel)
 			continue;
 
-		nv_object obj(&*dev->ramin, entry.instance);
+		nv_object obj(ramht->dev->ramin, entry.instance);
                 if(obj.type == nv_object::dma_linear && obj.dma_target == nv_object::vram
                 	&& obj.dma_base == 0 && obj.dma_limit > best_limit) {
                        	best_limit = obj.dma_limit;
@@ -1117,11 +1228,10 @@ bool nv_find_vram_ramht_entry(struct nv_device* dev, int channel, nv_ramht_entry
 }
 
 struct nv_channel {
-	struct nv_device* dev;
-	int channel;
+	std::shared_ptr<nv_hwchannel> hwchan;
 
-	nv_channel(struct nv_device* dev, int channel)
-	: dev(dev), channel(channel)
+	nv_channel(std::shared_ptr<nv_hwchannel> hwchan)
+	: hwchan(hwchan)
 	{}
 
 	void out(uint32_t v)
@@ -1139,8 +1249,8 @@ struct nv_channel_direct : public nv_channel
 {
 	uint32_t put;
 
-        nv_channel_direct(struct nv_device* dev, int channel)
-        : nv_channel(dev, channel)
+        nv_channel_direct(std::shared_ptr<nv_hwchannel> hwchan)
+        : nv_channel(hwchan)
         {
 	}
 
@@ -1148,7 +1258,7 @@ struct nv_channel_direct : public nv_channel
         {
                 uint32_t get;
                 while(get != put)
-                        get = dev->rd_get(channel);
+                        get = hwchan->rd_get();
         }	
 };
 
@@ -1165,25 +1275,25 @@ struct nv_channel_vram : public nv_channel_direct
 	uint32_t base;
         uint32_t our_put;
 
-        nv_channel_vram(struct nv_device* dev, int channel)
-        : nv_channel_direct(dev, channel)
+        nv_channel_vram(std::shared_ptr<nv_hwchannel> hwchan)
+        : nv_channel_direct(hwchan)
         {
         	nv_ramht_entry entry;
 		taken = false;
 
-		if(!nv_find_vram_ramht_entry(dev, channel, entry))
+		if(!nv_find_vram_ramht_entry(hwchan->ramht, hwchan->channel, entry))
 			throw std::runtime_error("Unable to find a vram ctxdma");
 		vram_ctxdma = entry.instance;
 		vram_handle = entry.handle;
 
 		/* the middle of vram is hopefully away from anything critical */
-		put = dev->rd_put(channel);
-		our_put = base = dev->vram_mappable_size / 2;
+		put = hwchan->rd_put();
+		our_put = base = hwchan->dev->vram_mappable_size / 2;
         }	
 
         virtual void outp(void* buf, int dwords)
         {
-                os->memcpy_to_phys(dev->vram_phys + our_put, (const char*)buf, dwords * 4);
+                os->memcpy_to_phys(hwchan->dev->vram_phys + our_put, (const char*)buf, dwords * 4);
                 our_put += dwords * 4;
         }
 
@@ -1191,22 +1301,22 @@ struct nv_channel_vram : public nv_channel_direct
         {
         	if(put != our_put) {
 			if(!taken) {
-				orig_getput = dev->rd_put(channel);
+				orig_getput = hwchan->rd_put();
 				put = base;
 
-				orig_ctxdma = dev->ramfc->fc[channel]->rd32(dev->ramfc->fc_fifo) << 4;
-				if(dev->card_type < NV_40)
-					orig_ctxdma &= 0xfffff;
-				dev->ramfc->fc[channel]->wr32(dev->ramfc->fc_fifo, vram_ctxdma >> 4);
+				orig_ctxdma = hwchan->rd_dma();
+				hwchan->wr_dma(vram_ctxdma);
 
-				dev->ramfc->fc[channel]->wr32(0, put);
-				dev->ramfc->fc[channel]->wr32(4, put);
+				/* TODO: what if it is being run now?
+				 * TODO: move this logic to hwchan? */
+				hwchan->fifoctx->wr32(0, put);
+				hwchan->fifoctx->wr32(4, put);
 				//dev->wr_get(channel, put);
-				dev->wr_put(channel, put);
+				hwchan->wr_put(put);
 				taken = true;
 			}
 			put = our_put;
-			dev->wr_put(channel, put);
+			hwchan->wr_put(put);
         	}
         }
 
@@ -1215,25 +1325,24 @@ struct nv_channel_vram : public nv_channel_direct
         }
 };
 
-struct nv_channel_runner
+struct nv_runner /* callback interface: nv_channel_parasite calls run() after writing the original get/put back; NOTE(review): no virtual destructor - confirm it is never deleted through nv_runner* */
 {
 	virtual void run() = 0;
 };
 
 struct nv_channel_parasite : public nv_channel_direct
 {
-	std::unique_ptr<nv_object> dmaobj;
+	std::shared_ptr<nv_object> dmaobj;
 	uint32_t dmactx;
 	uint32_t our_put;
 	uint32_t orig_getput;
-	nv_channel_runner* runner;
+	nv_runner* runner;
 
-	nv_channel_parasite(struct nv_device* dev, int channel, nv_channel_runner* runner)
-	: nv_channel_direct(dev, channel), runner(runner)
+	nv_channel_parasite(std::shared_ptr<nv_hwchannel> hwchan, nv_runner* runner)
+	: nv_channel_direct(hwchan), runner(runner)
 	{
-		uint32_t dmactx = dev->ramfc->fc[channel]->rd32(dev->ramfc->fc_fifo) << 4;
-		dmaobj.reset(new nv_object(&*dev->ramin, dmactx));
-		orig_getput = our_put = put = dev->rd_put(channel);
+		dmaobj.reset(new nv_object(hwchan->dev->ramin, hwchan->rd_dma()));
+		orig_getput = our_put = put = hwchan->rd_put();
 	}
 
 	virtual void outp(void* buf, int dwords)
@@ -1248,7 +1357,7 @@ struct nv_channel_parasite : public nv_channel_direct
 	{
 		if(put != our_put) {
 			put = our_put;
-			dev->wr_put(channel, put);
+			hwchan->wr_put(put);
 		}
 	}
 
@@ -1264,18 +1373,19 @@ struct nv_channel_parasite : public nv_channel_direct
                         fire();
                         wait_idle();
 
-			dev->wr_get(channel, orig_getput);
-			dev->wr_put(channel, orig_getput);
+			hwchan->wr_get(orig_getput);
+			hwchan->wr_put(orig_getput);
 
 			runner->run();
+			wait_idle();
 
-			orig_getput = our_put = put = dev->rd_put(channel);
+			orig_getput = our_put = put = hwchan->rd_put();
                 }
         }
 };
 
 /* Channel implementation based on stealing the channel of a spawned GL process */
-struct nv_gl_process : public nv_channel_runner
+struct nv_gl_process : public nv_runner
 {
 	pid_t pid;