23 files changed, 475 insertions, 382 deletions
diff --git a/linux-core/drmP.h b/linux-core/drmP.h
index 6f6f91c4..004f9637 100644
--- a/linux-core/drmP.h
+++ b/linux-core/drmP.h
@@ -488,7 +488,6 @@ typedef struct drm_agp_mem {
 
 typedef struct drm_agp_head {
 	agp_kern_info      agp_info;
-	const char         *chipset;
 	drm_agp_mem_t      *memory;
 	unsigned long      mode;
 	int                enabled;
@@ -518,6 +517,17 @@ typedef struct drm_map_list {
 	drm_map_t		*map;
 } drm_map_list_t;
 
+#if __HAVE_VBL_IRQ
+
+typedef struct drm_vbl_sig {
+	struct list_head	head;
+	unsigned int		sequence;
+	struct siginfo		info;
+	struct task_struct	*task;
+} drm_vbl_sig_t;
+
+#endif
+
 typedef struct drm_device {
 	const char	  *name;	/* Simple driver name		   */
 	char		  *unique;	/* Unique identifier: e.g., busid  */
@@ -580,6 +590,8 @@ typedef struct drm_device {
 #if __HAVE_VBL_IRQ
    	wait_queue_head_t vbl_queue;
    	atomic_t          vbl_received;
+	spinlock_t        vbl_lock;
+	drm_vbl_sig_t     vbl_sigs;
 #endif
 	cycles_t	  ctx_start;
 	cycles_t	  lck_start;
@@ -820,6 +832,7 @@ extern void          DRM(driver_irq_uninstall)( drm_device_t *dev );
 extern int           DRM(wait_vblank)(struct inode *inode, struct file *filp,
 				      unsigned int cmd, unsigned long arg);
 extern int           DRM(vblank_wait)(drm_device_t *dev, unsigned int *vbl_seq);
+extern void          DRM(vbl_send_signals)( drm_device_t *dev );
 #endif
 #if __HAVE_DMA_IRQ_BH
 extern void          DRM(dma_immediate_bh)( void *dev );
diff --git a/linux-core/drm_agpsupport.c b/linux-core/drm_agpsupport.c
index cd46110c..6d6b5911 100644
--- a/linux-core/drm_agpsupport.c
+++ b/linux-core/drm_agpsupport.c
@@ -260,60 +260,6 @@ drm_agp_head_t *DRM(agp_init)(void)
 			return NULL;
 		}
 		head->memory = NULL;
-		switch (head->agp_info.chipset) {
-		case INTEL_GENERIC:	head->chipset = "Intel";         break;
-		case INTEL_LX:		head->chipset = "Intel 440LX";   break;
-		case INTEL_BX:		head->chipset = "Intel 440BX";   break;
-		case INTEL_GX:		head->chipset = "Intel 440GX";   break;
-		case INTEL_I810:	head->chipset = "Intel i810";    break;
-
-		case INTEL_I815:	head->chipset = "Intel i815";	 break;
-#if LINUX_VERSION_CODE >= 0x02040f /* KERNEL_VERSION(2,4,15) */
-	 	case INTEL_I820:	head->chipset = "Intel i820";	 break;
-#endif
-		case INTEL_I840:	head->chipset = "Intel i840";    break;
-#if LINUX_VERSION_CODE >= 0x02040f /* KERNEL_VERSION(2,4,15) */
-		case INTEL_I845:	head->chipset = "Intel i845";    break;
-#endif
-		case INTEL_I850:	head->chipset = "Intel i850";	 break;
-
-		case VIA_GENERIC:	head->chipset = "VIA";           break;
-		case VIA_VP3:		head->chipset = "VIA VP3";       break;
-		case VIA_MVP3:		head->chipset = "VIA MVP3";      break;
-		case VIA_MVP4:		head->chipset = "VIA MVP4";      break;
-		case VIA_APOLLO_KX133:	head->chipset = "VIA Apollo KX133";
-			break;
-		case VIA_APOLLO_KT133:	head->chipset = "VIA Apollo KT133";
-			break;
-
-		case VIA_APOLLO_PRO: 	head->chipset = "VIA Apollo Pro";
-			break;
-		case SIS_GENERIC:	head->chipset = "SiS";           break;
-		case AMD_GENERIC:	head->chipset = "AMD";           break;
-		case AMD_IRONGATE:	head->chipset = "AMD Irongate";  break;
-		case ALI_GENERIC:	head->chipset = "ALi";           break;
-		case ALI_M1541: 	head->chipset = "ALi M1541";     break;
-
-#if LINUX_VERSION_CODE >= 0x020402
-		case ALI_M1621: 	head->chipset = "ALi M1621";	 break;
-		case ALI_M1631: 	head->chipset = "ALi M1631";	 break;
-		case ALI_M1632: 	head->chipset = "ALi M1632";	 break;
-		case ALI_M1641: 	head->chipset = "ALi M1641";	 break;
-		case ALI_M1647: 	head->chipset = "ALi M1647";	 break;
-		case ALI_M1651: 	head->chipset = "ALi M1651";	 break;
-#endif
-
-#if LINUX_VERSION_CODE >= 0x020406
-		case SVWRKS_HE: 	head->chipset = "Serverworks HE";
-			break;
-		case SVWRKS_LE: 	head->chipset = "Serverworks LE";
-			break;
-		case SVWRKS_GENERIC: 	head->chipset = "Serverworks Generic";
-			break;
-#endif
-
-		default:		head->chipset = "Unknown";       break;
-		}
 #if LINUX_VERSION_CODE <= 0x020408
 		head->cant_use_aperture = 0;
 		head->page_mask = ~(0xfff);
@@ -321,13 +267,12 @@ drm_agp_head_t *DRM(agp_init)(void)
 		head->cant_use_aperture = head->agp_info.cant_use_aperture;
 		head->page_mask = head->agp_info.page_mask;
 #endif
-
-		DRM_INFO("AGP %d.%d on %s @ 0x%08lx %ZuMB\n",
-			 head->agp_info.version.major,
-			 head->agp_info.version.minor,
-			 head->chipset,
-			 head->agp_info.aper_base,
-			 head->agp_info.aper_size);
+		
+		DRM_DEBUG("AGP %d.%d, aperture @ 0x%08lx %ZuMB\n",
+			  head->agp_info.version.major,
+			  head->agp_info.version.minor,
+			  head->agp_info.aper_base,
+			  head->agp_info.aper_size);
 	}
 	return head;
 }
diff --git a/linux-core/drm_dma.c b/linux-core/drm_dma.c
index dc041592..46393a5d 100644
--- a/linux-core/drm_dma.c
+++ b/linux-core/drm_dma.c
@@ -540,6 +540,10 @@ int DRM(irq_install)( drm_device_t *dev, int irq )
 
 #if __HAVE_VBL_IRQ
 	init_waitqueue_head(&dev->vbl_queue);
+
+	spin_lock_init( &dev->vbl_lock );
+
+	INIT_LIST_HEAD( &dev->vbl_sigs.head );
 #endif
 
 				/* Before installing handler */
@@ -610,7 +614,8 @@ int DRM(wait_vblank)( DRM_IOCTL_ARGS )
 	drm_device_t *dev = priv->dev;
 	drm_wait_vblank_t vblwait;
 	struct timeval now;
-	int ret;
+	int ret = 0;
+	unsigned int flags;
 
 	if (!dev->irq)
 		return -EINVAL;
@@ -618,15 +623,45 @@ int DRM(wait_vblank)( DRM_IOCTL_ARGS )
 	DRM_COPY_FROM_USER_IOCTL( vblwait, (drm_wait_vblank_t *)data,
 				  sizeof(vblwait) );
 
-	if ( vblwait.type == _DRM_VBLANK_RELATIVE ) {
-		vblwait.sequence += atomic_read( &dev->vbl_received );
+	switch ( vblwait.request.type & ~_DRM_VBLANK_FLAGS_MASK ) {
+	case _DRM_VBLANK_RELATIVE:
+		vblwait.request.sequence += atomic_read( &dev->vbl_received );
+	case _DRM_VBLANK_ABSOLUTE:
+		break;
+	default:
+		return -EINVAL;
 	}
 
-	ret = DRM(vblank_wait)( dev, &vblwait.sequence );
+	flags = vblwait.request.type & _DRM_VBLANK_FLAGS_MASK;
+	
+	if ( flags & _DRM_VBLANK_SIGNAL ) {
+		unsigned long irqflags;
+		drm_vbl_sig_t *vbl_sig = DRM_MALLOC( sizeof( drm_vbl_sig_t ) );
+
+		if ( !vbl_sig )
+			return -ENOMEM;
+
+		memset( (void *)vbl_sig, 0, sizeof(*vbl_sig) );
 
-	do_gettimeofday( &now );
-	vblwait.tval_sec = now.tv_sec;
-	vblwait.tval_usec = now.tv_usec;
+		vbl_sig->sequence = vblwait.request.sequence;
+		vbl_sig->info.si_signo = vblwait.request.signal;
+		vbl_sig->task = current;
+
+		vblwait.reply.sequence = atomic_read( &dev->vbl_received );
+
+		/* Hook signal entry into list */
+		spin_lock_irqsave( &dev->vbl_lock, irqflags );
+
+		list_add_tail( (struct list_head *) vbl_sig, &dev->vbl_sigs.head );
+
+		spin_unlock_irqrestore( &dev->vbl_lock, irqflags );
+	} else {
+		ret = DRM(vblank_wait)( dev, &vblwait.request.sequence );
+
+		do_gettimeofday( &now );
+		vblwait.reply.tval_sec = now.tv_sec;
+		vblwait.reply.tval_usec = now.tv_usec;
+	}
 
 	DRM_COPY_TO_USER_IOCTL( (drm_wait_vblank_t *)data, vblwait,
 				sizeof(vblwait) );
@@ -634,6 +669,33 @@ int DRM(wait_vblank)( DRM_IOCTL_ARGS )
 	return ret;
 }
 
+void DRM(vbl_send_signals)( drm_device_t *dev )
+{
+	struct list_head *entry, *tmp;
+	drm_vbl_sig_t *vbl_sig;
+	unsigned int vbl_seq = atomic_read( &dev->vbl_received );
+	unsigned long flags;
+
+	spin_lock_irqsave( &dev->vbl_lock, flags );
+
+	list_for_each_safe( entry, tmp, &dev->vbl_sigs.head ) {
+
+		vbl_sig = (drm_vbl_sig_t *) entry;
+
+		if ( ( vbl_seq - vbl_sig->sequence ) <= (1<<23) ) {
+
+			vbl_sig->info.si_code = atomic_read( &dev->vbl_received );
+			send_sig_info( vbl_sig->info.si_signo, &vbl_sig->info, vbl_sig->task );
+
+			list_del( entry );
+
+			DRM_FREE( entry );
+		}
+	}
+
+	spin_unlock_irqrestore( &dev->vbl_lock, flags );
+}
+
 #endif	/* __HAVE_VBL_IRQ */
 
 #else
diff --git a/linux-core/drm_proc.c b/linux-core/drm_proc.c
index 24e8556f..d29db7b7 100644
--- a/linux-core/drm_proc.c
+++ b/linux-core/drm_proc.c
@@ -449,7 +449,8 @@ static int DRM(_vma_info)(char *buf, char **start, off_t offset, int request,
 		for (i = vma->vm_start; i < vma->vm_end; i += PAGE_SIZE) {
 			pgd = pgd_offset(vma->vm_mm, i);
 			pmd = pmd_offset(pgd, i);
-			pte = pte_offset(pmd, i);
+			preempt_disable();
+			pte = pte_offset_map(pmd, i);
 			if (pte_present(*pte)) {
 				address = __pa(pte_page(*pte))
 					+ (i & (PAGE_SIZE-1));
@@ -465,6 +466,8 @@ static int DRM(_vma_info)(char *buf, char **start, off_t offset, int request,
 			} else {
 				DRM_PROC_PRINT("      0x%08lx\n", i);
 			}
+			pte_unmap(pte);
+			preempt_enable();
 		}
 #endif
 	}
diff --git a/linux-core/i810_dma.c b/linux-core/i810_dma.c
index 13f5f64f..ffb7c708 100644
--- a/linux-core/i810_dma.c
+++ b/linux-core/i810_dma.c
@@ -38,6 +38,7 @@
 #include "i810_drv.h"
 #include <linux/interrupt.h>	/* For task queue support */
 #include <linux/delay.h>
+#include <linux/pagemap.h>
 
 #ifdef DO_MUNMAP_4_ARGS
 #define DO_MUNMAP(m, a, l)	do_munmap(m, a, l, 1)
@@ -1184,7 +1185,8 @@ int i810_ov0_info(struct inode *inode, struct file *filp,
 
 	data.offset = dev_priv->overlay_offset;
 	data.physical = dev_priv->overlay_physical;
-	copy_to_user((drm_i810_overlay_t *)arg,&data,sizeof(data));
+	if (copy_to_user((drm_i810_overlay_t *)arg,&data,sizeof(data)))
+		return -EFAULT;
 	return 0;
 }
 
diff --git a/linux/drm.h b/linux/drm.h
index f26d4442..d1d66943 100644
--- a/linux/drm.h
+++ b/linux/drm.h
@@ -346,17 +346,30 @@ typedef struct drm_irq_busid {
 } drm_irq_busid_t;
 
 typedef enum {
-    _DRM_VBLANK_ABSOLUTE = 0x0,	/* Wait for specific vblank sequence number */
-    _DRM_VBLANK_RELATIVE = 0x1	/* Wait for given number of vblanks */
+    _DRM_VBLANK_ABSOLUTE = 0x0,		/* Wait for specific vblank sequence number */
+    _DRM_VBLANK_RELATIVE = 0x1,		/* Wait for given number of vblanks */
+    _DRM_VBLANK_SIGNAL   = 0x40000000	/* Send signal instead of blocking */
 } drm_vblank_seq_type_t;
 
-typedef struct drm_radeon_vbl_wait {
+#define _DRM_VBLANK_FLAGS_MASK _DRM_VBLANK_SIGNAL
+
+struct drm_wait_vblank_request {
+	drm_vblank_seq_type_t type;
+	unsigned int sequence;
+	unsigned long signal;
+};
+
+struct drm_wait_vblank_reply {
 	drm_vblank_seq_type_t type;
 	unsigned int sequence;
 	long tval_sec;
 	long tval_usec;
-} drm_wait_vblank_t;
+};
 
+typedef union drm_wait_vblank {
+	struct drm_wait_vblank_request request;
+	struct drm_wait_vblank_reply reply;
+} drm_wait_vblank_t;
 
 typedef struct drm_agp_mode {
 	unsigned long mode;
diff --git a/linux/drmP.h b/linux/drmP.h
index 6f6f91c4..004f9637 100644
--- a/linux/drmP.h
+++ b/linux/drmP.h
@@ -488,7 +488,6 @@ typedef struct drm_agp_mem {
 
 typedef struct drm_agp_head {
 	agp_kern_info      agp_info;
-	const char         *chipset;
 	drm_agp_mem_t      *memory;
 	unsigned long      mode;
 	int                enabled;
@@ -518,6 +517,17 @@ typedef struct drm_map_list {
 	drm_map_t		*map;
 } drm_map_list_t;
 
+#if __HAVE_VBL_IRQ
+
+typedef struct drm_vbl_sig {
+	struct list_head	head;
+	unsigned int		sequence;
+	struct siginfo		info;
+	struct task_struct	*task;
+} drm_vbl_sig_t;
+
+#endif
+
 typedef struct drm_device {
 	const char	  *name;	/* Simple driver name		   */
 	char		  *unique;	/* Unique identifier: e.g., busid  */
@@ -580,6 +590,8 @@ typedef struct drm_device {
 #if __HAVE_VBL_IRQ
    	wait_queue_head_t vbl_queue;
    	atomic_t          vbl_received;
+	spinlock_t        vbl_lock;
+	drm_vbl_sig_t     vbl_sigs;
 #endif
 	cycles_t	  ctx_start;
 	cycles_t	  lck_start;
@@ -820,6 +832,7 @@ extern void          DRM(driver_irq_uninstall)( drm_device_t *dev );
 extern int           DRM(wait_vblank)(struct inode *inode, struct file *filp,
 				      unsigned int cmd, unsigned long arg);
 extern int           DRM(vblank_wait)(drm_device_t *dev, unsigned int *vbl_seq);
+extern void          DRM(vbl_send_signals)( drm_device_t *dev );
 #endif
 #if __HAVE_DMA_IRQ_BH
 extern void          DRM(dma_immediate_bh)( void *dev );
diff --git a/linux/drm_agpsupport.h b/linux/drm_agpsupport.h
index cd46110c..6d6b5911 100644
--- a/linux/drm_agpsupport.h
+++ b/linux/drm_agpsupport.h
@@ -260,60 +260,6 @@ drm_agp_head_t *DRM(agp_init)(void)
 			return NULL;
 		}
 		head->memory = NULL;
-		switch (head->agp_info.chipset) {
-		case INTEL_GENERIC:	head->chipset = "Intel";         break;
-		case INTEL_LX:		head->chipset = "Intel 440LX";   break;
-		case INTEL_BX:		head->chipset = "Intel 440BX";   break;
-		case INTEL_GX:		head->chipset = "Intel 440GX";   break;
-		case INTEL_I810:	head->chipset = "Intel i810";    break;
-
-		case INTEL_I815:	head->chipset = "Intel i815";	 break;
-#if LINUX_VERSION_CODE >= 0x02040f /* KERNEL_VERSION(2,4,15) */
-	 	case INTEL_I820:	head->chipset = "Intel i820";	 break;
-#endif
-		case INTEL_I840:	head->chipset = "Intel i840";    break;
-#if LINUX_VERSION_CODE >= 0x02040f /* KERNEL_VERSION(2,4,15) */
-		case INTEL_I845:	head->chipset = "Intel i845";    break;
-#endif
-		case INTEL_I850:	head->chipset = "Intel i850";	 break;
-
-		case VIA_GENERIC:	head->chipset = "VIA";           break;
-		case VIA_VP3:		head->chipset = "VIA VP3";       break;
-		case VIA_MVP3:		head->chipset = "VIA MVP3";      break;
-		case VIA_MVP4:		head->chipset = "VIA MVP4";      break;
-		case VIA_APOLLO_KX133:	head->chipset = "VIA Apollo KX133";
-			break;
-		case VIA_APOLLO_KT133:	head->chipset = "VIA Apollo KT133";
-			break;
-
-		case VIA_APOLLO_PRO: 	head->chipset = "VIA Apollo Pro";
-			break;
-		case SIS_GENERIC:	head->chipset = "SiS";           break;
-		case AMD_GENERIC:	head->chipset = "AMD";           break;
-		case AMD_IRONGATE:	head->chipset = "AMD Irongate";  break;
-		case ALI_GENERIC:	head->chipset = "ALi";           break;
-		case ALI_M1541: 	head->chipset = "ALi M1541";     break;
-
-#if LINUX_VERSION_CODE >= 0x020402
-		case ALI_M1621: 	head->chipset = "ALi M1621";	 break;
-		case ALI_M1631: 	head->chipset = "ALi M1631";	 break;
-		case ALI_M1632: 	head->chipset = "ALi M1632";	 break;
-		case ALI_M1641: 	head->chipset = "ALi M1641";	 break;
-		case ALI_M1647: 	head->chipset = "ALi M1647";	 break;
-		case ALI_M1651: 	head->chipset = "ALi M1651";	 break;
-#endif
-
-#if LINUX_VERSION_CODE >= 0x020406
-		case SVWRKS_HE: 	head->chipset = "Serverworks HE";
-			break;
-		case SVWRKS_LE: 	head->chipset = "Serverworks LE";
-			break;
-		case SVWRKS_GENERIC: 	head->chipset = "Serverworks Generic";
-			break;
-#endif
-
-		default:		head->chipset = "Unknown";       break;
-		}
 #if LINUX_VERSION_CODE <= 0x020408
 		head->cant_use_aperture = 0;
 		head->page_mask = ~(0xfff);
@@ -321,13 +267,12 @@ drm_agp_head_t *DRM(agp_init)(void)
 		head->cant_use_aperture = head->agp_info.cant_use_aperture;
 		head->page_mask = head->agp_info.page_mask;
 #endif
-
-		DRM_INFO("AGP %d.%d on %s @ 0x%08lx %ZuMB\n",
-			 head->agp_info.version.major,
-			 head->agp_info.version.minor,
-			 head->chipset,
-			 head->agp_info.aper_base,
-			 head->agp_info.aper_size);
+		
+		DRM_DEBUG("AGP %d.%d, aperture @ 0x%08lx %ZuMB\n",
+			  head->agp_info.version.major,
+			  head->agp_info.version.minor,
+			  head->agp_info.aper_base,
+			  head->agp_info.aper_size);
 	}
 	return head;
 }
diff --git a/linux/drm_dma.h b/linux/drm_dma.h
index dc041592..46393a5d 100644
--- a/linux/drm_dma.h
+++ b/linux/drm_dma.h
@@ -540,6 +540,10 @@ int DRM(irq_install)( drm_device_t *dev, int irq )
 
 #if __HAVE_VBL_IRQ
 	init_waitqueue_head(&dev->vbl_queue);
+
+	spin_lock_init( &dev->vbl_lock );
+
+	INIT_LIST_HEAD( &dev->vbl_sigs.head );
 #endif
 
 				/* Before installing handler */
@@ -610,7 +614,8 @@ int DRM(wait_vblank)( DRM_IOCTL_ARGS )
 	drm_device_t *dev = priv->dev;
 	drm_wait_vblank_t vblwait;
 	struct timeval now;
-	int ret;
+	int ret = 0;
+	unsigned int flags;
 
 	if (!dev->irq)
 		return -EINVAL;
@@ -618,15 +623,45 @@ int DRM(wait_vblank)( DRM_IOCTL_ARGS )
 	DRM_COPY_FROM_USER_IOCTL( vblwait, (drm_wait_vblank_t *)data,
 				  sizeof(vblwait) );
 
-	if ( vblwait.type == _DRM_VBLANK_RELATIVE ) {
-		vblwait.sequence += atomic_read( &dev->vbl_received );
+	switch ( vblwait.request.type & ~_DRM_VBLANK_FLAGS_MASK ) {
+	case _DRM_VBLANK_RELATIVE:
+		vblwait.request.sequence += atomic_read( &dev->vbl_received );
+	case _DRM_VBLANK_ABSOLUTE:
+		break;
+	default:
+		return -EINVAL;
 	}
 
-	ret = DRM(vblank_wait)( dev, &vblwait.sequence );
+	flags = vblwait.request.type & _DRM_VBLANK_FLAGS_MASK;
+	
+	if ( flags & _DRM_VBLANK_SIGNAL ) {
+		unsigned long irqflags;
+		drm_vbl_sig_t *vbl_sig = DRM_MALLOC( sizeof( drm_vbl_sig_t ) );
+
+		if ( !vbl_sig )
+			return -ENOMEM;
+
+		memset( (void *)vbl_sig, 0, sizeof(*vbl_sig) );
 
-	do_gettimeofday( &now );
-	vblwait.tval_sec = now.tv_sec;
-	vblwait.tval_usec = now.tv_usec;
+		vbl_sig->sequence = vblwait.request.sequence;
+		vbl_sig->info.si_signo = vblwait.request.signal;
+		vbl_sig->task = current;
+
+		vblwait.reply.sequence = atomic_read( &dev->vbl_received );
+
+		/* Hook signal entry into list */
+		spin_lock_irqsave( &dev->vbl_lock, irqflags );
+
+		list_add_tail( (struct list_head *) vbl_sig, &dev->vbl_sigs.head );
+
+		spin_unlock_irqrestore( &dev->vbl_lock, irqflags );
+	} else {
+		ret = DRM(vblank_wait)( dev, &vblwait.request.sequence );
+
+		do_gettimeofday( &now );
+		vblwait.reply.tval_sec = now.tv_sec;
+		vblwait.reply.tval_usec = now.tv_usec;
+	}
 
 	DRM_COPY_TO_USER_IOCTL( (drm_wait_vblank_t *)data, vblwait,
 				sizeof(vblwait) );
@@ -634,6 +669,33 @@ int DRM(wait_vblank)( DRM_IOCTL_ARGS )
 	return ret;
 }
 
+void DRM(vbl_send_signals)( drm_device_t *dev )
+{
+	struct list_head *entry, *tmp;
+	drm_vbl_sig_t *vbl_sig;
+	unsigned int vbl_seq = atomic_read( &dev->vbl_received );
+	unsigned long flags;
+
+	spin_lock_irqsave( &dev->vbl_lock, flags );
+
+	list_for_each_safe( entry, tmp, &dev->vbl_sigs.head ) {
+
+		vbl_sig = (drm_vbl_sig_t *) entry;
+
+		if ( ( vbl_seq - vbl_sig->sequence ) <= (1<<23) ) {
+
+			vbl_sig->info.si_code = atomic_read( &dev->vbl_received );
+			send_sig_info( vbl_sig->info.si_signo, &vbl_sig->info, vbl_sig->task );
+
+			list_del( entry );
+
+			DRM_FREE( entry );
+		}
+	}
+
+	spin_unlock_irqrestore( &dev->vbl_lock, flags );
+}
+
 #endif	/* __HAVE_VBL_IRQ */
 
 #else
diff --git a/linux/drm_proc.h b/linux/drm_proc.h
index 24e8556f..d29db7b7 100644
--- a/linux/drm_proc.h
+++ b/linux/drm_proc.h
@@ -449,7 +449,8 @@ static int DRM(_vma_info)(char *buf, char **start, off_t offset, int request,
 		for (i = vma->vm_start; i < vma->vm_end; i += PAGE_SIZE) {
 			pgd = pgd_offset(vma->vm_mm, i);
 			pmd = pmd_offset(pgd, i);
-			pte = pte_offset(pmd, i);
+			preempt_disable();
+			pte = pte_offset_map(pmd, i);
 			if (pte_present(*pte)) {
 				address = __pa(pte_page(*pte))
 					+ (i & (PAGE_SIZE-1));
@@ -465,6 +466,8 @@ static int DRM(_vma_info)(char *buf, char **start, off_t offset, int request,
 			} else {
 				DRM_PROC_PRINT("      0x%08lx\n", i);
 			}
+			pte_unmap(pte);
+			preempt_enable();
 		}
 #endif
 	}
diff --git a/linux/gamma_drm.h b/linux/gamma_drm.h
index d06763ae..0d58b07b 100644
--- a/linux/gamma_drm.h
+++ b/linux/gamma_drm.h
@@ -48,6 +48,16 @@ typedef struct _drm_gamma_sarea {
 	int vertex_prim;
 } drm_gamma_sarea_t;
 
+/* WARNING: If you change any of these defines, make sure to change the
+ * defines in the Xserver file (xf86drmGamma.h)
+ */
+
+/* Gamma-specific ioctls
+ * The device-specific ioctl range is 0x40 to 0x79.
+ */
+#define DRM_IOCTL_GAMMA_INIT		DRM_IOW( 0x40, drm_gamma_init_t)
+#define DRM_IOCTL_GAMMA_COPY		DRM_IOW( 0x41, drm_gamma_copy_t)
+
 typedef struct drm_gamma_copy {
 	unsigned int	DMAOutputAddress;
 	unsigned int	DMAOutputCount;
diff --git a/linux/i810_dma.c b/linux/i810_dma.c
index 13f5f64f..ffb7c708 100644
--- a/linux/i810_dma.c
+++ b/linux/i810_dma.c
@@ -38,6 +38,7 @@
 #include "i810_drv.h"
 #include <linux/interrupt.h>	/* For task queue support */
 #include <linux/delay.h>
+#include <linux/pagemap.h>
 
 #ifdef DO_MUNMAP_4_ARGS
 #define DO_MUNMAP(m, a, l)	do_munmap(m, a, l, 1)
@@ -1184,7 +1185,8 @@ int i810_ov0_info(struct inode *inode, struct file *filp,
 
 	data.offset = dev_priv->overlay_offset;
 	data.physical = dev_priv->overlay_physical;
-	copy_to_user((drm_i810_overlay_t *)arg,&data,sizeof(data));
+	if (copy_to_user((drm_i810_overlay_t *)arg,&data,sizeof(data)))
+		return -EFAULT;
 	return 0;
 }
 
diff --git a/linux/sis_ds.c b/linux/sis_ds.c
index 95880a48..f55cf6ab 100644
--- a/linux/sis_ds.c
+++ b/linux/sis_ds.c
@@ -50,15 +50,16 @@ set_t *setInit(void)
   set_t *set;
 
   set = (set_t *)MALLOC(sizeof(set_t));
-  for(i = 0; i < SET_SIZE; i++){
-    set->list[i].free_next = i+1;    
-    set->list[i].alloc_next = -1;
-  }    
-  set->list[SET_SIZE-1].free_next = -1;
-  set->free = 0;
-  set->alloc = -1;
-  set->trace = -1;
-  
+  if (set) {
+    for(i = 0; i < SET_SIZE; i++){
+      set->list[i].free_next = i+1;    
+      set->list[i].alloc_next = -1;
+    }    
+    set->list[SET_SIZE-1].free_next = -1;
+    set->free = 0;
+    set->alloc = -1;
+    set->trace = -1;
+  }  
   return set;
 }
 
@@ -172,7 +173,8 @@ static void *calloc(size_t nmemb, size_t size)
 {
   void *addr;
   addr = kmalloc(nmemb*size, GFP_KERNEL);
-  memset(addr, 0, nmemb*size);
+  if (addr)
+    memset(addr, 0, nmemb*size);
   return addr;
 }
 #define free(n) kfree(n)
diff --git a/shared-core/drm.h b/shared-core/drm.h
index f26d4442..d1d66943 100644
--- a/shared-core/drm.h
+++ b/shared-core/drm.h
@@ -346,17 +346,30 @@ typedef struct drm_irq_busid {
 } drm_irq_busid_t;
 
 typedef enum {
-    _DRM_VBLANK_ABSOLUTE = 0x0,	/* Wait for specific vblank sequence number */
-    _DRM_VBLANK_RELATIVE = 0x1	/* Wait for given number of vblanks */
+    _DRM_VBLANK_ABSOLUTE = 0x0,		/* Wait for specific vblank sequence number */
+    _DRM_VBLANK_RELATIVE = 0x1,		/* Wait for given number of vblanks */
+    _DRM_VBLANK_SIGNAL   = 0x40000000	/* Send signal instead of blocking */
 } drm_vblank_seq_type_t;
 
-typedef struct drm_radeon_vbl_wait {
+#define _DRM_VBLANK_FLAGS_MASK _DRM_VBLANK_SIGNAL
+
+struct drm_wait_vblank_request {
+	drm_vblank_seq_type_t type;
+	unsigned int sequence;
+	unsigned long signal;
+};
+
+struct drm_wait_vblank_reply {
 	drm_vblank_seq_type_t type;
 	unsigned int sequence;
 	long tval_sec;
 	long tval_usec;
-} drm_wait_vblank_t;
+};
 
+typedef union drm_wait_vblank {
+	struct drm_wait_vblank_request request;
+	struct drm_wait_vblank_reply reply;
+} drm_wait_vblank_t;
 
 typedef struct drm_agp_mode {
 	unsigned long mode;
diff --git a/shared-core/mga_irq.c b/shared-core/mga_irq.c
index 568d193f..28e9a262 100644
--- a/shared-core/mga_irq.c
+++ b/shared-core/mga_irq.c
@@ -50,6 +50,7 @@ void mga_dma_service( DRM_IRQ_ARGS )
 		MGA_WRITE( MGA_ICLEAR, MGA_VLINEICLR );
 		atomic_inc(&dev->vbl_received);
 		DRM_WAKEUP(&dev->vbl_queue);
+		DRM(vbl_send_signals)( dev );
 	}
 }
 
@@ -64,7 +65,7 @@ int mga_vblank_wait(drm_device_t *dev, unsigned int *sequence)
 	 */
 	DRM_WAIT_ON( ret, dev->vbl_queue, 3*DRM_HZ, 
 		     ( ( ( cur_vblank = atomic_read(&dev->vbl_received ) )
-			 + ~*sequence + 1 ) <= (1<<23) ) );
+			 - *sequence ) <= (1<<23) ) );
 
 	*sequence = cur_vblank;
 
diff --git a/shared-core/r128_irq.c b/shared-core/r128_irq.c
index a29a81b5..bfc30405 100644
--- a/shared-core/r128_irq.c
+++ b/shared-core/r128_irq.c
@@ -50,6 +50,7 @@ void r128_dma_service( DRM_IRQ_ARGS )
 		R128_WRITE( R128_GEN_INT_STATUS, R128_CRTC_VBLANK_INT_AK );
 		atomic_inc(&dev->vbl_received);
 		DRM_WAKEUP(&dev->vbl_queue);
+		DRM(vbl_send_signals)( dev );
 	}
 }
 
@@ -64,7 +65,7 @@ int DRM(vblank_wait)(drm_device_t *dev, unsigned int *sequence)
 	 */
 	DRM_WAIT_ON( ret, dev->vbl_queue, 3*DRM_HZ, 
 		     ( ( ( cur_vblank = atomic_read(&dev->vbl_received ) )
-			 + ~*sequence + 1 ) <= (1<<23) ) );
+			 - *sequence ) <= (1<<23) ) );
 
 	*sequence = cur_vblank;
 
diff --git a/shared-core/radeon_irq.c b/shared-core/radeon_irq.c
index 54702bee..c5cd61c5 100644
--- a/shared-core/radeon_irq.c
+++ b/shared-core/radeon_irq.c
@@ -70,13 +70,12 @@ void DRM(dma_service)( DRM_IRQ_ARGS )
 		DRM_WAKEUP( &dev_priv->swi_queue );
 	}
 
-#if __HAVE_VBL_IRQ
 	/* VBLANK interrupt */
 	if (stat & RADEON_CRTC_VBLANK_STAT) {
 		atomic_inc(&dev->vbl_received);
 		DRM_WAKEUP(&dev->vbl_queue);
+		DRM(vbl_send_signals)( dev );
 	}
-#endif
 
 	/* Acknowledge all the bits in GEN_INT_STATUS -- seem to get
 	 * more than we asked for...
@@ -138,7 +137,6 @@ int radeon_emit_and_wait_irq(drm_device_t *dev)
 }
 
 
-#if __HAVE_VBL_IRQ
 int DRM(vblank_wait)(drm_device_t *dev, unsigned int *sequence)
 {
   	drm_radeon_private_t *dev_priv = 
@@ -161,13 +159,12 @@ int DRM(vblank_wait)(drm_device_t *dev, unsigned int *sequence)
 	 */
 	DRM_WAIT_ON( ret, dev->vbl_queue, 3*DRM_HZ, 
 		     ( ( ( cur_vblank = atomic_read(&dev->vbl_received ) )
-			 + ~*sequence + 1 ) <= (1<<23) ) );
+			 - *sequence ) <= (1<<23) ) );
 
 	*sequence = cur_vblank;
 
 	return ret;
 }
-#endif
 
 
 /* Needs the lock as it touches the ring.
diff --git a/shared-core/radeon_state.c b/shared-core/radeon_state.c
index 7b480a7e..1fe007b3 100644
--- a/shared-core/radeon_state.c
+++ b/shared-core/radeon_state.c
@@ -1074,19 +1074,30 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev,
 	const u8 *data;
 	int size, dwords, tex_width, blit_width;
 	u32 y, height;
-	int ret = 0, i;
+	int i;
 	RING_LOCALS;
 
 	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
 
-	/* FIXME: Be smarter about this...
+	/* Flush the pixel cache.  This ensures no pixel data gets mixed
+	 * up with the texture data from the host data blit; otherwise
+	 * part of the texture image may be corrupted.
 	 */
-	buf = radeon_freelist_get( dev );
-	if ( !buf ) return DRM_ERR(EAGAIN);
+	BEGIN_RING( 4 );
+	RADEON_FLUSH_CACHE();
+	RADEON_WAIT_UNTIL_IDLE();
+	ADVANCE_RING();
+
+#ifdef __BIG_ENDIAN
+	/* The Mesa texture functions provide the data in little-endian order,
+	 * as the chip wants it, but we need to compensate for the fact that
+	 * the CP ring gets byte-swapped.
+	 */
+	BEGIN_RING( 2 );
+	OUT_RING_REG( RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT );
+	ADVANCE_RING();
+#endif
 
-	DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
-		   tex->offset >> 10, tex->pitch, tex->format,
-		   image->x, image->y, image->width, image->height );
 
 	/* The compiler won't optimize away a division by a variable,
 	 * even if the only legal values are powers of two.  Thus, we'll
@@ -1120,127 +1131,113 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev,
 		return DRM_ERR(EINVAL);
 	}
 
-	DRM_DEBUG( "   tex=%dx%d  blit=%d\n",
-		   tex_width, tex->height, blit_width );
-
-	/* Flush the pixel cache.  This ensures no pixel data gets mixed
-	 * up with the texture data from the host data blit, otherwise
-	 * part of the texture image may be corrupted.
-	 */
-	BEGIN_RING( 4 );
-
-	RADEON_FLUSH_CACHE();
-	RADEON_WAIT_UNTIL_IDLE();
-
-	ADVANCE_RING();
-
-#ifdef __BIG_ENDIAN
-	/* The Mesa texture functions provide the data in little endian as the
-	 * chip wants it, but we need to compensate for the fact that the CP
-	 * ring gets byte-swapped
-	 */
-	BEGIN_RING( 2 );
-	OUT_RING_REG( RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT );
-	ADVANCE_RING();
-#endif
-
-	/* Make a copy of the parameters in case we have to update them
-	 * for a multi-pass texture blit.
-	 */
-	y = image->y;
-	height = image->height;
-	data = (const u8 *)image->data;
-
-	size = height * blit_width;
+	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width );
 
-	if ( size > RADEON_MAX_TEXTURE_SIZE ) {
-		/* Texture image is too large, do a multipass upload */
-		ret = DRM_ERR(EAGAIN);
+	do {
+		DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
+			   tex->offset >> 10, tex->pitch, tex->format,
+			   image->x, image->y, image->width, image->height );
 
-		/* Adjust the blit size to fit the indirect buffer */
-		height = RADEON_MAX_TEXTURE_SIZE / blit_width;
+		/* Make a copy of the parameters in case we have to
+		 * update them for a multi-pass texture blit.
+		 */
+		y = image->y;
+		height = image->height;
+		data = (const u8 *)image->data;
+		
 		size = height * blit_width;
 
+		if ( size > RADEON_MAX_TEXTURE_SIZE ) {
+			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
+			size = height * blit_width;
+		} else if ( size < 4 && size > 0 ) {
+			size = 4;
+		} else if ( size == 0 ) {
+			return 0;
+		}
+
 		/* Update the input parameters for next time */
 		image->y += height;
 		image->height -= height;
-		image->data = (const char *)image->data + size;
+		image->data += size;
 
-		if ( DRM_COPY_TO_USER( tex->image, image, sizeof(*image) ) ) {
-			DRM_ERROR( "EFAULT on tex->image\n" );
-			return DRM_ERR(EFAULT);
+		buf = radeon_freelist_get( dev );
+		if ( 0 && !buf ) {
+			radeon_do_cp_idle( dev_priv );
+			buf = radeon_freelist_get( dev );
+		}
+		if ( !buf ) {
+			DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
+			DRM_COPY_TO_USER( tex->image, image, sizeof(*image) );
+			return DRM_ERR(EAGAIN);
 		}
-	} else if ( size < 4 && size > 0 ) {
-		size = 4;
-	}
 
-	dwords = size / 4;
 
-	/* Dispatch the indirect buffer.
-	 */
-	buffer = (u32 *)((char *)dev_priv->buffers->handle + buf->offset);
-
-	buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 );
-	buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
-		     RADEON_GMC_BRUSH_NONE |
-		     (format << 8) |
-		     RADEON_GMC_SRC_DATATYPE_COLOR |
-		     RADEON_ROP3_S |
-		     RADEON_DP_SRC_SOURCE_HOST_DATA |
-		     RADEON_GMC_CLR_CMP_CNTL_DIS |
-		     RADEON_GMC_WR_MSK_DIS);
-
-	buffer[2] = (tex->pitch << 22) | (tex->offset >> 10);
-	buffer[3] = 0xffffffff;
-	buffer[4] = 0xffffffff;
-	buffer[5] = (y << 16) | image->x;
-	buffer[6] = (height << 16) | image->width;
-	buffer[7] = dwords;
-
-	buffer += 8;
-
-	if ( tex_width >= 32 ) {
-		/* Texture image width is larger than the minimum, so we
-		 * can upload it directly.
-		 */
-		if ( DRM_COPY_FROM_USER( buffer, data, dwords * sizeof(u32) ) ) {
-			DRM_ERROR( "EFAULT on data, %d dwords\n", dwords );
-			return DRM_ERR(EFAULT);
-		}
-	} else {
-		/* Texture image width is less than the minimum, so we
-		 * need to pad out each image scanline to the minimum
-		 * width.
+		/* Dispatch the indirect buffer.
 		 */
-		for ( i = 0 ; i < tex->height ; i++ ) {
-			if ( DRM_COPY_FROM_USER( buffer, data, tex_width ) ) {
-				DRM_ERROR( "EFAULT on pad, %d bytes\n",
-					   tex_width );
+		buffer = (u32*)((char*)dev_priv->buffers->handle + buf->offset);
+		dwords = size / 4;
+		buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 );
+		buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+			     RADEON_GMC_BRUSH_NONE |
+			     (format << 8) |
+			     RADEON_GMC_SRC_DATATYPE_COLOR |
+			     RADEON_ROP3_S |
+			     RADEON_DP_SRC_SOURCE_HOST_DATA |
+			     RADEON_GMC_CLR_CMP_CNTL_DIS |
+			     RADEON_GMC_WR_MSK_DIS);
+		
+		buffer[2] = (tex->pitch << 22) | (tex->offset >> 10);
+		buffer[3] = 0xffffffff;
+		buffer[4] = 0xffffffff;
+		buffer[5] = (y << 16) | image->x;
+		buffer[6] = (height << 16) | image->width;
+		buffer[7] = dwords;
+		buffer += 8;
+
+		if ( tex_width >= 32 ) {
+			/* Texture image width is larger than the minimum, so we
+			 * can upload it directly.
+			 */
+			if ( DRM_COPY_FROM_USER( buffer, data, 
+						 dwords * sizeof(u32) ) ) {
+				DRM_ERROR( "EFAULT on data, %d dwords\n", 
+					   dwords );
 				return DRM_ERR(EFAULT);
 			}
-			buffer += 8;
-			data += tex_width;
+		} else {
+			/* Texture image width is less than the minimum, so we
+			 * need to pad out each image scanline to the minimum
+			 * width.
+			 */
+			for ( i = 0 ; i < tex->height ; i++ ) {
+				if ( DRM_COPY_FROM_USER( buffer, data, 
+							 tex_width ) ) {
+					DRM_ERROR( "EFAULT on pad, %d bytes\n",
+						   tex_width );
+					return DRM_ERR(EFAULT);
+				}
+				buffer += 8;
+				data += tex_width;
+			}
 		}
-	}
 
-	buf->pid = DRM_CURRENTPID;
-	buf->used = (dwords + 8) * sizeof(u32);
+		buf->pid = DRM_CURRENTPID;
+		buf->used = (dwords + 8) * sizeof(u32);
+		radeon_cp_dispatch_indirect( dev, buf, 0, buf->used );
+		radeon_cp_discard_buffer( dev, buf );
 
-	radeon_cp_dispatch_indirect( dev, buf, 0, buf->used );
-	radeon_cp_discard_buffer( dev, buf );
+	} while (image->height > 0);
 
 	/* Flush the pixel cache after the blit completes.  This ensures
 	 * the texture data is written out to memory before rendering
 	 * continues.
 	 */
 	BEGIN_RING( 4 );
-
 	RADEON_FLUSH_CACHE();
 	RADEON_WAIT_UNTIL_2D_IDLE();
-
 	ADVANCE_RING();
-
-	return ret;
+	return 0;
 }
 
 
diff --git a/shared/drm.h b/shared/drm.h
index f26d4442..d1d66943 100644
--- a/shared/drm.h
+++ b/shared/drm.h
@@ -346,17 +346,30 @@ typedef struct drm_irq_busid {
 } drm_irq_busid_t;
 
 typedef enum {
-    _DRM_VBLANK_ABSOLUTE = 0x0,	/* Wait for specific vblank sequence number */
-    _DRM_VBLANK_RELATIVE = 0x1	/* Wait for given number of vblanks */
+    _DRM_VBLANK_ABSOLUTE = 0x0,		/* Wait for specific vblank sequence number */
+    _DRM_VBLANK_RELATIVE = 0x1,		/* Wait for given number of vblanks */
+    _DRM_VBLANK_SIGNAL   = 0x40000000	/* Send signal instead of blocking */
 } drm_vblank_seq_type_t;
 
-typedef struct drm_radeon_vbl_wait {
+#define _DRM_VBLANK_FLAGS_MASK _DRM_VBLANK_SIGNAL
+
+struct drm_wait_vblank_request {	/* the "request" member of union drm_wait_vblank */
+	drm_vblank_seq_type_t type;	/* a drm_vblank_seq_type_t value; see the enum comments */
+	unsigned int sequence;		/* vblank sequence number or vblank count, per type */
+	unsigned long signal;		/* NOTE(review): presumably the signal for _DRM_VBLANK_SIGNAL -- confirm */
+};
+
+struct drm_wait_vblank_reply {
 	drm_vblank_seq_type_t type;
 	unsigned int sequence;
 	long tval_sec;
 	long tval_usec;
-} drm_wait_vblank_t;
+};
 
+typedef union drm_wait_vblank {
+	struct drm_wait_vblank_request request;
+	struct drm_wait_vblank_reply reply;
+} drm_wait_vblank_t;
 
 typedef struct drm_agp_mode {
 	unsigned long mode;
diff --git a/shared/mga_irq.c b/shared/mga_irq.c
index 568d193f..28e9a262 100644
--- a/shared/mga_irq.c
+++ b/shared/mga_irq.c
@@ -50,6 +50,7 @@ void mga_dma_service( DRM_IRQ_ARGS )
 		MGA_WRITE( MGA_ICLEAR, MGA_VLINEICLR );
 		atomic_inc(&dev->vbl_received);
 		DRM_WAKEUP(&dev->vbl_queue);
+		DRM(vbl_send_signals)( dev );
 	}
 }
 
@@ -64,7 +65,7 @@ int mga_vblank_wait(drm_device_t *dev, unsigned int *sequence)
 	 */
 	DRM_WAIT_ON( ret, dev->vbl_queue, 3*DRM_HZ, 
 		     ( ( ( cur_vblank = atomic_read(&dev->vbl_received ) )
-			 + ~*sequence + 1 ) <= (1<<23) ) );
+			 - *sequence ) <= (1<<23) ) );
 
 	*sequence = cur_vblank;
 
diff --git a/shared/r128_irq.c b/shared/r128_irq.c
index a29a81b5..bfc30405 100644
--- a/shared/r128_irq.c
+++ b/shared/r128_irq.c
@@ -50,6 +50,7 @@ void r128_dma_service( DRM_IRQ_ARGS )
 		R128_WRITE( R128_GEN_INT_STATUS, R128_CRTC_VBLANK_INT_AK );
 		atomic_inc(&dev->vbl_received);
 		DRM_WAKEUP(&dev->vbl_queue);
+		DRM(vbl_send_signals)( dev );
 	}
 }
 
@@ -64,7 +65,7 @@ int DRM(vblank_wait)(drm_device_t *dev, unsigned int *sequence)
 	 */
 	DRM_WAIT_ON( ret, dev->vbl_queue, 3*DRM_HZ, 
 		     ( ( ( cur_vblank = atomic_read(&dev->vbl_received ) )
-			 + ~*sequence + 1 ) <= (1<<23) ) );
+			 - *sequence ) <= (1<<23) ) );
 
 	*sequence = cur_vblank;
 
diff --git a/shared/radeon_irq.c b/shared/radeon_irq.c
index 54702bee..c5cd61c5 100644
--- a/shared/radeon_irq.c
+++ b/shared/radeon_irq.c
@@ -70,13 +70,12 @@ void DRM(dma_service)( DRM_IRQ_ARGS )
 		DRM_WAKEUP( &dev_priv->swi_queue );
 	}
 
-#if __HAVE_VBL_IRQ
 	/* VBLANK interrupt */
 	if (stat & RADEON_CRTC_VBLANK_STAT) {
 		atomic_inc(&dev->vbl_received);
 		DRM_WAKEUP(&dev->vbl_queue);
+		DRM(vbl_send_signals)( dev );
 	}
-#endif
 
 	/* Acknowledge all the bits in GEN_INT_STATUS -- seem to get
 	 * more than we asked for...
@@ -138,7 +137,6 @@ int radeon_emit_and_wait_irq(drm_device_t *dev)
 }
 
 
-#if __HAVE_VBL_IRQ
 int DRM(vblank_wait)(drm_device_t *dev, unsigned int *sequence)
 {
   	drm_radeon_private_t *dev_priv = 
@@ -161,13 +159,12 @@ int DRM(vblank_wait)(drm_device_t *dev, unsigned int *sequence)
 	 */
 	DRM_WAIT_ON( ret, dev->vbl_queue, 3*DRM_HZ, 
 		     ( ( ( cur_vblank = atomic_read(&dev->vbl_received ) )
-			 + ~*sequence + 1 ) <= (1<<23) ) );
+			 - *sequence ) <= (1<<23) ) );
 
 	*sequence = cur_vblank;
 
 	return ret;
 }
-#endif
 
 
 /* Needs the lock as it touches the ring.
diff --git a/shared/radeon_state.c b/shared/radeon_state.c
index 7b480a7e..1fe007b3 100644
--- a/shared/radeon_state.c
+++ b/shared/radeon_state.c
@@ -1074,19 +1074,30 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev,
 	const u8 *data;
 	int size, dwords, tex_width, blit_width;
 	u32 y, height;
-	int ret = 0, i;
+	int i;
 	RING_LOCALS;
 
 	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
 
-	/* FIXME: Be smarter about this...
+	/* Flush the pixel cache.  This ensures no pixel data gets mixed
+	 * up with the texture data from the host data blit, otherwise
+	 * part of the texture image may be corrupted.
 	 */
-	buf = radeon_freelist_get( dev );
-	if ( !buf ) return DRM_ERR(EAGAIN);
+	BEGIN_RING( 4 );
+	RADEON_FLUSH_CACHE();
+	RADEON_WAIT_UNTIL_IDLE();
+	ADVANCE_RING();
+
+#ifdef __BIG_ENDIAN
+	/* The Mesa texture functions provide the data in little endian as the
+	 * chip wants it, but we need to compensate for the fact that the CP
+	 * ring gets byte-swapped
+	 */
+	BEGIN_RING( 2 );
+	OUT_RING_REG( RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT );
+	ADVANCE_RING();
+#endif
 
-	DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
-		   tex->offset >> 10, tex->pitch, tex->format,
-		   image->x, image->y, image->width, image->height );
 
 	/* The compiler won't optimize away a division by a variable,
 	 * even if the only legal values are powers of two.  Thus, we'll
@@ -1120,127 +1131,113 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev,
 		return DRM_ERR(EINVAL);
 	}
 
-	DRM_DEBUG( "   tex=%dx%d  blit=%d\n",
-		   tex_width, tex->height, blit_width );
-
-	/* Flush the pixel cache.  This ensures no pixel data gets mixed
-	 * up with the texture data from the host data blit, otherwise
-	 * part of the texture image may be corrupted.
-	 */
-	BEGIN_RING( 4 );
-
-	RADEON_FLUSH_CACHE();
-	RADEON_WAIT_UNTIL_IDLE();
-
-	ADVANCE_RING();
-
-#ifdef __BIG_ENDIAN
-	/* The Mesa texture functions provide the data in little endian as the
-	 * chip wants it, but we need to compensate for the fact that the CP
-	 * ring gets byte-swapped
-	 */
-	BEGIN_RING( 2 );
-	OUT_RING_REG( RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT );
-	ADVANCE_RING();
-#endif
-
-	/* Make a copy of the parameters in case we have to update them
-	 * for a multi-pass texture blit.
-	 */
-	y = image->y;
-	height = image->height;
-	data = (const u8 *)image->data;
-
-	size = height * blit_width;
+	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width );
 
-	if ( size > RADEON_MAX_TEXTURE_SIZE ) {
-		/* Texture image is too large, do a multipass upload */
-		ret = DRM_ERR(EAGAIN);
+	do {
+		DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
+			   tex->offset >> 10, tex->pitch, tex->format,
+			   image->x, image->y, image->width, image->height );
 
-		/* Adjust the blit size to fit the indirect buffer */
-		height = RADEON_MAX_TEXTURE_SIZE / blit_width;
+		/* Make a copy of the parameters in case we have to
+		 * update them for a multi-pass texture blit.
+		 */
+		y = image->y;
+		height = image->height;
+		data = (const u8 *)image->data;
+		
 		size = height * blit_width;
 
+		if ( size > RADEON_MAX_TEXTURE_SIZE ) {
+			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
+			size = height * blit_width;
+		} else if ( size < 4 && size > 0 ) {
+			size = 4;
+		} else if ( size == 0 ) {
+			return 0;
+		}
+
 		/* Update the input parameters for next time */
 		image->y += height;
 		image->height -= height;
-		image->data = (const char *)image->data + size;
+		image->data += size;
 
-		if ( DRM_COPY_TO_USER( tex->image, image, sizeof(*image) ) ) {
-			DRM_ERROR( "EFAULT on tex->image\n" );
-			return DRM_ERR(EFAULT);
+		buf = radeon_freelist_get( dev );
+		if ( !buf ) {
+			/* No buffer, nothing sent: rewind so the retry resends this chunk */
+			image->y -= height;
+			image->height += height;
+			image->data -= size;
+			DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
+			return DRM_COPY_TO_USER( tex->image, image, sizeof(*image) )
+				? DRM_ERR(EFAULT) : DRM_ERR(EAGAIN);
 		}
-	} else if ( size < 4 && size > 0 ) {
-		size = 4;
-	}
 
-	dwords = size / 4;
 
-	/* Dispatch the indirect buffer.
-	 */
-	buffer = (u32 *)((char *)dev_priv->buffers->handle + buf->offset);
-
-	buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 );
-	buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
-		     RADEON_GMC_BRUSH_NONE |
-		     (format << 8) |
-		     RADEON_GMC_SRC_DATATYPE_COLOR |
-		     RADEON_ROP3_S |
-		     RADEON_DP_SRC_SOURCE_HOST_DATA |
-		     RADEON_GMC_CLR_CMP_CNTL_DIS |
-		     RADEON_GMC_WR_MSK_DIS);
-
-	buffer[2] = (tex->pitch << 22) | (tex->offset >> 10);
-	buffer[3] = 0xffffffff;
-	buffer[4] = 0xffffffff;
-	buffer[5] = (y << 16) | image->x;
-	buffer[6] = (height << 16) | image->width;
-	buffer[7] = dwords;
-
-	buffer += 8;
-
-	if ( tex_width >= 32 ) {
-		/* Texture image width is larger than the minimum, so we
-		 * can upload it directly.
-		 */
-		if ( DRM_COPY_FROM_USER( buffer, data, dwords * sizeof(u32) ) ) {
-			DRM_ERROR( "EFAULT on data, %d dwords\n", dwords );
-			return DRM_ERR(EFAULT);
-		}
-	} else {
-		/* Texture image width is less than the minimum, so we
-		 * need to pad out each image scanline to the minimum
-		 * width.
+		/* Dispatch the indirect buffer.
 		 */
-		for ( i = 0 ; i < tex->height ; i++ ) {
-			if ( DRM_COPY_FROM_USER( buffer, data, tex_width ) ) {
-				DRM_ERROR( "EFAULT on pad, %d bytes\n",
-					   tex_width );
+		buffer = (u32*)((char*)dev_priv->buffers->handle + buf->offset);
+		dwords = size / 4;
+		buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 );
+		buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+			     RADEON_GMC_BRUSH_NONE |
+			     (format << 8) |
+			     RADEON_GMC_SRC_DATATYPE_COLOR |
+			     RADEON_ROP3_S |
+			     RADEON_DP_SRC_SOURCE_HOST_DATA |
+			     RADEON_GMC_CLR_CMP_CNTL_DIS |
+			     RADEON_GMC_WR_MSK_DIS);
+		
+		buffer[2] = (tex->pitch << 22) | (tex->offset >> 10);
+		buffer[3] = 0xffffffff;
+		buffer[4] = 0xffffffff;
+		buffer[5] = (y << 16) | image->x;
+		buffer[6] = (height << 16) | image->width;
+		buffer[7] = dwords;
+		buffer += 8;
+
+		if ( tex_width >= 32 ) {
+			/* Texture image width is larger than the minimum, so we
+			 * can upload it directly.
+			 */
+			if ( DRM_COPY_FROM_USER( buffer, data, 
+						 dwords * sizeof(u32) ) ) {
+				DRM_ERROR( "EFAULT on data, %d dwords\n", 
+					   dwords );
 				return DRM_ERR(EFAULT);
 			}
-			buffer += 8;
-			data += tex_width;
+		} else {
+			/* Texture image width is less than the minimum, so we
+			 * need to pad out each image scanline to the minimum
+			 * width.
+			 */
+			for ( i = 0 ; i < tex->height ; i++ ) {
+				if ( DRM_COPY_FROM_USER( buffer, data, 
+							 tex_width ) ) {
+					DRM_ERROR( "EFAULT on pad, %d bytes\n",
+						   tex_width );
+					return DRM_ERR(EFAULT);
+				}
+				buffer += 8;
+				data += tex_width;
+			}
 		}
-	}
 
-	buf->pid = DRM_CURRENTPID;
-	buf->used = (dwords + 8) * sizeof(u32);
+		buf->pid = DRM_CURRENTPID;
+		buf->used = (dwords + 8) * sizeof(u32);
+		radeon_cp_dispatch_indirect( dev, buf, 0, buf->used );
+		radeon_cp_discard_buffer( dev, buf );
 
-	radeon_cp_dispatch_indirect( dev, buf, 0, buf->used );
-	radeon_cp_discard_buffer( dev, buf );
+	} while (image->height > 0);
 
 	/* Flush the pixel cache after the blit completes.  This ensures
 	 * the texture data is written out to memory before rendering
 	 * continues.
 	 */
 	BEGIN_RING( 4 );
-
 	RADEON_FLUSH_CACHE();
 	RADEON_WAIT_UNTIL_2D_IDLE();
-
 	ADVANCE_RING();
-
-	return ret;
+	return 0;
 }