Import mach64 dri driver from mesa 7.0.3. [tags: 7.0.4, 7.0.3]

author: Luc Verhaegen <libv@skynet.be> 2010-03-09 07:31:48 +0100
committer: Luc Verhaegen <libv@skynet.be> 2010-03-09 07:31:48 +0100
commit: 6f692a811778c3481b404e9c9a2d68504c39af08 (patch)
tree: a1ee6fe7581f8127dcadbf7d4189491718dc3cb6 /src
parent: 5dee9b7b19c1aa3a13618b08bc24f00677b5364b (diff)
28 files changed, 10768 insertions, 5 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index aa854c5..5591852 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1,7 +1,20 @@
 AM_CFLAGS = -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING
 
-xxx_dri_la_LTLIBRARIES = xxx_dri.la
-xxx_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) -Iserver
-xxx_dri_la_LDFLAGS = -module -noprefix -lm -ldl $(DRM_LIBS) $(DRI_LIBS)
-xxx_dri_ladir = @libdir@/dri
-xxx_dri_la_SOURCES = \
+mach64_dri_la_LTLIBRARIES = mach64_dri.la
+mach64_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) -Iserver
+mach64_dri_la_LDFLAGS = -module -noprefix -avoid-version -lm -ldl \
+	$(DRM_LIBS) $(DRI_LIBS)
+mach64_dri_ladir = @libdir@/dri
+mach64_dri_la_SOURCES = \
+	mach64_context.c \
+	mach64_ioctl.c \
+	mach64_screen.c \
+	mach64_span.c \
+	mach64_state.c \
+	mach64_tex.c \
+	mach64_texmem.c \
+	mach64_texstate.c \
+	mach64_tris.c \
+	mach64_vb.c \
+	mach64_dd.c \
+	mach64_lock.c
diff --git a/src/mach64_context.c b/src/mach64_context.c
new file mode 100644
index 0000000..ad661e1
--- /dev/null
+++ b/src/mach64_context.c
@@ -0,0 +1,361 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "glheader.h"
+#include "context.h"
+#include "simple_list.h"
+#include "imports.h"
+#include "matrix.h"
+#include "extensions.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "vbo/vbo.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+
+#include "drivers/common/driverfuncs.h"
+
+#include "mach64_context.h"
+#include "mach64_ioctl.h"
+#include "mach64_dd.h"
+#include "mach64_span.h"
+#include "mach64_state.h"
+#include "mach64_tex.h"
+#include "mach64_tris.h"
+#include "mach64_vb.h"
+
+#include "utils.h"
+#include "vblank.h"
+
+#define need_GL_ARB_multisample
+#define need_GL_ARB_vertex_buffer_object
+#include "extension_helper.h"
+
+#ifndef MACH64_DEBUG	/* mach64_context.h turns this name into the constant 0 when DO_DEBUG is off */
+int MACH64_DEBUG = (0);	/* bitmask of DEBUG_* flags, filled in by mach64CreateContext() */
+#endif
+
+static const struct dri_debug_control debug_control[] =	/* keyword -> DEBUG_* flag, given to driParseDebugString() */
+{
+    { "sync",   DEBUG_ALWAYS_SYNC },
+    { "api",    DEBUG_VERBOSE_API },
+    { "msg",    DEBUG_VERBOSE_MSG },
+    { "lru",    DEBUG_VERBOSE_LRU },
+    { "dri",    DEBUG_VERBOSE_DRI },
+    { "ioctl",  DEBUG_VERBOSE_IOCTL },
+    { "prims",  DEBUG_VERBOSE_PRIMS },
+    { "count",  DEBUG_VERBOSE_COUNT },
+    { "nowait", DEBUG_NOWAIT },
+    { "fall",   DEBUG_VERBOSE_FALLBACK },
+    { NULL,    0 }	/* end-of-table sentinel */
+};
+
+const struct dri_extension card_extensions[] =	/* extension list handed to driInitExtensions() in mach64CreateContext() */
+{
+    { "GL_ARB_multisample",                GL_ARB_multisample_functions },
+    { "GL_ARB_multitexture",               NULL },
+    { "GL_ARB_vertex_buffer_object",       GL_ARB_vertex_buffer_object_functions },	/* NOTE(review): presumably provided by extension_helper.h via the need_GL_* defines above - confirm */
+    { "GL_EXT_texture_edge_clamp",         NULL },
+    { "GL_MESA_ycbcr_texture",             NULL },
+    { "GL_SGIS_generate_mipmap",           NULL },
+    { NULL,                                NULL }	/* end-of-table sentinel */
+};
+
+
+/* Create the device specific context: the mach64 context record, its Mesa
+ * GLcontext, the texture heaps and the software helper modules.  Returns
+ * GL_TRUE on success, or GL_FALSE with every allocation made here released.
+ */
+GLboolean mach64CreateContext( const __GLcontextModes *glVisual,
+			       __DRIcontextPrivate *driContextPriv,
+                               void *sharedContextPrivate )
+{
+   GLcontext *ctx, *shareCtx;
+   __DRIscreenPrivate *driScreen = driContextPriv->driScreenPriv;
+   struct dd_function_table functions;
+   mach64ContextPtr mmesa;
+   mach64ScreenPtr mach64Screen;
+   int i, heap;
+   GLuint *c_textureSwapsPtr = NULL;
+
+#if DO_DEBUG
+   MACH64_DEBUG = driParseDebugString(getenv("MACH64_DEBUG"), debug_control);
+#endif
+
+   /* Allocate the mach64 context */
+   mmesa = (mach64ContextPtr) CALLOC( sizeof(*mmesa) );
+   if ( !mmesa )
+      return GL_FALSE;
+
+   /* Allocate the vertex buffer first: failing here, before any other
+    * resource exists, only requires releasing the context record.
+    */
+   mmesa->vert_buf = ALIGN_MALLOC(MACH64_BUFFER_SIZE, 32);
+   if ( !mmesa->vert_buf ) {
+      FREE(mmesa);
+      return GL_FALSE;
+   }
+   mmesa->vert_used = 0;
+   mmesa->vert_total = MACH64_BUFFER_SIZE;
+
+   /* Init default driver functions then plug in our Mach64-specific functions
+    * (the texture functions are especially important)
+    */
+   _mesa_init_driver_functions( &functions );
+   mach64InitDriverFuncs( &functions );
+   mach64InitIoctlFuncs( &functions );
+   mach64InitTextureFuncs( &functions );
+
+   /* Allocate the Mesa context, sharing with the given context if any */
+   shareCtx = sharedContextPrivate
+      ? ((mach64ContextPtr) sharedContextPrivate)->glCtx : NULL;
+   mmesa->glCtx = _mesa_create_context(glVisual, shareCtx,
+					&functions, (void *)mmesa);
+   if (!mmesa->glCtx) {
+      ALIGN_FREE(mmesa->vert_buf);
+      FREE(mmesa);
+      return GL_FALSE;
+   }
+   driContextPriv->driverPrivate = mmesa;
+   ctx = mmesa->glCtx;
+
+   mmesa->driContext = driContextPriv;
+   mmesa->driScreen = driScreen;
+   mmesa->driDrawable = NULL;
+   mmesa->hHWContext = driContextPriv->hHWContext;
+   mmesa->driHwLock = &driScreen->pSAREA->lock;
+   mmesa->driFd = driScreen->fd;
+
+   mach64Screen = mmesa->mach64Screen = (mach64ScreenPtr)driScreen->private;
+
+   /* Parse configuration files */
+   driParseConfigFiles (&mmesa->optionCache, &mach64Screen->optionCache,
+                        mach64Screen->driScreen->myNum, "mach64");
+
+   mmesa->sarea = (drm_mach64_sarea_t *)((char *)driScreen->pSAREA +
+				    sizeof(drm_sarea_t));
+
+   mmesa->CurrentTexObj[0] = NULL;
+   mmesa->CurrentTexObj[1] = NULL;
+
+   (void) memset( mmesa->texture_heaps, 0, sizeof( mmesa->texture_heaps ) );
+   make_empty_list( &mmesa->swapped );
+
+   mmesa->firstTexHeap = mach64Screen->firstTexHeap;
+   mmesa->lastTexHeap = mach64Screen->firstTexHeap + mach64Screen->numTexHeaps;
+
+   for ( i = mmesa->firstTexHeap ; i < mmesa->lastTexHeap ; i++ ) {
+      mmesa->texture_heaps[i] = driCreateTextureHeap( i, mmesa,
+	    mach64Screen->texSize[i],
+	    6, /* align to 64-byte boundary, use 12 for page-size boundary */
+	    MACH64_NR_TEX_REGIONS,
+	    (drmTextureRegionPtr)mmesa->sarea->tex_list[i],
+	    &mmesa->sarea->tex_age[i],
+	    &mmesa->swapped,
+	    sizeof( mach64TexObj ),
+	    (destroy_texture_object_t *) mach64DestroyTexObj );
+
+#if ENABLE_PERF_BOXES
+      c_textureSwapsPtr = & mmesa->c_textureSwaps;
+#endif
+      driSetTextureSwapCounterLocation( mmesa->texture_heaps[i],
+					c_textureSwapsPtr );
+   }
+
+   mmesa->RenderIndex = -1;		/* Impossible value */
+   mmesa->num_verts = 0;
+   mmesa->new_state = MACH64_NEW_ALL;
+   mmesa->dirty = MACH64_UPLOAD_ALL;
+
+   /* Set the maximum texture size small enough that we can
+    * guarantee that both texture units can bind a maximal texture
+    * and have them both in memory (on-card or AGP) at once.
+    * Test for 2 textures * bytes/texel * size * size.  There's no
+    * need to account for mipmaps since we only upload one level.
+    */
+   ctx->Const.MaxTextureUnits = 2;
+   ctx->Const.MaxTextureImageUnits = 2;
+   ctx->Const.MaxTextureCoordUnits = 2;
+
+   heap = mach64Screen->IsPCI ? MACH64_CARD_HEAP : MACH64_AGP_HEAP;
+
+   driCalculateMaxTextureLevels( & mmesa->texture_heaps[heap],
+				 1,
+				 & ctx->Const,
+				 mach64Screen->cpp,
+				 10, /* max 2D texture size is 1024x1024 */
+				 0,  /* 3D textures unsupported. */
+				 0,  /* cube textures unsupported. */
+				 0,  /* texture rectangles unsupported. */
+				 1,  /* mipmapping unsupported. */
+				 GL_TRUE, /* need to have both textures in
+					     either local or AGP memory */
+				 0 );
+
+#if ENABLE_PERF_BOXES
+   mmesa->boxes = ( getenv( "LIBGL_PERFORMANCE_BOXES" ) != NULL );
+#endif
+
+   /* Initialize the software rasterizer and helper modules.
+    */
+   _swrast_CreateContext( ctx );
+   _vbo_CreateContext( ctx );
+   _tnl_CreateContext( ctx );
+   _swsetup_CreateContext( ctx );
+
+   /* Install the customized pipeline:
+    */
+/*     _tnl_destroy_pipeline( ctx ); */
+/*     _tnl_install_pipeline( ctx, mach64_pipeline ); */
+
+   /* Configure swrast and T&L to match hardware characteristics:
+    */
+   _swrast_allow_pixel_fog( ctx, GL_FALSE );
+   _swrast_allow_vertex_fog( ctx, GL_TRUE );
+   _tnl_allow_pixel_fog( ctx, GL_FALSE );
+   _tnl_allow_vertex_fog( ctx, GL_TRUE );
+
+   driInitExtensions( ctx, card_extensions, GL_TRUE );
+
+   mach64InitVB( ctx );
+   mach64InitTriFuncs( ctx );
+   mach64DDInitStateFuncs( ctx );
+   mach64DDInitSpanFuncs( ctx );
+   mach64DDInitState( mmesa );
+
+   mmesa->do_irqs = (mmesa->mach64Screen->irq && !getenv("MACH64_NO_IRQS"));
+
+   mmesa->vblank_flags = (mmesa->do_irqs)
+      ? driGetDefaultVBlankFlags(&mmesa->optionCache) : VBLANK_FLAG_NO_IRQ;
+
+   if (driQueryOptionb(&mmesa->optionCache, "no_rast")) {
+      fprintf(stderr, "disabling 3D acceleration\n");
+      FALLBACK(mmesa, MACH64_FALLBACK_DISABLE, 1);
+   }
+
+   return GL_TRUE;
+}
+
+/* Destroy the device specific context and the resources hanging off it.
+ */
+void mach64DestroyContext( __DRIcontextPrivate *driContextPriv  )
+{
+   mach64ContextPtr mmesa = (mach64ContextPtr) driContextPriv->driverPrivate;
+   GLcontext *glCtx;
+   GLboolean last_in_share_group;
+   int heap;
+
+   assert(mmesa);  /* should never be null */
+   if ( !mmesa )
+      return;
+
+   glCtx = mmesa->glCtx;
+   last_in_share_group = (glCtx->Shared->RefCount == 1);
+
+   _swsetup_DestroyContext( glCtx );
+   _tnl_DestroyContext( glCtx );
+   _vbo_DestroyContext( glCtx );
+   _swrast_DestroyContext( glCtx );
+
+   if ( last_in_share_group ) {
+      /* This share group is about to go away, so our private texture
+       * object data can be released along with the heaps.
+       */
+      for ( heap = mmesa->firstTexHeap ; heap < mmesa->lastTexHeap ; heap++ ) {
+	 driDestroyTextureHeap( mmesa->texture_heaps[heap] );
+	 mmesa->texture_heaps[heap] = NULL;
+      }
+
+      assert( is_empty_list( & mmesa->swapped ) );
+   }
+
+   mach64FreeVB( glCtx );
+
+   /* Free the vertex buffer */
+   if ( mmesa->vert_buf )
+      ALIGN_FREE( mmesa->vert_buf );
+
+   /* Detach the driver data, then free the Mesa context and our record. */
+   glCtx->DriverCtx = NULL;
+   _mesa_destroy_context( glCtx );
+   FREE( mmesa );
+}
+
+/* Make the context in driContextPriv current for the given draw and read
+ * drawables; a NULL driContextPriv releases the current context instead.
+ */
+GLboolean
+mach64MakeCurrent( __DRIcontextPrivate *driContextPriv,
+                 __DRIdrawablePrivate *driDrawPriv,
+                 __DRIdrawablePrivate *driReadPriv )
+{
+   if ( !driContextPriv ) {
+      _mesa_make_current( NULL, NULL, NULL );
+   } else {
+      GET_CURRENT_CONTEXT(ctx);
+      mach64ContextPtr outgoing = ctx ? MACH64_CONTEXT(ctx) : NULL;
+      mach64ContextPtr incoming = (mach64ContextPtr) driContextPriv->driverPrivate;
+
+      /* A context switch marks all hardware state for re-upload. */
+      if ( incoming != outgoing ) {
+	 incoming->new_state |= MACH64_NEW_CONTEXT;
+	 incoming->dirty = MACH64_UPLOAD_ALL;
+      }
+
+      driDrawableInitVBlank( driDrawPriv, incoming->vblank_flags,
+			     &incoming->vbl_seq );
+
+      /* A newly bound drawable needs the viewport recomputed. */
+      if ( incoming->driDrawable != driDrawPriv ) {
+	 incoming->driDrawable = driDrawPriv;
+	 mach64CalcViewport( incoming->glCtx );
+      }
+
+      _mesa_make_current( incoming->glCtx,
+                          (GLframebuffer *) driDrawPriv->driverPrivate,
+                          (GLframebuffer *) driReadPriv->driverPrivate );
+
+      incoming->new_state |=  MACH64_NEW_CLIP;
+   }
+
+   return GL_TRUE;
+}
+
+
+/* Unbind hook for the DRI layer: this driver has nothing to undo here,
+ * so the request is always reported as successful. */
+GLboolean mach64UnbindContext( __DRIcontextPrivate *driContextPriv )
+{
+   (void) driContextPriv;	/* intentionally unused */
+   return GL_TRUE;
+}
diff --git a/src/mach64_context.h b/src/mach64_context.h
new file mode 100644
index 0000000..8d89452
--- /dev/null
+++ b/src/mach64_context.h
@@ -0,0 +1,364 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#ifndef __MACH64_CONTEXT_H__
+#define __MACH64_CONTEXT_H__
+
+#include "dri_util.h"
+#include "drm.h"
+#include "mach64_drm.h"
+
+#include "mtypes.h"
+
+#include "mach64_reg.h"
+
+#include "texmem.h"
+
+struct mach64_context;
+typedef struct mach64_context mach64ContextRec;
+typedef struct mach64_context *mach64ContextPtr;
+
+#include "mach64_lock.h"
+#include "mach64_screen.h"
+
+/* Experimental driver options */
+#define MACH64_CLIENT_STATE_EMITS       0
+
+/* Performance monitoring: enables the perf-box counters in struct mach64_context */
+#define ENABLE_PERF_BOXES               1
+
+/* Native vertex format: 1 selects union mach64_vertex_t, 0 the templated tnl_dd vertex */
+#define MACH64_NATIVE_VTXFMT		1
+
+/* Flags for what context state needs to be updated (bits of new_state):
+ */
+#define MACH64_NEW_ALPHA		0x0001
+#define MACH64_NEW_DEPTH		0x0002
+#define MACH64_NEW_FOG			0x0004
+#define MACH64_NEW_CLIP			0x0008
+#define MACH64_NEW_CULL			0x0010
+#define MACH64_NEW_MASKS		0x0020
+#define MACH64_NEW_RENDER_UNUSED	0x0040
+#define MACH64_NEW_WINDOW		0x0080
+#define MACH64_NEW_TEXTURE		0x0100
+#define MACH64_NEW_CONTEXT		0x0200
+#define MACH64_NEW_ALL			0x03ff	/* union of the ten bits above */
+
+/* Flags for software fallback cases:
+ */
+#define MACH64_FALLBACK_TEXTURE		0x0001
+#define MACH64_FALLBACK_DRAW_BUFFER	0x0002
+#define MACH64_FALLBACK_READ_BUFFER	0x0004
+#define MACH64_FALLBACK_STENCIL		0x0008
+#define MACH64_FALLBACK_RENDER_MODE	0x0010
+#define MACH64_FALLBACK_LOGICOP		0x0020
+#define MACH64_FALLBACK_SEP_SPECULAR	0x0040
+#define MACH64_FALLBACK_BLEND_EQ	0x0080
+#define MACH64_FALLBACK_BLEND_FUNC	0x0100
+#define MACH64_FALLBACK_DISABLE		0x0200	/* raised by mach64CreateContext() when "no_rast" is set */
+
+#define CARD32 GLuint		/* KW: For building in mesa tree */
+
+#if MACH64_NATIVE_VTXFMT
+
+/* The vertex structures.
+ */
+
+/* The size of this union is not of relevance:
+ */
+union mach64_vertex_t {	/* the same 16 slots viewed as floats, uints, ushort pairs or byte quads */
+   GLfloat f[16];
+   GLuint ui[16];
+   GLushort us2[16][2];
+   GLubyte ub4[16][4];
+};
+
+typedef union mach64_vertex_t mach64Vertex, *mach64VertexPtr;
+
+#else
+
+/* Use the templated vertex format (names generated with the mach64 prefix):
+ */
+#define TAG(x) mach64##x
+#include "tnl_dd/t_dd_vertex.h"
+#undef TAG
+
+#endif /* MACH64_NATIVE_VTXFMT */
+
+/* Subpixel offsets for window coordinates:
+ * These are enough to fix most glean tests except polygonOffset.
+ * There are also still some gaps that show in e.g. the tunnel Mesa demo
+ * or the lament xscreensaver hack.
+ */
+#define SUBPIXEL_X	(0.0125F)
+#define SUBPIXEL_Y	(0.15F)
+
+
+/* Rasterization entry points, one per primitive kind: each takes the driver
+ * context followed by the primitive's vertices. */
+typedef void (*mach64_tri_func)( mach64ContextPtr mmesa,
+				 mach64Vertex *v0,
+				 mach64Vertex *v1,
+				 mach64Vertex *v2 );
+typedef void (*mach64_line_func)( mach64ContextPtr mmesa,
+				  mach64Vertex *v0,
+				  mach64Vertex *v1 );
+typedef void (*mach64_point_func)( mach64ContextPtr mmesa,
+				   mach64Vertex *v0 );
+
+struct mach64_texture_object {	/* driver-private texture data; texture heaps are created with sizeof(mach64TexObj) */
+   driTextureObject   base;
+
+   GLuint bufAddr;	/* NOTE(review): presumably the texture's address in card/AGP memory - confirm */
+
+   GLint heap; /* same as base.heap->heapId */
+
+   /* For communicating values from mach64AllocTexObj(), mach64SetTexImages()
+    * to mach64UpdateTextureUnit(). Alternately, we can use the tObj values or
+    * set the context registers directly.
+    */
+   GLint widthLog2;
+   GLint heightLog2;
+   GLint maxLog2;
+
+   GLint hasAlpha;
+   GLint textureFormat;
+
+   GLboolean BilinearMin;	/* NOTE(review): the four booleans look like filter/wrap state per axis - confirm against mach64_tex.c */
+   GLboolean BilinearMag;
+   GLboolean ClampS;
+   GLboolean ClampT;
+};
+
+typedef struct mach64_texture_object mach64TexObj, *mach64TexObjPtr;
+
+struct mach64_context {	/* per-context driver state; reached from a GLcontext through MACH64_CONTEXT() */
+   GLcontext *glCtx;
+
+   /* Driver and hardware state management
+    */
+   GLuint new_state;			/* MACH64_NEW_* flags */
+   GLuint dirty;			/* Hardware state to be updated */
+   drm_mach64_context_regs_t setup;
+
+   GLuint NewGLState;
+   GLuint Fallback;			/* NOTE(review): presumably MACH64_FALLBACK_* bits - confirm */
+   GLuint SetupIndex;
+   GLuint SetupNewInputs;
+   GLuint RenderIndex;
+   GLfloat hw_viewport[16];
+   GLfloat depth_scale;
+   GLuint vertex_size;
+   GLuint vertex_stride_shift;
+   GLuint vertex_format;
+   GLuint num_verts;
+   GLubyte *verts;		
+
+   CARD32 Color;			/* Current draw color */
+   CARD32 ClearColor;			/* Color used to clear color buffer */
+   CARD32 ClearDepth;			/* Value used to clear depth buffer */
+
+   /* Map GL texture units onto hardware
+    */
+   GLint multitex;
+   GLint tmu_source[2];
+   GLint tex_dest[2];
+
+   /* Texture object bookkeeping
+    */
+   mach64TexObjPtr CurrentTexObj[2];
+
+   GLint firstTexHeap, lastTexHeap;	/* heap index range [first, last) copied from the screen */
+   driTexHeap *texture_heaps[MACH64_NR_TEX_HEAPS];
+   driTextureObject swapped;		/* list head passed to every driCreateTextureHeap() call */
+
+   /* Fallback rasterization functions
+    */
+   mach64_point_func draw_point;
+   mach64_line_func draw_line;
+   mach64_tri_func draw_tri;
+
+   /* Culling */
+   GLfloat backface_sign;
+
+   /* DMA buffers
+    */
+   void *vert_buf;			/* ALIGN_MALLOC'ed in mach64CreateContext() */
+   size_t vert_total;			/* size vert_buf was allocated with (MACH64_BUFFER_SIZE) */
+   unsigned vert_used;			/* amount of vert_buf in use; DMAADVANCE() rewinds it */
+
+   GLuint hw_primitive;
+   GLenum render_primitive;
+
+   /* Visual, drawable, cliprect and scissor information
+    */
+   GLint drawOffset, drawPitch;
+   GLint drawX, drawY;                  /* origin of drawable in draw buffer */
+   GLint readOffset, readPitch;
+
+   GLuint numClipRects;			/* Cliprects for the draw buffer */
+   drm_clip_rect_t *pClipRects;
+
+   GLint scissor;
+   drm_clip_rect_t ScissorRect;	/* Current software scissor */
+
+   /* Mirrors of some DRI state
+    */
+   __DRIcontextPrivate	*driContext;	/* DRI context */
+   __DRIscreenPrivate	*driScreen;	/* DRI screen */
+   __DRIdrawablePrivate	*driDrawable;	/* DRI drawable bound to this ctx */
+
+   unsigned int lastStamp;		/* mirror driDrawable->lastStamp */
+
+   drm_context_t hHWContext;
+   drm_hw_lock_t *driHwLock;
+   int driFd;
+
+   mach64ScreenPtr mach64Screen;	/* Screen private DRI data */
+   drm_mach64_sarea_t *sarea;		/* Private SAREA data */
+
+   GLuint hardwareWentIdle;
+
+#if ENABLE_PERF_BOXES
+   /* Performance counters
+    */
+   GLuint boxes;			/* Draw performance boxes */
+   GLuint c_clears;
+   GLuint c_drawWaits;
+   GLuint c_textureSwaps;
+   GLuint c_textureBytes;
+   GLuint c_agpTextureBytes;
+   GLuint c_texsrc_agp;
+   GLuint c_texsrc_card;
+   GLuint c_vertexBuffers;
+#endif
+
+   /* VBI
+    */
+   GLuint vbl_seq;
+   GLuint vblank_flags;
+   GLuint do_irqs;			/* set when the screen has an IRQ and MACH64_NO_IRQS is unset */
+
+   /* Configuration cache
+    */
+   driOptionCache optionCache;
+};
+
+#define MACH64_CONTEXT(ctx)		((mach64ContextPtr)((ctx)->DriverCtx))	/* driver context of a GLcontext; argument parenthesized so any pointer expression works */
+
+
+extern GLboolean mach64CreateContext( const __GLcontextModes *glVisual,
+				      __DRIcontextPrivate *driContextPriv,
+                                      void *sharedContextPrivate );
+
+extern void mach64DestroyContext( __DRIcontextPrivate * );
+
+extern GLboolean mach64MakeCurrent( __DRIcontextPrivate *driContextPriv,
+                                    __DRIdrawablePrivate *driDrawPriv,
+                                    __DRIdrawablePrivate *driReadPriv );
+
+extern GLboolean mach64UnbindContext( __DRIcontextPrivate *driContextPriv );
+
+/* ================================================================
+ * Byte ordering: LE32_* load/store 32-bit little-endian values at address x
+ */
+#if MESA_LITTLE_ENDIAN == 1
+#define LE32_IN( x )		( *(GLuint *)(x) )
+#define LE32_IN_FLOAT( x )	( *(GLfloat *)(x) )
+#define LE32_OUT( x, y )	do { *(GLuint *)(x) = (y); } while (0)
+#define LE32_OUT_FLOAT( x, y )	do { *(GLfloat *)(x) = (y); } while (0)
+#else	/* big-endian host: swap bytes; floats are punned through a union, not pointer casts */
+#include <byteswap.h>
+#define LE32_IN( x )		bswap_32( *(GLuint *)(x) )
+#define LE32_IN_FLOAT( x )						\
+({									\
+   union { GLuint ui; GLfloat f; } __tmp = { bswap_32( *(GLuint *)(x) ) }; \
+   __tmp.f;								\
+})
+#define LE32_OUT( x, y )	do { *(GLuint *)(x) = bswap_32( y ); } while (0)
+#define LE32_OUT_FLOAT( x, y )						\
+do {									\
+   union { GLfloat f; GLuint ui; } __tmp;				\
+   __tmp.f = (y);							\
+   *(GLuint *)(x) = bswap_32( __tmp.ui );				\
+} while (0)
+#endif
+
+/* ================================================================
+ * DMA buffers: these helpers expect a local `mmesa' context pointer in scope
+ */
+
+#define DMALOCALS       CARD32 *buf=NULL; int requested=0; int outcount=0	/* locals the macros below operate on */
+
+/* Reserve `dwords' 32-bit words through mach64AllocDmaLocked(); called while locked for interleaved client-side state emits */
+#define DMAGETPTR( dwords )					\
+do {								\
+   requested = (dwords);					\
+   buf = (CARD32 *)mach64AllocDmaLocked( mmesa, ((dwords)*4) );	\
+   outcount = 0;						\
+} while(0)
+/* Append one register index / value pair (two dwords, little-endian) to buf. */
+#define DMAOUTREG( reg, val )				\
+do {							\
+   LE32_OUT( &buf[outcount++], ADRINDEX( reg ) );	\
+   LE32_OUT( &buf[outcount++], ( val ) );		\
+} while(0)
+/* Hand back reserved-but-unwritten dwords by rewinding mmesa->vert_used. */
+#define DMAADVANCE()						\
+do {								\
+   if (outcount < requested) {					\
+      mmesa->vert_used -= (requested - outcount) * 4;	\
+   }								\
+} while(0)
+
+/* ================================================================
+ * Debugging: MACH64_DEBUG is a bitmask of the DEBUG_* flags defined below.
+ */
+
+#define DO_DEBUG		1	/* 1: MACH64_DEBUG is a runtime variable; 0: it is the constant 0 */
+
+#if DO_DEBUG
+extern int MACH64_DEBUG;
+#else
+#define MACH64_DEBUG		0
+#endif
+/* The quoted words are the keywords mach64_context.c maps onto each flag. */
+#define DEBUG_ALWAYS_SYNC	0x001	/* "sync" */
+#define DEBUG_VERBOSE_API	0x002	/* "api" */
+#define DEBUG_VERBOSE_MSG	0x004	/* "msg" */
+#define DEBUG_VERBOSE_LRU	0x008	/* "lru" */
+#define DEBUG_VERBOSE_DRI	0x010	/* "dri" */
+#define DEBUG_VERBOSE_IOCTL	0x020	/* "ioctl" */
+#define DEBUG_VERBOSE_PRIMS	0x040	/* "prims" */
+#define DEBUG_VERBOSE_COUNT	0x080	/* "count" */
+#define DEBUG_NOWAIT		0x100	/* "nowait" */
+#define DEBUG_VERBOSE_FALLBACK	0x200	/* "fall" */
+#endif /* __MACH64_CONTEXT_H__ */
diff --git a/src/mach64_dd.c b/src/mach64_dd.c
new file mode 100644
index 0000000..17e8d74
--- /dev/null
+++ b/src/mach64_dd.c
@@ -0,0 +1,134 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "mach64_context.h"
+#include "mach64_ioctl.h"
+#include "mach64_state.h"
+#include "mach64_vb.h"
+#include "mach64_dd.h"
+
+#include "context.h"
+#include "utils.h"
+#include "framebuffer.h"
+
+#define DRIVER_DATE	"20051019"
+
+/* Return the size of the drawable bound to the current context, sampled
+ * with the hardware lock held.  The `buffer' argument is not consulted. */
+static void mach64DDGetBufferSize( GLframebuffer *buffer,
+				   GLuint *width, GLuint *height )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   LOCK_HARDWARE( mmesa );
+   *width  = mmesa->driDrawable->w;
+   *height = mmesa->driDrawable->h;
+   UNLOCK_HARDWARE( mmesa );
+}
+
+/* Return various strings for glGetString(): GL_VENDOR and GL_RENDERER are
+ * answered here, any other name yields NULL.
+ */
+static const GLubyte *mach64DDGetString( GLcontext *ctx, GLenum name )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   static char buffer[128];
+   const char * card_name = "Mach64 [Rage Pro]";
+   GLuint agp_mode = mmesa->mach64Screen->IsPCI ? 0 :
+      mmesa->mach64Screen->AGPMode;
+
+   switch ( name ) {
+   case GL_VENDOR:
+      return (GLubyte*)"Gareth Hughes, Leif Delgass, Jos� Fonseca";
+
+   case GL_RENDERER:
+      /* Formatted into the static buffer; the return value is not needed. */
+      (void) driGetRendererString( buffer, card_name, DRIVER_DATE,
+				   agp_mode );
+      return (GLubyte *)buffer;
+
+   default:
+      return NULL;
+   }
+}
+
+/* Send all commands to the hardware.  If vertex buffers or indirect
+ * buffers are in use, then we need to make sure they are sent to the
+ * hardware.  All commands that are normally sent to the ring are
+ * already considered `flushed'.
+ */
+static void mach64DDFlush( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   LOCK_HARDWARE( mmesa );
+   FLUSH_DMA_LOCKED( mmesa );
+   UNLOCK_HARDWARE( mmesa );
+
+#if ENABLE_PERF_BOXES
+   if ( mmesa->boxes ) {	/* boxes is set from LIBGL_PERFORMANCE_BOXES at context creation */
+      LOCK_HARDWARE( mmesa );	/* taken again: the boxes are drawn under their own lock/unlock pair */
+      mach64PerformanceBoxesLocked( mmesa );
+      UNLOCK_HARDWARE( mmesa );
+   }
+
+   /* Log the performance counters if necessary */
+   mach64PerformanceCounters( mmesa );
+#endif
+}
+
+/* Make sure all commands have been sent to the hardware and have
+ * completed processing: flush via mach64DDFlush(), then mach64WaitForIdle().
+ */
+static void mach64DDFinish( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+#if ENABLE_PERF_BOXES
+   /* Bump the performance counter */
+   mmesa->c_drawWaits++;
+#endif
+
+   mach64DDFlush( ctx );
+   mach64WaitForIdle( mmesa );
+}
+
+/* Plug the miscellaneous mach64 hooks defined in this file into the
+ * device-driver function table. */
+void mach64InitDriverFuncs( struct dd_function_table *functions )
+{
+   /* Command submission hooks first, then the query hooks. */
+   functions->Flush = mach64DDFlush;
+   functions->Finish = mach64DDFinish;
+   functions->GetString = mach64DDGetString;
+   functions->GetBufferSize = mach64DDGetBufferSize;
+}
diff --git a/src/mach64_dd.h b/src/mach64_dd.h
new file mode 100644
index 0000000..74cf1d3
--- /dev/null
+++ b/src/mach64_dd.h
@@ -0,0 +1,36 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#ifndef __MACH64_DD_H__
+#define __MACH64_DD_H__
+
+extern void mach64InitDriverFuncs( struct dd_function_table *functions );
+
+#endif
diff --git a/src/mach64_ioctl.c b/src/mach64_ioctl.c
new file mode 100644
index 0000000..36e7d3c
--- /dev/null
+++ b/src/mach64_ioctl.c
@@ -0,0 +1,932 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+#include <errno.h>
+
+#include "mach64_context.h"
+#include "mach64_state.h"
+#include "mach64_ioctl.h"
+#include "mach64_tex.h"
+
+#include "imports.h"
+#include "macros.h"
+
+#include "swrast/swrast.h"
+
+#include "vblank.h"
+
+#define MACH64_TIMEOUT        10 /* the DRM already has a timeout, so keep this small */
+
+
+/* =============================================================
+ * Hardware vertex buffer handling
+ */
+
+/* Get a new VB from the pool of vertex buffers in AGP space.
+ */
+drmBufPtr mach64GetBufferLocked( mach64ContextPtr mmesa )
+{
+   const int fd = mmesa->mach64Screen->driScreen->fd;
+   int granted_index = 0;
+   int granted_size = 0;
+   int attempt;
+   drmDMAReq request;
+
+   /* Send nothing; ask for exactly one buffer of MACH64_BUFFER_SIZE. */
+   request.context = mmesa->hHWContext;
+   request.send_count = 0;
+   request.send_list = NULL;
+   request.send_sizes = NULL;
+   request.flags = 0;
+   request.request_count = 1;
+   request.request_size = MACH64_BUFFER_SIZE;
+   request.request_list = &granted_index;
+   request.request_sizes = &granted_size;
+   request.granted_count = 0;
+
+   /* Try at most MACH64_TIMEOUT times; the first success wins. */
+   for ( attempt = 0 ; attempt < MACH64_TIMEOUT ; attempt++ ) {
+      if ( drmDMA( fd, &request ) == 0 ) {
+         drmBufPtr buf = &mmesa->mach64Screen->buffers->list[granted_index];
+
+         buf->used = 0;
+#if ENABLE_PERF_BOXES
+         /* Bump the performance counter */
+         mmesa->c_vertexBuffers++;
+#endif
+         return buf;
+      }
+   }
+
+   /* Every attempt failed: reset the engine, drop the lock and bail out. */
+   drmCommandNone( fd, DRM_MACH64_RESET );
+   UNLOCK_HARDWARE( mmesa );
+   fprintf( stderr, "Error: Could not get new VB... exiting\n" );
+   exit( -1 );
+
+   /* Not reached; keeps compilers that do not know exit() quiet. */
+   return NULL;
+}
+
+/* Issue a single DRM_MACH64_VERTEX request for the given buffer, retrying
+ * while the DRM answers -EAGAIN (bounded by MACH64_TIMEOUT).  Any other
+ * failure releases the hardware lock and terminates the process.
+ */
+static void mach64SubmitVertexBuffer( mach64ContextPtr mmesa, int fd,
+                                      int prim, void *buffer,
+                                      int used, int discard )
+{
+   drm_mach64_vertex_t request;
+   int retries = 0;
+   int ret;
+
+   request.prim = prim;
+   request.buf = buffer;
+   request.used = used;
+   request.discard = discard;
+
+   do {
+      ret = drmCommandWrite( fd, DRM_MACH64_VERTEX,
+                             &request, sizeof(drm_mach64_vertex_t) );
+   } while ( ( ret == -EAGAIN ) && ( retries++ < MACH64_TIMEOUT ) );
+
+   if ( ret ) {
+      UNLOCK_HARDWARE( mmesa );
+      fprintf( stderr, "Error flushing vertex buffer: return = %d\n", ret );
+      exit( -1 );
+   }
+}
+
+/* Hand the accumulated vertex data to the DRM.  The caller must hold the
+ * hardware lock.  The vertex counters in the context are reset up front;
+ * if nothing was queued the function returns without touching the DRM.
+ *
+ * When the cliprects fit in the SAREA and are not flagged dirty, one
+ * request is sent.  Otherwise the cliprects are copied into the SAREA in
+ * batches of MACH64_NR_SAREA_CLIPRECTS and the same buffer is submitted
+ * once per batch, asking for it to be discarded only with the last one.
+ */
+void mach64FlushVerticesLocked( mach64ContextPtr mmesa )
+{
+   drm_clip_rect_t *cliprects = mmesa->pClipRects;
+   const int num_rects = mmesa->numClipRects;
+   void *verts = mmesa->vert_buf;
+   int used = mmesa->vert_used;
+   const int prim = mmesa->hw_primitive;
+   const int fd = mmesa->driScreen->fd;
+
+   mmesa->num_verts = 0;
+   mmesa->vert_used = 0;
+
+   if ( used == 0 )
+      return;
+
+   if ( mmesa->dirty & ~MACH64_UPLOAD_CLIPRECTS )
+      mach64EmitHwStateLocked( mmesa );
+
+   /* Nothing visible: still submit, but with an empty payload. */
+   if ( num_rects == 0 )
+      used = 0;
+
+   if ( num_rects > MACH64_NR_SAREA_CLIPRECTS )
+      mmesa->dirty |= MACH64_UPLOAD_CLIPRECTS;
+
+   if ( used && ( mmesa->dirty & MACH64_UPLOAD_CLIPRECTS ) ) {
+      int first = 0;
+
+      while ( first < num_rects ) {
+         const int end = MIN2( first + MACH64_NR_SAREA_CLIPRECTS, num_rects );
+         drm_clip_rect_t *dst = mmesa->sarea->boxes;
+
+         mmesa->sarea->nbox = end - first;
+         while ( first < end ) {
+            *dst++ = cliprects[first++];
+         }
+
+         mmesa->sarea->dirty |= MACH64_UPLOAD_CLIPRECTS;
+
+         /* Finished with the buffer once the last batch goes out. */
+         mach64SubmitVertexBuffer( mmesa, fd, prim, verts, used,
+                                   ( end == num_rects ) ? 1 : 0 );
+      }
+   } else {
+      /* FIXME: Is this really necessary */
+      mmesa->sarea->nbox = ( num_rects == 1 ) ? 0 : num_rects;
+
+      mach64SubmitVertexBuffer( mmesa, fd, prim, verts, used, 1 );
+   }
+
+   mmesa->dirty &= ~MACH64_UPLOAD_CLIPRECTS;
+}
+
+/* ================================================================
+ * Texture uploads
+ */
+
+/* Submit one DRM_MACH64_BLIT request built from the arguments.  The caller
+ * must hold the hardware lock.  The request is retried while the DRM
+ * answers -EAGAIN (bounded by MACH64_TIMEOUT); any remaining error
+ * releases the lock and terminates the process.
+ */
+void mach64FireBlitLocked( mach64ContextPtr mmesa, void *buffer,
+                           GLint offset, GLint pitch, GLint format,
+                           GLint x, GLint y, GLint width, GLint height )
+{
+   drm_mach64_blit_t request;
+   int retries;
+   int ret;
+
+   request.buf = buffer;
+   request.offset = offset;
+   request.pitch = pitch;
+   request.format = format;
+   request.x = x;
+   request.y = y;
+   request.width = width;
+   request.height = height;
+
+   for ( retries = 0 ; ; retries++ ) {
+      ret = drmCommandWrite( mmesa->driFd, DRM_MACH64_BLIT,
+                             &request, sizeof(drm_mach64_blit_t) );
+      if ( ret != -EAGAIN || retries >= MACH64_TIMEOUT )
+         break;
+   }
+
+   if ( ret == 0 )
+      return;
+
+   UNLOCK_HARDWARE( mmesa );
+   fprintf( stderr, "DRM_MACH64_BLIT: return = %d\n", ret );
+   exit( -1 );
+}
+
+
+/* ================================================================
+ * SwapBuffers with client-side throttling
+ */
+static void delay( void ) {
+   /* Prevent an optimizing compiler from removing a spin loop.  An empty
+    * static function is inlined and the loop around it discarded, so
+    * perform a volatile access: the compiler has to keep it for every
+    * call.
+    */
+   volatile int spin = 0;
+
+   (void) spin;
+}
+
+/* Throttle the frame rate -- only allow MACH64_MAX_QUEUED_FRAMES
+ * pending swap buffers requests at a time.
+ *
+ * GH: We probably don't want a timeout here, as we can wait as
+ * long as we want for a frame to complete.  If it never does, then
+ * the card has locked.
+ */
+static int mach64WaitForFrameCompletion( mach64ContextPtr mmesa )
+{
+   const int fd = mmesa->driFd;
+   int polls = 0;
+   int queued;
+
+   /* Poll until fewer than MACH64_MAX_QUEUED_FRAMES swaps are pending.
+    * Returns the number of polls that were needed (0 if none), or 1
+    * straight away when DEBUG_NOWAIT is set and we would have to wait.
+    */
+   while ( mmesa->sarea->frames_queued >= MACH64_MAX_QUEUED_FRAMES ) {
+      drm_mach64_getparam_t query;
+      int spin;
+      int ret;
+
+      if ( MACH64_DEBUG & DEBUG_NOWAIT )
+         return 1;
+
+      query.param = MACH64_PARAM_FRAMES_QUEUED;
+      query.value = &queued; /* also copied into sarea->frames_queued by DRM */
+
+      ret = drmCommandWriteRead( fd, DRM_MACH64_GETPARAM,
+                                 &query, sizeof(query) );
+      if ( ret ) {
+         UNLOCK_HARDWARE( mmesa );
+         fprintf( stderr, "DRM_MACH64_GETPARAM: return = %d\n", ret );
+         exit( -1 );
+      }
+
+      polls++;
+
+      /* Spin in place a bit so we aren't hammering the register */
+      for ( spin = 1024 ; spin > 0 ; spin-- )
+         delay();
+   }
+
+   return polls;
+}
+
+/* Copy the back color buffer to the front color buffer.
+ */
+void mach64CopyBuffer( const __DRIdrawablePrivate *dPriv )
+{
+   mach64ContextPtr mmesa;
+   drm_clip_rect_t *rects;
+   GLint num_rects;
+   GLint next;
+   GLboolean missed_target;
+
+   assert(dPriv);
+   assert(dPriv->driContextPriv);
+   assert(dPriv->driContextPriv->driverPrivate);
+
+   mmesa = (mach64ContextPtr) dPriv->driContextPriv->driverPrivate;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "\n********************************\n" );
+      fprintf( stderr, "\n%s( %p )\n\n",
+               __FUNCTION__, mmesa->glCtx );
+      fflush( stderr );
+   }
+
+   /* Push out whatever vertices are still batched up. */
+   FLUSH_BATCH( mmesa );
+
+   LOCK_HARDWARE( mmesa );
+
+   /* Throttle: a zero return means no waiting on earlier frames was
+    * needed, which is recorded as "hardware went idle".
+    */
+   mmesa->hardwareWentIdle = mach64WaitForFrameCompletion( mmesa ) ? 0 : 1;
+
+#if ENABLE_PERF_BOXES
+   if ( mmesa->boxes ) {
+      mach64PerformanceBoxesLocked( mmesa );
+   }
+#endif
+
+   /* The vblank wait is done without the hardware lock held. */
+   UNLOCK_HARDWARE( mmesa );
+   driWaitForVBlank( dPriv, &mmesa->vbl_seq, mmesa->vblank_flags, &missed_target );
+   LOCK_HARDWARE( mmesa );
+
+   /* use front buffer cliprects */
+   num_rects = dPriv->numClipRects;
+   rects = dPriv->pClipRects;
+
+   /* One swap ioctl per batch of at most MACH64_NR_SAREA_CLIPRECTS rects. */
+   next = 0;
+   while ( next < num_rects ) {
+      const GLint end = MIN2( next + MACH64_NR_SAREA_CLIPRECTS , num_rects );
+      const GLint batch = end - next;
+      drm_clip_rect_t *dst = mmesa->sarea->boxes;
+      GLint ret;
+
+      while ( next < end ) {
+         *dst++ = rects[next++];
+      }
+      mmesa->sarea->nbox = batch;
+
+      ret = drmCommandNone( mmesa->driFd, DRM_MACH64_SWAP );
+      if ( ret ) {
+         UNLOCK_HARDWARE( mmesa );
+         fprintf( stderr, "DRM_MACH64_SWAP: return = %d\n", ret );
+         exit( -1 );
+      }
+   }
+
+   if ( MACH64_DEBUG & DEBUG_ALWAYS_SYNC ) {
+      mach64WaitForIdleLocked( mmesa );
+   }
+
+   UNLOCK_HARDWARE( mmesa );
+
+   /* Flag context, misc and cliprect state for re-upload. */
+   mmesa->dirty |= (MACH64_UPLOAD_CONTEXT |
+                    MACH64_UPLOAD_MISC |
+                    MACH64_UPLOAD_CLIPRECTS);
+
+#if ENABLE_PERF_BOXES
+   /* Log the performance counters if necessary */
+   mach64PerformanceCounters( mmesa );
+#endif
+}
+
+#if ENABLE_PERF_BOXES
+/* ================================================================
+ * Performance monitoring
+ */
+
+/* Print the per-frame performance counters to stderr when
+ * DEBUG_VERBOSE_COUNT is set, and reset them in either case.
+ */
+void mach64PerformanceCounters( mach64ContextPtr mmesa )
+{
+   const int verbose = ( MACH64_DEBUG & DEBUG_VERBOSE_COUNT ) != 0;
+
+   if ( verbose ) {
+      /* report performance counters */
+      fprintf( stderr, "mach64CopyBuffer: vertexBuffers:%i drawWaits:%i clears:%i\n",
+               mmesa->c_vertexBuffers, mmesa->c_drawWaits, mmesa->c_clears );
+   }
+
+   mmesa->c_vertexBuffers = 0;
+   mmesa->c_drawWaits = 0;
+   mmesa->c_clears = 0;
+
+   /* The texture line is only reported when any texture counter moved. */
+   if ( mmesa->c_textureSwaps || mmesa->c_textureBytes || mmesa->c_agpTextureBytes ) {
+      if ( verbose ) {
+         fprintf( stderr, "    textureSwaps:%i  textureBytes:%i agpTextureBytes:%i\n",
+                  mmesa->c_textureSwaps, mmesa->c_textureBytes, mmesa->c_agpTextureBytes );
+      }
+      mmesa->c_textureSwaps = 0;
+      mmesa->c_textureBytes = 0;
+      mmesa->c_agpTextureBytes = 0;
+   }
+
+   mmesa->c_texsrc_agp = 0;
+   mmesa->c_texsrc_card = 0;
+
+   if ( verbose ) {
+      fprintf( stderr, "---------------------------------------------------------\n" );
+   }
+}
+
+
+/* Paint one solid box of the given colour into the back buffer.
+ *
+ * SAREA cliprect 0 is set to the box and a DRM_MACH64_CLEAR covering the
+ * same rectangle is issued.  The caller must hold the hardware lock, must
+ * have set sarea->nbox to 1, and is responsible for saving and restoring
+ * cliprect 0.  Boxes with a non-positive width or height are skipped
+ * instead of being sent to the DRM.  An ioctl failure releases the lock
+ * and terminates the process.
+ */
+static void mach64PerfBoxLocked( mach64ContextPtr mmesa,
+                                 GLint x, GLint y, GLint w, GLint h,
+                                 GLuint color )
+{
+   drm_clip_rect_t *b = mmesa->sarea->boxes;
+   drm_mach64_clear_t clear;
+   GLint ret;
+
+   if ( w <= 0 || h <= 0 )
+      return;
+
+   b[0].x1 = x;
+   b[0].y1 = y;
+   b[0].x2 = x + w;
+   b[0].y2 = y + h;
+
+   clear.flags = MACH64_BACK;
+   clear.x = x;
+   clear.y = y;
+   clear.w = w;
+   clear.h = h;
+   clear.clear_color = color;
+   clear.clear_depth = 0;
+
+   ret = drmCommandWrite( mmesa->driFd, DRM_MACH64_CLEAR,
+                          &clear, sizeof(drm_mach64_clear_t) );
+
+   if (ret < 0) {
+      UNLOCK_HARDWARE( mmesa );
+      fprintf( stderr, "DRM_MACH64_CLEAR: return = %d\n", ret );
+      exit( -1 );
+   }
+}
+
+/* Draw the performance indicator boxes at the top-left corner of the
+ * drawable's back buffer.  The caller must hold the hardware lock.
+ *
+ * First row (8 pixels high), left to right:
+ *   red    8 px  - DDFinish was called to wait for rendering
+ *   green  8 px  - we had to wait for previous frame(s) to complete
+ *   blue / purple, 20 px shared - approx. ratio of AGP / card textures
+ *   yellow 8 px  - textures were swapped
+ *   purple and blue bars (4 pixels high each, stacked) - card and AGP
+ *   texture upload volume, one pixel per 16384 bytes
+ * Second row (8 pixels high):
+ *   pink bar - one pixel per vertex buffer used
+ *
+ * SAREA cliprect 0 and sarea->nbox are saved on entry and restored on exit.
+ */
+void mach64PerformanceBoxesLocked( mach64ContextPtr mmesa )
+{
+   drm_clip_rect_t *b = mmesa->sarea->boxes;
+   GLint nbox;
+   GLint x1, y1, x2, y2;
+   GLint x, y, w;
+
+   /* save cliprects */
+   nbox = mmesa->sarea->nbox;
+   x1 = b[0].x1;
+   y1 = b[0].y1;
+   x2 = b[0].x2;
+   y2 = b[0].y2;
+
+   /* setup a single cliprect and call the clear ioctl for each box */
+   mmesa->sarea->nbox = 1;
+
+   x = mmesa->drawX;
+   y = mmesa->drawY;
+
+   /* Red box if DDFinish was called to wait for rendering to complete */
+   if ( mmesa->c_drawWaits ) {
+      mach64PerfBoxLocked( mmesa, x, y, 8, 8,
+                           mach64PackColor( mmesa->mach64Screen->cpp, 255, 0, 0, 0 ) );
+   }
+   x += 8;
+
+   /* draw a green box if we had to wait for previous frame(s) to complete */
+   if ( !mmesa->hardwareWentIdle ) {
+      mach64PerfBoxLocked( mmesa, x, y, 8, 8,
+                           mach64PackColor( mmesa->mach64Screen->cpp, 0, 255, 0, 0 ) );
+   }
+   x += 8;
+
+   /* show approx. ratio of AGP/card textures used - Blue = AGP, Purple = Card.
+    * The two parts share a 20 pixel slot; a part is only drawn when it is
+    * more than one pixel wide.
+    */
+   if ( mmesa->c_texsrc_agp || mmesa->c_texsrc_card ) {
+      GLint agp_w = ((GLfloat)mmesa->c_texsrc_agp / (GLfloat)(mmesa->c_texsrc_agp + mmesa->c_texsrc_card))*20;
+      GLint card_w = 20 - agp_w;
+
+      if ( agp_w > 1 ) {
+         mach64PerfBoxLocked( mmesa, x, y, agp_w, 8,
+                              mach64PackColor( mmesa->mach64Screen->cpp, 0, 0, 255, 0 ) );
+      }
+      if ( card_w > 1 ) {
+         mach64PerfBoxLocked( mmesa, x + agp_w, y, card_w, 8,
+                              mach64PackColor( mmesa->mach64Screen->cpp, 255, 0, 255, 0 ) );
+      }
+   }
+   x += 20;
+
+   /* Yellow box if we swapped textures */
+   if ( mmesa->c_textureSwaps ) {
+      mach64PerfBoxLocked( mmesa, x, y, 8, 8,
+                           mach64PackColor( mmesa->mach64Screen->cpp, 255, 255, 0, 0 ) );
+   }
+   x += 8;
+
+   /* Purple bar for card memory texture blits/uploads.  The width is
+    * limited to what is left of the drawable after the 44 pixels of boxes;
+    * on a narrower drawable the helper skips the bar.
+    */
+   if ( mmesa->c_textureBytes ) {
+      w = mmesa->c_textureBytes / 16384;
+      if ( w <= 0 )
+         w = 1;
+      if (w > (mmesa->driDrawable->w - 44))
+         w = mmesa->driDrawable->w - 44;
+
+      mach64PerfBoxLocked( mmesa, x, y, w, 4,
+                           mach64PackColor( mmesa->mach64Screen->cpp, 255, 0, 255, 0 ) );
+   }
+
+   /* Blue bar for AGP memory texture blits/uploads, directly below */
+   if ( mmesa->c_agpTextureBytes ) {
+      w = mmesa->c_agpTextureBytes / 16384;
+      if ( w <= 0 )
+         w = 1;
+      if (w > (mmesa->driDrawable->w - 44))
+         w = mmesa->driDrawable->w - 44;
+
+      mach64PerfBoxLocked( mmesa, x, y + 4, w, 4,
+                           mach64PackColor( mmesa->mach64Screen->cpp, 0, 0, 255, 0 ) );
+   }
+
+   /* Pink bar for number of vertex buffers used, on the second row */
+   if ( mmesa->c_vertexBuffers ) {
+      w = mmesa->c_vertexBuffers;
+      if (w > (mmesa->driDrawable->w))
+         w = mmesa->driDrawable->w;
+
+      mach64PerfBoxLocked( mmesa, mmesa->drawX, mmesa->drawY + 8, w, 8,
+                           mach64PackColor( mmesa->mach64Screen->cpp, 196, 128, 128, 0 ) );
+   }
+
+   /* restore cliprects */
+   mmesa->sarea->nbox = nbox;
+   b[0].x1 = x1;
+   b[0].y1 = y1;
+   b[0].x2 = x2;
+   b[0].y2 = y2;
+
+}
+
+#endif
+
+/* ================================================================
+ * Buffer clear
+ */
+
+/* dd_function_table::Clear implementation.  Front, back and (when depth
+ * writes are enabled) depth buffers are cleared through DRM_MACH64_CLEAR;
+ * whatever remains in the mask is passed to _swrast_Clear.
+ */
+static void mach64DDClear( GLcontext *ctx, GLbitfield mask )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT( ctx );
+   __DRIdrawablePrivate *dPriv = mmesa->driDrawable;
+   GLuint hw_buffers = 0;
+   GLint cx, cy, cw, ch;
+   GLint next;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "mach64DDClear\n");
+   }
+
+#if ENABLE_PERF_BOXES
+   /* Bump the performance counter */
+   mmesa->c_clears++;
+#endif
+
+   FLUSH_BATCH( mmesa );
+
+   /* Only colormask and scissor/clip state matter for a clear.  Update
+    * just those two, and leave every other pending bit for later.
+    */
+   if ( mmesa->new_state & (MACH64_NEW_MASKS | MACH64_NEW_CLIP) ) {
+      const GLuint pending = mmesa->new_state;
+
+      mmesa->new_state = pending & (MACH64_NEW_MASKS | MACH64_NEW_CLIP);
+      mach64DDUpdateHWState( ctx );
+      mmesa->new_state = pending & ~(MACH64_NEW_MASKS | MACH64_NEW_CLIP);
+   }
+
+   /* Move the buffers the hardware clears from 'mask' to 'hw_buffers'. */
+   if ( mask & BUFFER_BIT_FRONT_LEFT ) {
+      hw_buffers |= MACH64_FRONT;
+      mask &= ~BUFFER_BIT_FRONT_LEFT;
+   }
+   if ( mask & BUFFER_BIT_BACK_LEFT ) {
+      hw_buffers |= MACH64_BACK;
+      mask &= ~BUFFER_BIT_BACK_LEFT;
+   }
+   if ( ( mask & BUFFER_BIT_DEPTH ) && ctx->Depth.Mask ) {
+      hw_buffers |= MACH64_DEPTH;
+      mask &= ~BUFFER_BIT_DEPTH;
+   }
+
+   if ( mask )
+      _swrast_Clear( ctx, mask );
+
+   if ( hw_buffers == 0 )
+      return;
+
+   LOCK_HARDWARE( mmesa );
+
+   /* compute region after locking: */
+   cw = ctx->DrawBuffer->_Xmax - ctx->DrawBuffer->_Xmin;
+   ch = ctx->DrawBuffer->_Ymax - ctx->DrawBuffer->_Ymin;
+
+   /* Window to screen coordinates, flipping top to bottom */
+   cx = ctx->DrawBuffer->_Xmin + mmesa->drawX;
+   cy = mmesa->drawY + dPriv->h - ctx->DrawBuffer->_Ymin - ch;
+
+   /* HACK?
+    */
+   if ( mmesa->dirty & ~MACH64_UPLOAD_CLIPRECTS ) {
+      mach64EmitHwStateLocked( mmesa );
+   }
+
+   /* One clear ioctl per batch of at most MACH64_NR_SAREA_CLIPRECTS rects. */
+   next = 0;
+   while ( next < mmesa->numClipRects ) {
+      const int end = MIN2( next + MACH64_NR_SAREA_CLIPRECTS, mmesa->numClipRects );
+      const GLboolean whole_window = ( cw == dPriv->w && ch == dPriv->h );
+      drm_clip_rect_t *src = mmesa->pClipRects;
+      drm_clip_rect_t *dst = mmesa->sarea->boxes;
+      drm_mach64_clear_t clear;
+      GLint emitted = 0;
+      GLint ret;
+
+      for ( ; next < end ; next++ ) {
+         if ( whole_window ) {
+            /* clear whole window: cliprects are used as they are */
+            *dst++ = src[next];
+            emitted++;
+         } else {
+            /* clear subregion: intersect the cliprect with the region
+             * and drop it when nothing is left
+             */
+            GLint left = src[next].x1;
+            GLint top = src[next].y1;
+            GLint right = src[next].x2;
+            GLint bottom = src[next].y2;
+
+            if ( left < cx ) left = cx;
+            if ( top < cy ) top = cy;
+            if ( right > cx + cw ) right = cx + cw;
+            if ( bottom > cy + ch ) bottom = cy + ch;
+
+            if ( right - left > 0 && bottom - top > 0 ) {
+               dst->x1 = left;
+               dst->y1 = top;
+               dst->x2 = right;
+               dst->y2 = bottom;
+               dst++;
+               emitted++;
+            }
+         }
+      }
+
+      mmesa->sarea->nbox = emitted;
+
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_IOCTL ) {
+         fprintf( stderr,
+                  "DRM_MACH64_CLEAR: flag 0x%x color %x depth %x nbox %d\n",
+                  hw_buffers,
+                  (GLuint)mmesa->ClearColor,
+                  (GLuint)mmesa->ClearDepth,
+                  mmesa->sarea->nbox );
+      }
+
+      clear.flags = hw_buffers;
+      clear.x = cx;
+      clear.y = cy;
+      clear.w = cw;
+      clear.h = ch;
+      clear.clear_color = mmesa->ClearColor;
+      clear.clear_depth = mmesa->ClearDepth;
+
+      ret = drmCommandWrite( mmesa->driFd, DRM_MACH64_CLEAR,
+                             &clear, sizeof(drm_mach64_clear_t) );
+      if ( ret ) {
+         UNLOCK_HARDWARE( mmesa );
+         fprintf( stderr, "DRM_MACH64_CLEAR: return = %d\n", ret );
+         exit( -1 );
+      }
+   }
+
+   UNLOCK_HARDWARE( mmesa );
+
+   /* Flag context, misc and cliprect state for re-upload. */
+   mmesa->dirty |= (MACH64_UPLOAD_CONTEXT |
+                    MACH64_UPLOAD_MISC |
+                    MACH64_UPLOAD_CLIPRECTS);
+
+}
+
+
+/* Issue DRM_MACH64_IDLE, retrying while the DRM answers -EBUSY (bounded
+ * by MACH64_TIMEOUT).  The caller must hold the hardware lock.  If the
+ * call still fails, the engine is reset, the lock released and the
+ * process terminated.
+ */
+void mach64WaitForIdleLocked( mach64ContextPtr mmesa )
+{
+   const int fd = mmesa->driFd;
+   int retries;
+   int ret;
+
+   for ( retries = 0 ; ; retries++ ) {
+      ret = drmCommandNone( fd, DRM_MACH64_IDLE );
+      if ( ret != -EBUSY || retries >= MACH64_TIMEOUT )
+         break;
+   }
+
+   if ( ret >= 0 )
+      return;
+
+   drmCommandNone( fd, DRM_MACH64_RESET );
+   UNLOCK_HARDWARE( mmesa );
+   fprintf( stderr, "Error: Mach64 timed out... exiting\n" );
+   exit( -1 );
+}
+
+/* Flush the DMA queue to the hardware */
+void mach64FlushDMALocked( mach64ContextPtr mmesa )
+{
+   const int fd = mmesa->driFd;
+
+   /* A failed flush is fatal: reset the engine, release the lock, exit. */
+   if ( drmCommandNone( fd, DRM_MACH64_FLUSH ) < 0 ) {
+      drmCommandNone( fd, DRM_MACH64_RESET );
+      UNLOCK_HARDWARE( mmesa );
+      fprintf( stderr, "Error flushing DMA... exiting\n" );
+      exit( -1 );
+   }
+
+   /* Flag context, misc and cliprect state for re-upload. */
+   mmesa->dirty |= MACH64_UPLOAD_CONTEXT;
+   mmesa->dirty |= MACH64_UPLOAD_MISC;
+   mmesa->dirty |= MACH64_UPLOAD_CLIPRECTS;
+}
+
+/* For client-side state emits - currently unused
+ *
+ * Writes the register values held in sarea->context_state through the
+ * DMA* macros, one group per bit set in sarea->dirty, and clears
+ * sarea->dirty when done.
+ */
+void mach64UploadHwStateLocked( mach64ContextPtr mmesa )
+{
+   drm_mach64_sarea_t *sarea = mmesa->sarea;
+   
+   drm_mach64_context_regs_t *regs = &sarea->context_state;
+   /* Snapshot of the dirty bits; all tests below use this copy. */
+   unsigned int dirty = sarea->dirty;
+   /* Bits 4-7 of tex_size_pitch, shifted right by two, are added to
+    * MACH64_TEX_0_OFF below.  NOTE(review): presumably this selects the
+    * texture offset register matching the texture size -- confirm
+    * against the register documentation.
+    */
+   CARD32 offset = ((regs->tex_size_pitch & 0xf0) >> 2);
+
+   DMALOCALS;
+
+   /* The groups below contain 19 DMAOUTREG calls in total.
+    * NOTE(review): the factor of two presumably covers a register/value
+    * pair per write -- confirm against the DMAOUTREG definition.
+    */
+   DMAGETPTR( 19*2 );
+
+   if ( dirty & MACH64_UPLOAD_MISC ) {
+      DMAOUTREG( MACH64_DP_MIX, regs->dp_mix );
+      DMAOUTREG( MACH64_DP_SRC, regs->dp_src );
+      DMAOUTREG( MACH64_CLR_CMP_CNTL, regs->clr_cmp_cntl );
+      DMAOUTREG( MACH64_GUI_TRAJ_CNTL, regs->gui_traj_cntl );
+      DMAOUTREG( MACH64_SC_LEFT_RIGHT, regs->sc_left_right );
+      DMAOUTREG( MACH64_SC_TOP_BOTTOM, regs->sc_top_bottom );
+      sarea->dirty &= ~MACH64_UPLOAD_MISC;
+   }
+
+   if ( dirty & MACH64_UPLOAD_DST_OFF_PITCH ) {
+      DMAOUTREG( MACH64_DST_OFF_PITCH, regs->dst_off_pitch );
+      sarea->dirty &= ~MACH64_UPLOAD_DST_OFF_PITCH;
+   }
+   if ( dirty & MACH64_UPLOAD_Z_OFF_PITCH ) {
+      DMAOUTREG( MACH64_Z_OFF_PITCH, regs->z_off_pitch );
+      sarea->dirty &= ~MACH64_UPLOAD_Z_OFF_PITCH;
+   }
+   if ( dirty & MACH64_UPLOAD_Z_ALPHA_CNTL ) {
+      DMAOUTREG( MACH64_Z_CNTL, regs->z_cntl );
+      DMAOUTREG( MACH64_ALPHA_TST_CNTL, regs->alpha_tst_cntl );
+      sarea->dirty &= ~MACH64_UPLOAD_Z_ALPHA_CNTL;
+   }
+   if ( dirty & MACH64_UPLOAD_SCALE_3D_CNTL ) {
+      DMAOUTREG( MACH64_SCALE_3D_CNTL, regs->scale_3d_cntl );
+      sarea->dirty &= ~MACH64_UPLOAD_SCALE_3D_CNTL;
+   }
+   if ( dirty & MACH64_UPLOAD_DP_FOG_CLR ) {
+      DMAOUTREG( MACH64_DP_FOG_CLR, regs->dp_fog_clr );
+      sarea->dirty &= ~MACH64_UPLOAD_DP_FOG_CLR;
+   }
+   if ( dirty & MACH64_UPLOAD_DP_WRITE_MASK ) {
+      DMAOUTREG( MACH64_DP_WRITE_MASK, regs->dp_write_mask );
+      sarea->dirty &= ~MACH64_UPLOAD_DP_WRITE_MASK;
+   }
+   if ( dirty & MACH64_UPLOAD_DP_PIX_WIDTH ) {
+      DMAOUTREG( MACH64_DP_PIX_WIDTH, regs->dp_pix_width );
+      sarea->dirty &= ~MACH64_UPLOAD_DP_PIX_WIDTH;
+   }
+   if ( dirty & MACH64_UPLOAD_SETUP_CNTL ) {
+      DMAOUTREG( MACH64_SETUP_CNTL, regs->setup_cntl );
+      sarea->dirty &= ~MACH64_UPLOAD_SETUP_CNTL;
+   }
+
+   if ( dirty & MACH64_UPLOAD_TEXTURE ) {
+      DMAOUTREG( MACH64_TEX_SIZE_PITCH, regs->tex_size_pitch );
+      DMAOUTREG( MACH64_TEX_CNTL, regs->tex_cntl );
+      DMAOUTREG( MACH64_SECONDARY_TEX_OFF, regs->secondary_tex_off );
+      DMAOUTREG( MACH64_TEX_0_OFF + offset, regs->tex_offset );
+      sarea->dirty &= ~MACH64_UPLOAD_TEXTURE;
+   }
+
+   /* Disabled: the scissor registers are already written as part of the
+    * MACH64_UPLOAD_MISC group above.
+    */
+#if 0
+   if ( dirty & MACH64_UPLOAD_CLIPRECTS ) {
+      DMAOUTREG( MACH64_SC_LEFT_RIGHT, regs->sc_left_right );
+      DMAOUTREG( MACH64_SC_TOP_BOTTOM, regs->sc_top_bottom );
+      sarea->dirty &= ~MACH64_UPLOAD_CLIPRECTS;
+   }
+#endif
+
+   /* Clears every bit, including any that no group above handled; this
+    * makes the per-group clears redundant.
+    */
+   sarea->dirty = 0;
+
+   DMAADVANCE();
+}
+
+/* Install this file's Clear callback in the driver function table. */
+void mach64InitIoctlFuncs( struct dd_function_table *functions )
+{
+   functions->Clear = mach64DDClear;
+}
diff --git a/src/mach64_ioctl.h b/src/mach64_ioctl.h
new file mode 100644
index 0000000..52fe863
--- /dev/null
+++ b/src/mach64_ioctl.h
@@ -0,0 +1,148 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#ifndef __MACH64_IOCTL_H__
+#define __MACH64_IOCTL_H__
+
+#include "mach64_dri.h"
+#include "mach64_reg.h"
+#include "mach64_lock.h"
+
+#define MACH64_BUFFER_MAX_DWORDS	(MACH64_BUFFER_SIZE / sizeof(CARD32))
+
+
+extern drmBufPtr mach64GetBufferLocked( mach64ContextPtr mmesa );
+extern void mach64FlushVerticesLocked( mach64ContextPtr mmesa );
+extern void mach64FlushDMALocked( mach64ContextPtr mmesa );
+extern void mach64UploadHwStateLocked( mach64ContextPtr mmesa );
+
+/* Reserve 'bytes' bytes in the context's vertex buffer and return a
+ * pointer to the start of the reservation.  If the request does not fit
+ * in what is left, the buffer is flushed first; the hardware lock is
+ * taken and released around that flush, so the caller must not hold it.
+ */
+static __inline void *mach64AllocDmaLow( mach64ContextPtr mmesa, int bytes )
+{
+   char *start;
+
+   if ( mmesa->vert_total < mmesa->vert_used + bytes ) {
+      LOCK_HARDWARE( mmesa );
+      mach64FlushVerticesLocked( mmesa );
+      UNLOCK_HARDWARE( mmesa );
+   }
+
+   start = (char *)mmesa->vert_buf + mmesa->vert_used;
+   mmesa->vert_used += bytes;
+
+   return start;
+}
+
+/* Reserve 'bytes' bytes in the context's vertex buffer and return a
+ * pointer to the start of the reservation.  If the request does not fit
+ * in what is left, the buffer is flushed first with
+ * mach64FlushVerticesLocked(), so the caller must already hold the
+ * hardware lock.
+ */
+static __inline void *mach64AllocDmaLocked( mach64ContextPtr mmesa, int bytes )
+{
+   char *start;
+
+   if ( mmesa->vert_total < mmesa->vert_used + bytes ) {
+      mach64FlushVerticesLocked( mmesa );
+   }
+
+   start = (char *)mmesa->vert_buf + mmesa->vert_used;
+   mmesa->vert_used += bytes;
+
+   return start;
+}
+
+extern void mach64FireBlitLocked( mach64ContextPtr mmesa, void *buffer,
+				  GLint offset, GLint pitch, GLint format,
+				  GLint x, GLint y, GLint width, GLint height );
+
+extern void mach64CopyBuffer( const __DRIdrawablePrivate *dPriv );
+#if ENABLE_PERF_BOXES
+extern void mach64PerformanceCounters( mach64ContextPtr mmesa );
+extern void mach64PerformanceBoxesLocked( mach64ContextPtr mmesa );
+#endif
+extern void mach64WaitForIdleLocked( mach64ContextPtr mmesa );
+
+extern void mach64InitIoctlFuncs( struct dd_function_table *functions );
+
+/* ================================================================
+ * Helper macros:
+ */
+
+/* Flush the batched vertices, if there are any, taking the hardware lock
+ * for the duration of the flush.  The argument is parenthesised so that
+ * any pointer expression can be passed; it is evaluated more than once.
+ */
+#define FLUSH_BATCH( mmesa )						\
+do {									\
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_IOCTL )				\
+      fprintf( stderr, "FLUSH_BATCH in %s\n", __FUNCTION__ );		\
+   if ( (mmesa)->vert_used ) {						\
+      mach64FlushVertices( (mmesa) );					\
+   }									\
+} while (0)
+
+/* According to a comment in ATIMach64Sync (atimach64.c) in the DDX:
+ *
+ * "For VTB's and later, the first CPU read of the framebuffer will return
+ * zeroes [...] This appears to be due to some kind of engine
+ * caching of framebuffer data I haven't found any way of disabling, or
+ * otherwise circumventing."
+ */
+/* Flush pending vertices, wait for the engine to go idle and do one
+ * dummy read from the framebuffer.  To be used with the hardware lock
+ * held.  The argument is parenthesised so that any pointer expression can
+ * be passed; it is evaluated more than once.  The value read is discarded
+ * on purpose; the cast to void keeps "set but not used" warnings away.
+ */
+#define FINISH_DMA_LOCKED( mmesa )					\
+do {									\
+   CARD32 _tmp;								\
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_IOCTL )				\
+      fprintf( stderr, "FINISH_DMA_LOCKED in %s\n", __FUNCTION__ );	\
+   if ( (mmesa)->vert_used ) {						\
+      mach64FlushVerticesLocked( (mmesa) );				\
+   }									\
+   mach64WaitForIdleLocked( (mmesa) );					\
+   /* pre-read framebuffer to counter caching problem */		\
+   _tmp = *(volatile CARD32 *)(mmesa)->driScreen->pFB;			\
+   (void)_tmp;								\
+} while (0)
+
+/* Flush pending vertices, then flush the DMA queue to the hardware.  To
+ * be used with the hardware lock held.  The argument is parenthesised so
+ * that any pointer expression can be passed; it is evaluated more than
+ * once.
+ */
+#define FLUSH_DMA_LOCKED( mmesa )					\
+do {									\
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_IOCTL )				\
+      fprintf( stderr, "FLUSH_DMA_LOCKED in %s\n", __FUNCTION__ );	\
+   if ( (mmesa)->vert_used ) {						\
+      mach64FlushVerticesLocked( (mmesa) );				\
+   }									\
+   mach64FlushDMALocked( (mmesa) );					\
+} while (0)
+
+/* mach64FlushVerticesLocked() wrapped in a hardware lock/unlock pair.
+ * The argument is forwarded in parentheses and is evaluated more than
+ * once.
+ */
+#define mach64FlushVertices( mmesa )					\
+do {									\
+   LOCK_HARDWARE( (mmesa) );						\
+   mach64FlushVerticesLocked( (mmesa) );				\
+   UNLOCK_HARDWARE( (mmesa) );						\
+} while (0)
+
+/* mach64WaitForIdleLocked() wrapped in a hardware lock/unlock pair.
+ * The argument is forwarded in parentheses and is evaluated more than
+ * once.
+ */
+#define mach64WaitForIdle( mmesa )		\
+do {						\
+   LOCK_HARDWARE( (mmesa) );			\
+   mach64WaitForIdleLocked( (mmesa) );		\
+   UNLOCK_HARDWARE( (mmesa) );			\
+} while (0)
+
+
+#endif /* __MACH64_IOCTL_H__ */
diff --git a/src/mach64_lock.c b/src/mach64_lock.c
new file mode 100644
index 0000000..b73e350
--- /dev/null
+++ b/src/mach64_lock.c
@@ -0,0 +1,96 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "mach64_context.h"
+#include "mach64_state.h"
+#include "mach64_lock.h"
+#include "mach64_tex.h"
+#include "drirenderbuffer.h"
+
+#if DEBUG_LOCKING
+char *prevLockFile = NULL;	/* file recorded by DEBUG_LOCK(); cleared by DEBUG_RESET() */
+int   prevLockLine = 0;		/* line recorded by DEBUG_LOCK(); cleared by DEBUG_RESET() */
+#endif
+
+
+/* Update the hardware state.  This is called if another context has
+ * grabbed the hardware lock, which includes the X server.  This
+ * function also updates the driver's window state after the X server
+ * moves, resizes or restacks a window -- the change will be reflected
+ * in the drawable position and clip rects.  Since the X server grabs
+ * the hardware lock when it changes the window state, this routine will
+ * automatically be called after such a change.
+ */
+void mach64GetLock( mach64ContextPtr mmesa, GLuint flags )
+{
+   __DRIdrawablePrivate *dPriv = mmesa->driDrawable;
+   __DRIscreenPrivate *sPriv = mmesa->driScreen;
+   drm_mach64_sarea_t *sarea = mmesa->sarea;
+   int i;
+   /* Reached from LOCK_HARDWARE() when its DRM_CAS sets __ret. */
+   drmGetLock( mmesa->driFd, mmesa->hHWContext, flags );
+
+   /* The window might have moved, so we might need to get new clip
+    * rects.
+    *
+    * NOTE: This releases and regrabs the hw lock to allow the X server
+    * to respond to the DRI protocol request for new drawable info.
+    * Since the hardware state depends on having the latest drawable
+    * clip rects, all state checking must be done _after_ this call.
+    */
+   DRI_VALIDATE_DRAWABLE_INFO( sPriv, dPriv ); 
+   /* Drawable stamp changed: redo cliprects, framebuffer size and viewport. */
+   if ( mmesa->lastStamp != dPriv->lastStamp ) {
+      mmesa->lastStamp = dPriv->lastStamp;
+      if (mmesa->glCtx->DrawBuffer->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT)
+         mach64SetCliprects( mmesa->glCtx, GL_BACK_LEFT );
+      else
+         mach64SetCliprects( mmesa->glCtx, GL_FRONT_LEFT );
+      driUpdateFramebufferSize( mmesa->glCtx, dPriv );
+      mach64CalcViewport( mmesa->glCtx );
+   }
+   /* Context, misc and cliprect state are flagged dirty on every call. */
+   mmesa->dirty |= (MACH64_UPLOAD_CONTEXT
+		    | MACH64_UPLOAD_MISC
+		    | MACH64_UPLOAD_CLIPRECTS);
+
+   /* EXA render acceleration uses the texture engine, so restore it */
+   mmesa->dirty |= (MACH64_UPLOAD_TEXTURE);
+   /* Another context owned the hardware last: claim it and flag everything. */
+   if ( sarea->ctx_owner != mmesa->hHWContext ) {
+      sarea->ctx_owner = mmesa->hHWContext;
+      mmesa->dirty = MACH64_UPLOAD_ALL;
+   }
+   /* NOTE(review): presumably ages textures in each heap in use -- confirm against DRI_AGE_TEXTURES. */
+   for ( i = mmesa->firstTexHeap ; i < mmesa->lastTexHeap ; i++ ) {
+      DRI_AGE_TEXTURES( mmesa->texture_heaps[i] );
+   }
+}
diff --git a/src/mach64_lock.h b/src/mach64_lock.h
new file mode 100644
index 0000000..973880e
--- /dev/null
+++ b/src/mach64_lock.h
@@ -0,0 +1,104 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#ifndef __MACH64_LOCK_H__
+#define __MACH64_LOCK_H__
+
+extern void mach64GetLock( mach64ContextPtr mmesa, GLuint flags );
+
+
+/* Turn DEBUG_LOCKING on to find locking conflicts.
+ */
+#define DEBUG_LOCKING	1
+
+#if DEBUG_LOCKING
+extern char *prevLockFile;
+extern int   prevLockLine;
+
+#define DEBUG_LOCK()							\
+   do {	/* record the file/line where the lock was taken */		\
+      prevLockFile = (__FILE__);					\
+      prevLockLine = (__LINE__);					\
+   } while (0)
+
+#define DEBUG_RESET()							\
+   do {	/* forget the recorded lock site */				\
+      prevLockFile = 0;							\
+      prevLockLine = 0;							\
+   } while (0)
+
+#define DEBUG_CHECK_LOCK()						\
+   do {	/* a recorded site means a lock is still set: report, exit */	\
+      if ( prevLockFile ) {						\
+	 fprintf( stderr,						\
+		  "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n",	\
+		  prevLockFile, prevLockLine, __FILE__, __LINE__ );	\
+	 exit( 1 );							\
+      }									\
+   } while (0)
+
+#else
+
+#define DEBUG_LOCK()
+#define DEBUG_RESET()
+#define DEBUG_CHECK_LOCK()
+
+#endif
+
+/*
+ * !!! We may want to separate locks from locks with validation.  This
+ * could be used to improve performance for those commands that
+ * do not do any drawing !!!
+ */
+
+/* Lock the hardware and validate our state.
+ */
+#define LOCK_HARDWARE( mmesa )						\
+   do {									\
+      char __ret = 0;							\
+      DEBUG_CHECK_LOCK();						\
+      DRM_CAS( (mmesa)->driHwLock, (mmesa)->hHWContext,			\
+	       (DRM_LOCK_HELD | (mmesa)->hHWContext), __ret );		\
+      if ( __ret )	/* set by DRM_CAS: fall back to mach64GetLock */	\
+	 mach64GetLock( mmesa, 0 );					\
+      DEBUG_LOCK();							\
+   } while (0)
+
+/* Unlock the hardware.
+ */
+#define UNLOCK_HARDWARE( mmesa )					\
+   do {									\
+      DRM_UNLOCK( (mmesa)->driFd,					\
+		  (mmesa)->driHwLock,					\
+		  (mmesa)->hHWContext );				\
+      DEBUG_RESET();	/* clear the recorded lock site */		\
+   } while (0)
+
+#endif /* __MACH64_LOCK_H__ */
diff --git a/src/mach64_native_vb.c b/src/mach64_native_vb.c
new file mode 100644
index 0000000..81bcf80
--- /dev/null
+++ b/src/mach64_native_vb.c
@@ -0,0 +1,257 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Original authors:
+ *    Keith Whitwell <keithw@valinux.com>
+ *
+ * Adapted to Mach64 by:
+ *    José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "math/m_translate.h"
+
+#ifndef LOCALVARS
+#define LOCALVARS
+#endif
+
+void TAG(translate_vertex)(GLcontext *ctx,
+			   const VERTEX *src,
+			   SWvertex *dst)
+{
+   LOCALVARS
+   GLuint format = GET_VERTEX_FORMAT();
+   UNVIEWPORT_VARS;
+   CARD32 *p = (CARD32 *)src + 10 - mmesa->vertex_size;
+
+   dst->win[3] = 1.0;
+   /* Cases fall through on purpose: p starts 'vertex_size' dwords before the end of a 10-dword slot. */
+   switch ( format ) {
+      case TEX1_VERTEX_FORMAT:
+#ifdef MACH64_PREMULT_TEXCOORDS
+	 {
+	    float rhw = 1.0 / LE32_IN_FLOAT( p + 2 );
+	    
+	    dst->attrib[FRAG_ATTRIB_TEX1][0] = rhw*LE32_IN_FLOAT( p++ );
+	    dst->attrib[FRAG_ATTRIB_TEX1][1] = rhw*LE32_IN_FLOAT( p++ );
+	 }
+#else
+	 dst->attrib[FRAG_ATTRIB_TEX1][0] = LE32_IN_FLOAT( p++ );
+	 dst->attrib[FRAG_ATTRIB_TEX1][1] = LE32_IN_FLOAT( p++ );
+#endif
+	 dst->attrib[FRAG_ATTRIB_TEX1][3] = 1.0;
+	 p++;
+	 /* fall through */
+      case TEX0_VERTEX_FORMAT:
+#ifdef MACH64_PREMULT_TEXCOORDS
+	 {
+	    float rhw = 1.0 / LE32_IN_FLOAT( p + 2 );
+	    
+	    dst->attrib[FRAG_ATTRIB_TEX0][0] = rhw*LE32_IN_FLOAT( p++ );
+	    dst->attrib[FRAG_ATTRIB_TEX0][1] = rhw*LE32_IN_FLOAT( p++ );
+	 }
+#else
+	 dst->attrib[FRAG_ATTRIB_TEX0][0] = LE32_IN_FLOAT( p++ );
+	 dst->attrib[FRAG_ATTRIB_TEX0][1] = LE32_IN_FLOAT( p++ );
+#endif
+	 dst->attrib[FRAG_ATTRIB_TEX0][3] = 1.0;
+	 dst->win[3] = LE32_IN_FLOAT( p++ );
+	 /* fall through */
+      case NOTEX_VERTEX_FORMAT:
+	 dst->specular[2] = ((GLubyte *)p)[0];
+	 dst->specular[1] = ((GLubyte *)p)[1];
+	 dst->specular[0] = ((GLubyte *)p)[2];
+	 dst->attrib[FRAG_ATTRIB_FOGC][0] = ((GLubyte *)p)[3];
+	 p++;
+	 /* fall through */
+      case TINY_VERTEX_FORMAT:
+	 dst->win[2] = UNVIEWPORT_Z( LE32_IN( p++ ) );
+
+	 dst->color[2] = ((GLubyte *)p)[0];
+	 dst->color[1] = ((GLubyte *)p)[1];
+	 dst->color[0] = ((GLubyte *)p)[2];
+	 dst->color[3] = ((GLubyte *)p)[3];
+	 p++;
+	 
+	 {
+	    GLuint xy = LE32_IN( p );
+	    
+	    dst->win[0] = UNVIEWPORT_X( (GLfloat)(GLshort)( xy >> 16 ) );
+	    dst->win[1] = UNVIEWPORT_Y( (GLfloat)(GLshort)( xy & 0xffff ) );
+	 }
+   }
+   /* p must now sit on the last (10th) dword of the slot. */
+   assert( p + 1 - (CARD32 *)src == 10 );
+	 
+   dst->pointSize = ctx->Point._Size;
+}
+
+
+
+void TAG(print_vertex)( GLcontext *ctx, const VERTEX *v )
+{
+   LOCALVARS
+   GLuint format = GET_VERTEX_FORMAT();
+   CARD32 *p = (CARD32 *)v + 10 - mmesa->vertex_size;
+   /* Dump one hardware vertex to stderr; cases fall through on purpose. */
+   switch ( format ) {
+      case TEX1_VERTEX_FORMAT:
+	 {
+	    GLfloat tu, tv, tw;	/* not u/v/w: 'v' would shadow the parameter */
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    float rhw = 1.0 / LE32_IN_FLOAT( p + 2 );
+	    
+	    tu = rhw*LE32_IN_FLOAT( p++ );
+	    tv = rhw*LE32_IN_FLOAT( p++ );
+#else
+	    tu = LE32_IN_FLOAT( p++ );
+	    tv = LE32_IN_FLOAT( p++ );
+#endif
+	    tw = LE32_IN_FLOAT( p++ );
+	    fprintf( stderr, "u1 %f v1 %f w1 %f\n", tu, tv, tw );
+	 }
+	 /* fall through */
+      case TEX0_VERTEX_FORMAT:
+	 {
+	    GLfloat tu, tv, tw;	/* not u/v/w: 'v' would shadow the parameter */
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    float rhw = 1.0 / LE32_IN_FLOAT( p + 2 );
+	    
+	    tu = rhw*LE32_IN_FLOAT( p++ );
+	    tv = rhw*LE32_IN_FLOAT( p++ );
+#else
+	    tu = LE32_IN_FLOAT( p++ );
+	    tv = LE32_IN_FLOAT( p++ );
+#endif
+	    tw = LE32_IN_FLOAT( p++ );
+	    fprintf( stderr, "u0 %f v0 %f w0 %f\n", tu, tv, tw );
+	 }
+	 /* fall through */
+      case NOTEX_VERTEX_FORMAT:
+	 {
+	    GLubyte r, g, b, a;
+	    
+	    b = ((GLubyte *)p)[0];
+	    g = ((GLubyte *)p)[1];
+	    r = ((GLubyte *)p)[2];
+	    a = ((GLubyte *)p)[3];
+	    p++;
+	    fprintf(stderr, "spec: r %d g %d b %d a %d\n", r, g, b, a);
+	 }
+	 /* fall through */
+      case TINY_VERTEX_FORMAT:
+	 {
+	    GLuint xy;
+	    GLfloat x, y, z;
+	    GLubyte r, g, b, a;
+	    
+	    z = LE32_IN( p++ ) / 65536.0;
+
+	    b = ((GLubyte *)p)[0];
+	    g = ((GLubyte *)p)[1];
+	    r = ((GLubyte *)p)[2];
+	    a = ((GLubyte *)p)[3];
+	    p++;
+	    xy = LE32_IN( p );
+	    x = (GLfloat)(GLshort)( xy >> 16 ) / 4.0;
+	    y = (GLfloat)(GLshort)( xy & 0xffff ) / 4.0;
+	    
+	    fprintf(stderr, "x %f y %f z %f\n", x, y, z);
+	    fprintf(stderr, "r %d g %d b %d a %d\n", r, g, b, a);
+	 }
+   }
+   /* p must now sit on the last (10th) dword of the slot. */
+   assert( p + 1 - (CARD32 *)v == 10 );	 
+
+   fprintf(stderr, "\n");
+}
+
+/* Interpolate the elements of the VB not included in typical hardware
+ * vertices.
+ *
+ * NOTE: All these arrays are guaranteed by tnl to be writeable and
+ * have good stride.
+ */
+#ifndef INTERP_QUALIFIER 
+#define INTERP_QUALIFIER static	/* default linkage unless the includer overrides it */
+#endif
+
+#define GET_COLOR(ptr, idx) ((ptr)->data[idx])	/* element 'idx' of the vector's data array */
+
+
+INTERP_QUALIFIER void TAG(interp_extras)( GLcontext *ctx,
+					  GLfloat t,
+					  GLuint dst, GLuint out, GLuint in,
+					  GLboolean force_boundary )
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   /* NOTE(review): index [1] is presumably the back-face color set -- confirm. */
+   if (VB->ColorPtr[1]) {
+      assert(VB->ColorPtr[1]->stride == 4 * sizeof(GLfloat));
+      
+      INTERP_4F( t,
+		    GET_COLOR(VB->ColorPtr[1], dst),
+		    GET_COLOR(VB->ColorPtr[1], out),
+		    GET_COLOR(VB->ColorPtr[1], in) );
+
+      if (VB->SecondaryColorPtr[1]) {
+	 INTERP_3F( t,
+		       GET_COLOR(VB->SecondaryColorPtr[1], dst),
+		       GET_COLOR(VB->SecondaryColorPtr[1], out),
+		       GET_COLOR(VB->SecondaryColorPtr[1], in) );
+      }
+   }
+   /* The new vertex takes the edge flag of 'out', or is forced on by force_boundary. */
+   if (VB->EdgeFlag) {
+      VB->EdgeFlag[dst] = VB->EdgeFlag[out] || force_boundary;
+   }
+   /* INTERP_VERTEX is not defined in this file; presumably supplied by the includer. */
+   INTERP_VERTEX(ctx, t, dst, out, in, force_boundary);
+}
+
+INTERP_QUALIFIER void TAG(copy_pv_extras)( GLcontext *ctx, 
+					   GLuint dst, GLuint src )
+{
+   LOCALVARS
+      struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   /* Copy the [1] color sets (when present) from vertex 'src' to 'dst'. */
+   if (VB->ColorPtr[1]) {
+      COPY_4FV( GET_COLOR(VB->ColorPtr[1], dst), 
+		GET_COLOR(VB->ColorPtr[1], src) );
+
+      if (VB->SecondaryColorPtr[1]) {
+	 COPY_4FV( GET_COLOR(VB->SecondaryColorPtr[1], dst), 
+		   GET_COLOR(VB->SecondaryColorPtr[1], src) );
+      }
+   }
+   /* COPY_PV_VERTEX is not defined in this file; presumably supplied by the includer. */
+   COPY_PV_VERTEX(ctx, dst, src);
+}
+
+
+#undef INTERP_QUALIFIER
+#undef GET_COLOR
+
+#undef IND
+#undef TAG
diff --git a/src/mach64_native_vbtmp.h b/src/mach64_native_vbtmp.h
new file mode 100644
index 0000000..f64b808
--- /dev/null
+++ b/src/mach64_native_vbtmp.h
@@ -0,0 +1,562 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Original authors:
+ *    Keith Whitwell <keithw@valinux.com>
+ *
+ * Adapted to Mach64 by:
+ *    José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+
+/* DO_XYZW:  Emit xyz and maybe w coordinates.
+ * DO_RGBA:  Emit color.
+ * DO_SPEC:  Emit specular color.
+ * DO_FOG:   Emit fog coordinate in specular alpha.
+ * DO_TEX0:  Emit tex0 u,v coordinates.
+ * DO_TEX1:  Emit tex1 u,v coordinates.
+ * DO_PTEX:  Emit tex0,1 q coordinates where possible.
+ *
+ * Additionally, this template assumes it is emitting *transformed*
+ * vertices; the modifications to emit untransformed vertices (ie. to
+ * t&l hardware) are probably too great to coexist with the code
+ * already in this file.
+ */
+
+#define VIEWPORT_X(x)  ((GLint) ((s[0]  * (x) + s[12]) * 4.0))	/* uses the caller's 's'; result scaled by 4 */
+#define VIEWPORT_Y(y)  ((GLint) ((s[5]  * (y) + s[13]) * 4.0))	/* uses the caller's 's'; result scaled by 4 */
+#define VIEWPORT_Z(z) (((GLuint) (s[10] * (z) + s[14])) << 15)	/* uses the caller's 's'; integer part shifted left 15 */
+
+#ifndef LOCALVARS
+#define LOCALVARS
+#endif
+
+static void TAG(emit)( GLcontext *ctx,
+		       GLuint start, GLuint end,
+		       void *dest,
+		       GLuint stride )
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+#if DO_TEX1
+   GLfloat (*tc1)[4];
+   GLuint tc1_stride;
+#if DO_PTEX
+   GLuint tc1_size;
+#endif
+#endif
+#if DO_TEX0
+   GLfloat (*tc0)[4];
+   GLuint tc0_stride;
+#if DO_PTEX
+   GLuint tc0_size;
+#endif
+#endif
+#if DO_SPEC
+   GLfloat (*spec)[4];
+   GLuint spec_stride;
+#endif
+#if DO_FOG
+   GLfloat (*fog)[4];
+   GLuint fog_stride;
+#endif
+#if DO_RGBA
+   GLfloat (*col)[4];
+   GLuint col_stride;
+#endif
+   GLfloat (*coord)[4];
+   GLuint coord_stride;
+   VERTEX *v = (VERTEX *)dest;
+   const GLfloat *s = GET_VIEWPORT_MAT();
+#if DO_TEX1 || DO_TEX0 || DO_XYZW
+   const GLubyte *mask = VB->ClipMask;
+#endif
+   int i;
+   /* Emits VB elements [start,end) to 'dest'; each vertex is a 10-dword slot, slots 'stride' bytes apart. */
+#if !DO_XYZW
+   (void) s; /* Quiet compiler */
+#endif
+/*     fprintf(stderr, "%s(big) importable %d %d..%d\n",  */
+/*  	   __FUNCTION__, VB->importable_data, start, end); */
+
+#if DO_TEX1
+   {
+      const GLuint t1 = GET_TEXSOURCE(1);
+      tc1 = VB->TexCoordPtr[t1]->data;
+      tc1_stride = VB->TexCoordPtr[t1]->stride;
+#if DO_PTEX
+      tc1_size = VB->TexCoordPtr[t1]->size;
+#endif
+   }
+#endif
+
+#if DO_TEX0
+   {
+      const GLuint t0 = GET_TEXSOURCE(0);
+      tc0 = VB->TexCoordPtr[t0]->data;
+      tc0_stride = VB->TexCoordPtr[t0]->stride;
+#if DO_PTEX
+      tc0_size = VB->TexCoordPtr[t0]->size;
+#endif
+   }
+#endif
+
+#if DO_SPEC
+   if (VB->SecondaryColorPtr[0]) {
+      spec = VB->SecondaryColorPtr[0]->data;
+      spec_stride = VB->SecondaryColorPtr[0]->stride;
+   } else {
+      spec = (GLfloat (*)[4])ctx->Current.Attrib[VERT_ATTRIB_COLOR1];
+      spec_stride = 0;
+   }
+#endif
+
+#if DO_FOG
+   if (VB->FogCoordPtr) {
+      fog = VB->FogCoordPtr->data;
+      fog_stride = VB->FogCoordPtr->stride;
+   } else {
+      static GLfloat tmp[4] = {0, 0, 0, 0};
+      fog = &tmp;
+      fog_stride = 0;
+   }
+#endif
+
+#if DO_RGBA
+   col = VB->ColorPtr[0]->data;
+   col_stride = VB->ColorPtr[0]->stride;
+#endif
+
+   coord = VB->NdcPtr->data;
+   coord_stride = VB->NdcPtr->stride;
+   /* Advance every source array to element 'start'. */
+   if (start) {
+#if DO_TEX1
+         STRIDE_4F(tc1, start * tc1_stride);
+#endif
+#if DO_TEX0
+         STRIDE_4F(tc0, start * tc0_stride);
+#endif
+#if DO_SPEC
+	 STRIDE_4F(spec, start * spec_stride);
+#endif
+#if DO_FOG
+	 STRIDE_4F(fog, start * fog_stride);
+#endif
+#if DO_RGBA
+	 STRIDE_4F(col, start * col_stride);
+#endif
+	 STRIDE_4F(coord, start * coord_stride);
+   }
+   /* Slot layout in dwords: 0-2 tex1 s/t/w, 3-5 tex0 s/t/w, 6 spec+fog, 7 z, 8 color, 9 packed x/y. */
+   for (i=start; i < end; i++, v = (VERTEX *)((GLubyte *)v + stride)) {
+	 CARD32 *p = (CARD32 *)v;
+#if DO_TEX1 || DO_TEX0
+	 GLfloat w;
+
+	 if (mask[i] == 0) {
+	    /* unclipped */
+	    w = coord[0][3];
+	 } else {
+	    /* clipped */
+	    w = 1.0;
+	 }
+#endif
+	 
+#if DO_TEX1
+#if DO_PTEX
+	 if (tc1_size == 4) {
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    LE32_OUT_FLOAT( p++, w*tc1[0][0] );		/* VERTEX_?_SECONDARY_S */
+	    LE32_OUT_FLOAT( p++, w*tc1[0][1] );		/* VERTEX_?_SECONDARY_T */
+	    LE32_OUT_FLOAT( p++, w*tc1[0][3] );		/* VERTEX_?_SECONDARY_W */
+#else /* !MACH64_PREMULT_TEXCOORDS */
+	    float rhw = 1.0 / tc1[0][3];
+	    LE32_OUT_FLOAT( p++, rhw*tc1[0][0] );	/* VERTEX_?_SECONDARY_S */
+	    LE32_OUT_FLOAT( p++, rhw*tc1[0][1] );	/* VERTEX_?_SECONDARY_T */
+	    LE32_OUT_FLOAT( p++, w*tc1[0][3] );		/* VERTEX_?_SECONDARY_W */	
+#endif /* !MACH64_PREMULT_TEXCOORDS */
+	 } else {
+#endif /* DO_PTEX */
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    LE32_OUT_FLOAT( p++, w*tc1[0][0] );		/* VERTEX_?_SECONDARY_S */
+	    LE32_OUT_FLOAT( p++, w*tc1[0][1] );		/* VERTEX_?_SECONDARY_T */
+	    LE32_OUT_FLOAT( p++, w );			/* VERTEX_?_SECONDARY_W */
+#else /* !MACH64_PREMULT_TEXCOORDS */
+	    LE32_OUT_FLOAT( p++, tc1[0][0] );		/* VERTEX_?_SECONDARY_S */
+	    LE32_OUT_FLOAT( p++, tc1[0][1] );		/* VERTEX_?_SECONDARY_T */
+	    LE32_OUT_FLOAT( p++, w );			/* VERTEX_?_SECONDARY_W */
+#endif /* !MACH64_PREMULT_TEXCOORDS */
+#if DO_PTEX
+	 }
+#endif /* DO_PTEX */
+	 STRIDE_4F(tc1, tc1_stride);
+#else /* !DO_TEX1 */
+	 p += 3;
+#endif /* !DO_TEX1 */
+	    
+#if DO_TEX0
+#if DO_PTEX
+	 if (tc0_size == 4) {
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    LE32_OUT_FLOAT( p++, w*tc0[0][0] );			/* VERTEX_?_S */
+	    LE32_OUT_FLOAT( p++, w*tc0[0][1] );			/* VERTEX_?_T */
+	    LE32_OUT_FLOAT( p++, w*tc0[0][3] );			/* VERTEX_?_W */
+#else /* !MACH64_PREMULT_TEXCOORDS */
+	    float rhw = 1.0 / tc0[0][3];
+	    LE32_OUT_FLOAT( p++, rhw*tc0[0][0] );		/* VERTEX_?_S */
+	    LE32_OUT_FLOAT( p++, rhw*tc0[0][1] );		/* VERTEX_?_T */
+	    LE32_OUT_FLOAT( p++, w*tc0[0][3] );			/* VERTEX_?_W */	
+#endif /* !MACH64_PREMULT_TEXCOORDS */
+	 } else {
+#endif /* DO_PTEX */
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    LE32_OUT_FLOAT( p++, w*tc0[0][0] );			/* VERTEX_?_S */
+	    LE32_OUT_FLOAT( p++, w*tc0[0][1] );			/* VERTEX_?_T */
+	    LE32_OUT_FLOAT( p++, w );				/* VERTEX_?_W */
+#else /* !MACH64_PREMULT_TEXCOORDS */
+	    LE32_OUT_FLOAT( p++, tc0[0][0] );			/* VERTEX_?_S */
+	    LE32_OUT_FLOAT( p++, tc0[0][1] );			/* VERTEX_?_T */
+	    LE32_OUT_FLOAT( p++, w );				/* VERTEX_?_W */
+#endif /* !MACH64_PREMULT_TEXCOORDS */
+#if DO_PTEX
+	 }
+#endif /* DO_PTEX */
+	 STRIDE_4F(tc0, tc0_stride);
+#else /* !DO_TEX0 */
+	 p += 3;
+#endif /* !DO_TEX0 */
+
+#if DO_SPEC
+	 UNCLAMPED_FLOAT_TO_UBYTE(((GLubyte *)p)[0],  spec[0][2]); 	/* VERTEX_?_SPEC_B */
+	 UNCLAMPED_FLOAT_TO_UBYTE(((GLubyte *)p)[1],  spec[0][1]);	/* VERTEX_?_SPEC_G */
+	 UNCLAMPED_FLOAT_TO_UBYTE(((GLubyte *)p)[2],  spec[0][0]);	/* VERTEX_?_SPEC_R */
+
+	 STRIDE_4F(spec, spec_stride);
+#endif
+#if DO_FOG
+	 UNCLAMPED_FLOAT_TO_UBYTE(((GLubyte *)p)[3], fog[0][0]);  /* VERTEX_?_SPEC_A */
+	 /*	 ((GLubyte *)p)[3] = fog[0][0] * 255.0;	 */
+	 STRIDE_4F(fog, fog_stride);
+#endif
+	 p++;
+	    
+#if DO_XYZW
+	 if (mask[i] == 0) {
+	    /* unclipped */
+	    LE32_OUT( p++, VIEWPORT_Z( coord[0][2] ) );	/* VERTEX_?_Z */
+	 } else {
+#endif
+	    p++;
+#if DO_XYZW
+	 }
+#endif
+
+#if DO_RGBA
+	 UNCLAMPED_FLOAT_TO_UBYTE(((GLubyte *)p)[0], col[0][2]);
+	 UNCLAMPED_FLOAT_TO_UBYTE(((GLubyte *)p)[1], col[0][1]);
+	 UNCLAMPED_FLOAT_TO_UBYTE(((GLubyte *)p)[2], col[0][0]);
+	 UNCLAMPED_FLOAT_TO_UBYTE(((GLubyte *)p)[3], col[0][3]);
+	 p++;
+	 STRIDE_4F(col, col_stride);
+#else
+	 p++;
+#endif
+
+#if DO_XYZW
+	 if (mask[i] == 0) {
+	    /* unclipped */
+	    LE32_OUT( p,
+		      (VIEWPORT_X( coord[0][0] ) << 16) |	/* VERTEX_?_X */
+		      (VIEWPORT_Y( coord[0][1] ) & 0xffff) );	/* VERTEX_?_Y */
+	    /* X and Y are signed 16-bit fields: decode through GLshort so negatives print correctly. */
+	    if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) {
+	       fprintf( stderr, "%s: vert %d: %.2f %.2f %.2f %x\n",
+			__FUNCTION__,
+			i,
+			(GLshort)(LE32_IN( p ) >> 16)/4.0,
+			(GLshort)(LE32_IN( p ) & 0xffff)/4.0,
+			LE32_IN( p - 2 )/65536.0,
+			*(GLuint *)(p - 1) );
+	    }
+	 }
+#endif
+#if DO_TEX1 || DO_TEX0 || DO_XYZW
+	 STRIDE_4F(coord, coord_stride);
+#endif
+	 
+	 assert( p + 1 - (CARD32 *)v == 10 );
+      }
+}
+
+#if DO_XYZW && DO_RGBA
+
+static GLboolean TAG(check_tex_sizes)( GLcontext *ctx )
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   /* Returns GL_FALSE when a 4-component texcoord is in use but this setup lacks DO_PTEX. */
+   /* Force 'missing' texcoords to something valid.
+    */
+   if (DO_TEX1 && VB->TexCoordPtr[0] == 0)
+      VB->TexCoordPtr[0] = VB->TexCoordPtr[1];
+
+   if (DO_PTEX)
+      return GL_TRUE;
+
+   /* No hardware support for projective texture.  Can fake it for
+    * TEX0 only.
+    */
+   if ((DO_TEX1 && VB->TexCoordPtr[GET_TEXSOURCE(1)]->size == 4)) {
+      PTEX_FALLBACK();
+      return GL_FALSE;
+   }
+   /* Projective TEX0: PTEX_FALLBACK() is raised only when TEX1 is also enabled. */
+   if (DO_TEX0 && VB->TexCoordPtr[GET_TEXSOURCE(0)]->size == 4) {
+      if (DO_TEX1) {
+	 PTEX_FALLBACK();
+      }
+      return GL_FALSE;
+   }
+
+   return GL_TRUE;
+}
+
+
+static void TAG(interp)( GLcontext *ctx,
+			 GLfloat t,
+			 GLuint edst, GLuint eout, GLuint ein,
+			 GLboolean force_boundary )
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLubyte *ddverts = GET_VERTEX_STORE();
+   GLuint size = GET_VERTEX_SIZE();
+   const GLfloat *dstclip = VB->ClipPtr->data[edst];
+   GLfloat w;
+   const GLfloat *s = GET_VIEWPORT_MAT();
+   /* Builds hardware vertex 'edst' from 'eout' and 'ein' at parameter t; all three live in the vertex store. */
+   CARD32 *dst = (CARD32 *)(ddverts + (edst * size));
+   CARD32 *in  = (CARD32 *)(ddverts + (ein  * size));
+   CARD32 *out = (CARD32 *)(ddverts + (eout * size));
+
+   (void)s;
+   /* Reciprocal of the destination's clip w, with w == 0 mapped to 1. */
+   w = (dstclip[3] == 0.0F) ? 1.0 : (1.0 / dstclip[3]);
+
+#if DO_TEX1
+   {
+      GLfloat temp;
+#if DO_PTEX
+      GLfloat wout = VB->NdcPtr->data[eout][3];
+      GLfloat win = VB->NdcPtr->data[ein][3];
+      GLfloat qout = LE32_IN_FLOAT( out + 2 ) / wout;
+      GLfloat qin = LE32_IN_FLOAT( in + 2 ) / win;
+      GLfloat qdst, rqdst;
+
+      INTERP_F( t, qdst, qout, qin );
+      rqdst = 1.0 / qdst;
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ) * qout, LE32_IN_FLOAT( in ) * qin );
+      LE32_OUT_FLOAT( dst, temp*rqdst );			/* VERTEX_?_SECONDARY_S */
+      dst++; out++; in++;
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ) * qout, LE32_IN_FLOAT( in ) * qin );
+      LE32_OUT_FLOAT( dst, temp*rqdst );			/* VERTEX_?_SECONDARY_T */
+      dst++; out++; in++;
+      
+      LE32_OUT_FLOAT( dst, w*rqdst );				/* VERTEX_?_SECONDARY_W */
+      dst++; out++; in++;
+#else /* !DO_PTEX */
+#ifdef MACH64_PREMULT_TEXCOORDS
+      GLfloat qout = w / LE32_IN_FLOAT( out + 2 );
+      GLfloat qin = w / LE32_IN_FLOAT( in + 2 );
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ) * qout, LE32_IN_FLOAT( in ) * qin );
+      LE32_OUT_FLOAT( dst, temp );				/* VERTEX_?_SECONDARY_S */
+      dst++; out++; in++;
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ) * qout, LE32_IN_FLOAT( in ) * qin );
+      LE32_OUT_FLOAT( dst, temp );				/* VERTEX_?_SECONDARY_T */
+      dst++; out++; in++;
+#else /* !MACH64_PREMULT_TEXCOORDS */
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ), LE32_IN_FLOAT( in ) );
+      LE32_OUT_FLOAT( dst, temp );				/* VERTEX_?_SECONDARY_S */
+      dst++; out++; in++;
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ), LE32_IN_FLOAT( in ) );
+      LE32_OUT_FLOAT( dst, temp );				/* VERTEX_?_SECONDARY_T */
+      dst++; out++; in++;
+#endif /* !MACH64_PREMULT_TEXCOORDS */
+      LE32_OUT_FLOAT( dst, w );					/* VERTEX_?_SECONDARY_W */
+      dst++; out++; in++;
+#endif /* !DO_PTEX */
+   }
+#else /* !DO_TEX1 */
+   dst += 3; out += 3; in += 3;
+#endif /* !DO_TEX1 */
+
+#if DO_TEX0
+   {
+      GLfloat temp;
+#if DO_PTEX
+      GLfloat wout = VB->NdcPtr->data[eout][3];
+      GLfloat win = VB->NdcPtr->data[ein][3];
+      GLfloat qout = LE32_IN_FLOAT( out + 2 ) / wout;
+      GLfloat qin = LE32_IN_FLOAT( in + 2 ) / win;
+      GLfloat qdst, rqdst;
+
+      INTERP_F( t, qdst, qout, qin );
+      rqdst = 1.0 / qdst;
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ) * qout, LE32_IN_FLOAT( in ) * qin );
+      LE32_OUT_FLOAT( dst, temp*rqdst );			/* VERTEX_?_S */
+      dst++; out++; in++;
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ) * qout, LE32_IN_FLOAT( in ) * qin );
+      LE32_OUT_FLOAT( dst, temp*rqdst );			/* VERTEX_?_T */
+      dst++; out++; in++;
+      
+      LE32_OUT_FLOAT( dst, w*rqdst );				/* VERTEX_?_W */
+      dst++; out++; in++;
+#else /* !DO_PTEX */
+#ifdef MACH64_PREMULT_TEXCOORDS
+      GLfloat qout = w / LE32_IN_FLOAT( out + 2 );
+      GLfloat qin = w / LE32_IN_FLOAT( in + 2 );
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ) * qout, LE32_IN_FLOAT( in ) * qin );
+      LE32_OUT_FLOAT( dst, temp );				/* VERTEX_?_S */
+      dst++; out++; in++;
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ) * qout, LE32_IN_FLOAT( in ) * qin );
+      LE32_OUT_FLOAT( dst, temp );				/* VERTEX_?_T */
+      dst++; out++; in++;
+#else /* !MACH64_PREMULT_TEXCOORDS */
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ), LE32_IN_FLOAT( in ) );
+      LE32_OUT_FLOAT( dst, temp );				/* VERTEX_?_S */
+      dst++; out++; in++;
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ), LE32_IN_FLOAT( in ) );
+      LE32_OUT_FLOAT( dst, temp );				/* VERTEX_?_T */
+      dst++; out++; in++;
+#endif /* !MACH64_PREMULT_TEXCOORDS */
+      LE32_OUT_FLOAT( dst, w );					/* VERTEX_?_W */
+      dst++; out++; in++;
+#endif /* !DO_PTEX */
+   }
+#else /* !DO_TEX0 */
+   dst += 3; out += 3; in += 3;
+#endif /* !DO_TEX0 */
+   
+#if DO_SPEC
+   INTERP_UB( t, ((GLubyte *)dst)[0], ((GLubyte *)out)[0], ((GLubyte *)in)[0] );	/* VERTEX_?_SPEC_B */
+   INTERP_UB( t, ((GLubyte *)dst)[1], ((GLubyte *)out)[1], ((GLubyte *)in)[1] );	/* VERTEX_?_SPEC_G */
+   INTERP_UB( t, ((GLubyte *)dst)[2], ((GLubyte *)out)[2], ((GLubyte *)in)[2] );	/* VERTEX_?_SPEC_R */
+#endif
+   
+#if DO_FOG
+   INTERP_UB( t, ((GLubyte *)dst)[3], ((GLubyte *)out)[3], ((GLubyte *)in)[3] );	/* VERTEX_?_SPEC_A */
+#endif /* DO_FOG */
+
+   dst++; out++; in++;
+   /* Z (and X/Y below) are recomputed from the clip coordinates, not interpolated from in/out. */
+   LE32_OUT( dst, VIEWPORT_Z( dstclip[2] * w ) );		/* VERTEX_?_Z */
+   dst++; out++; in++;
+  
+   INTERP_UB( t, ((GLubyte *)dst)[0], ((GLubyte *)out)[0], ((GLubyte *)in)[0] );	/* VERTEX_?_B */
+   INTERP_UB( t, ((GLubyte *)dst)[1], ((GLubyte *)out)[1], ((GLubyte *)in)[1] );	/* VERTEX_?_G */
+   INTERP_UB( t, ((GLubyte *)dst)[2], ((GLubyte *)out)[2], ((GLubyte *)in)[2] );	/* VERTEX_?_R */
+   INTERP_UB( t, ((GLubyte *)dst)[3], ((GLubyte *)out)[3], ((GLubyte *)in)[3] );	/* VERTEX_?_A */
+   dst++; /*out++; in++;*/
+
+   LE32_OUT( dst,
+	     (VIEWPORT_X( dstclip[0] * w ) << 16) |		/* VERTEX_?_X */
+	     (VIEWPORT_Y( dstclip[1] * w ) & 0xffff) );		/* VERTEX_?_Y */
+   /* in/out were not advanced for the last step, hence "+ 2" rather than "+ 1" below. */
+   assert( dst + 1 - (CARD32 *)(ddverts + (edst * size)) == 10 );
+   assert( in  + 2 - (CARD32 *)(ddverts + (ein  * size)) == 10 );
+   assert( out + 2 - (CARD32 *)(ddverts + (eout * size)) == 10 );
+
+   if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) {
+      fprintf( stderr, "%s: dst vert: %.2f %.2f %.2f %x\n",
+	       __FUNCTION__,
+	       (GLshort)(LE32_IN( dst ) >> 16)/4.0,
+	       (GLshort)(LE32_IN( dst ) & 0xffff)/4.0,
+	       LE32_IN( dst - 2 )/65536.0,
+	       *(GLuint *)(dst - 1) );
+   }
+}
+
+#endif /* DO_RGBA && DO_XYZW */
+
+
+static void TAG(copy_pv)( GLcontext *ctx, GLuint edst, GLuint esrc )
+{
+#if DO_SPEC || DO_FOG || DO_RGBA
+   LOCALVARS   
+   GLuint *verts = GET_VERTEX_STORE();
+   GLuint size = GET_VERTEX_SIZE();
+   GLuint *dst = (GLuint *)(verts + (edst * size));
+   GLuint *src = (GLuint *)(verts + (esrc * size));
+#endif
+   /* Copies the packed color dwords of vertex 'esrc' into 'edst'; the other dwords are untouched. */
+#if DO_SPEC || DO_FOG
+   dst[6] = src[6];			/* VERTEX_?_SPEC_ARGB */
+#endif
+   /* Dword 8 holds the packed primary color. */
+#if DO_RGBA
+   dst[8] = src[8];			/* VERTEX_?_ARGB */
+#endif
+}
+
+static void TAG(init)( void )
+{
+   setup_tab[IND].emit = TAG(emit);
+   /* check_tex_sizes and interp exist only for setups with both XYZW and RGBA. */
+#if DO_XYZW && DO_RGBA
+   setup_tab[IND].check_tex_sizes = TAG(check_tex_sizes);
+   setup_tab[IND].interp = TAG(interp);
+#endif
+
+   setup_tab[IND].copy_pv = TAG(copy_pv);
+   /* vertex_size counts dwords: 3 (tiny), 4 (+spec/fog), 7 (+tex0), 10 (+tex1). */
+#if DO_TEX1
+   setup_tab[IND].vertex_format = TEX1_VERTEX_FORMAT;
+   setup_tab[IND].vertex_size = 10;
+#elif DO_TEX0
+   setup_tab[IND].vertex_format = TEX0_VERTEX_FORMAT;
+   setup_tab[IND].vertex_size = 7;
+#elif DO_SPEC || DO_FOG
+   setup_tab[IND].vertex_format = NOTEX_VERTEX_FORMAT;
+   setup_tab[IND].vertex_size = 4;
+#else
+   setup_tab[IND].vertex_format = TINY_VERTEX_FORMAT;
+   setup_tab[IND].vertex_size = 3;
+#endif
+
+}
+
+
+#undef IND
+#undef TAG
diff --git a/src/mach64_reg.h b/src/mach64_reg.h
new file mode 100644
index 0000000..abbba29
--- /dev/null
+++ b/src/mach64_reg.h
@@ -0,0 +1,406 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#ifndef __MACH64_REG_H__
+#define __MACH64_REG_H__
+
+/*
+ * Not sure how this compares with the G200, but the Rage Pro has two
+ * banks of registers, with bank 0 at (aperture base + memmap offset - 1KB)
+ * and bank 1 at (aperture base + memmap offset - 2KB).  But, to send them
+ * via DMA, we need to encode them as memory map select rather than physical
+ * offsets.
+ */
+#define DWMREG0		0x0400
+#define DWMREG0_END	0x07ff
+#define DWMREG1		0x0000
+#define DWMREG1_END	0x03ff
+/* Byte offset -> dword index; bank 1 indices additionally carry bit 8 (0x0100). */
+#define ISREG0(r)	( ( (r) >= DWMREG0 ) && ( (r) <= DWMREG0_END ) )
+#define ADRINDEX0(r)	( ((r) - DWMREG0) >> 2 )
+#define ADRINDEX1(r)	( ( ((r) - DWMREG1) >> 2 ) | 0x0100 )
+#define ADRINDEX(r)	( ISREG0(r) ? ADRINDEX0(r) : ADRINDEX1(r) )
+
+#define MMREG0		0x0000
+#define MMREG0_END	0x00ff
+/* Dword index -> byte offset (the inverse of ADRINDEX). */
+#define ISMMREG0(r)	( ( (r) >= MMREG0 ) && ( (r) <= MMREG0_END ) )
+#define MMSELECT0(r)	( ((r)<<2) + DWMREG0 )
+#define MMSELECT1(r)	( ( (((r) & 0xff)<<2) + DWMREG1 ) )
+#define MMSELECT(r)	( ISMMREG0(r) ? MMSELECT0(r) : MMSELECT1(r) )
+
+/* FIXME: If register reads are necessary, we should account for endianness here */
+#define MACH64_BASE(reg)	((unsigned long)(mmesa->mach64Screen->mmio.map))
+#define MACH64_ADDR(reg)	(MACH64_BASE(reg) + (reg))
+/* MACH64_BASE is unsigned long, not CARD32, so the mmio pointer is not truncated on LP64. */
+#define MACH64_DEREF(reg)	(*(__volatile__ CARD32 *)MACH64_ADDR(reg))
+#define MACH64_READ(reg)	MACH64_DEREF(reg)
+
+
+/* ================================================================
+ * Registers
+ */
+
+#define MACH64_ALPHA_TST_CNTL			0x0550
+#	define MACH64_ALPHA_TEST_EN			(1 << 0)
+#	define MACH64_ALPHA_TEST_MASK			(7 << 4)
+#	define MACH64_ALPHA_TEST_NEVER			(0 << 4)
+#	define MACH64_ALPHA_TEST_LESS			(1 << 4)
+#	define MACH64_ALPHA_TEST_LEQUAL			(2 << 4)
+#	define MACH64_ALPHA_TEST_EQUAL			(3 << 4)
+#	define MACH64_ALPHA_TEST_GEQUAL			(4 << 4)
+#	define MACH64_ALPHA_TEST_GREATER		(5 << 4)
+#	define MACH64_ALPHA_TEST_NOTEQUAL		(6 << 4)
+#	define MACH64_ALPHA_TEST_ALWAYS			(7 << 4)
+#	define MACH64_ALPHA_MOD_MSB			(1 << 7)
+#	define MACH64_ALPHA_DST_MASK			(7 << 8)
+#	define MACH64_ALPHA_DST_ZERO			(0 << 8)
+#	define MACH64_ALPHA_DST_ONE			(1 << 8)
+#	define MACH64_ALPHA_DST_SRCALPHA		(4 << 8)
+#	define MACH64_ALPHA_DST_INVSRCALPHA		(5 << 8)
+#	define MACH64_ALPHA_DST_DSTALPHA		(6 << 8)
+#	define MACH64_ALPHA_DST_INVDSTALPHA		(7 << 8)
+#	define MACH64_ALPHA_TST_SRC_TEXEL		(0 << 12)
+#	define MACH64_ALPHA_TST_SRC_SRCALPHA		(1 << 12)
+#	define MACH64_REF_ALPHA_MASK			(0xff << 16)
+#	define MACH64_REF_ALPHA_SHIFT			16
+#	define MACH64_COMPOSITE_SHADOW			(1 << 30)
+#	define MACH64_SPECULAR_LIGHT_EN			(1 << 31)
+
+#define MACH64_BUS_CNTL				0x04a0
+#	define MACH64_BUS_MSTR_RESET			(1 << 1)
+#	define MACH64_BUS_FLUSH_BUF			(1 << 2)
+#	define MACH64_BUS_MASTER_DIS			(1 << 6)
+#	define MACH64_BUS_EXT_REG_EN			(1 << 27)
+
+#define MACH64_COMPOSITE_SHADOW_ID		0x0798
+
+#define MACH64_CLR_CMP_CLR			0x0700
+#define MACH64_CLR_CMP_CNTL			0x0708
+#define MACH64_CLR_CMP_MASK			0x0704
+
+#define MACH64_DP_BKGD_CLR			0x06c0
+#define MACH64_DP_FOG_CLR			0x06c4	/* same offset as MACH64_DP_FRGD_CLR */
+#define MACH64_DP_FGRD_BKGD_CLR			0x06e0
+#define MACH64_DP_FRGD_CLR			0x06c4	/* same offset as MACH64_DP_FOG_CLR */
+#define MACH64_DP_FGRD_CLR_MIX			0x06dc
+
+#define MACH64_DP_MIX				0x06d4
+#	define BKGD_MIX_NOT_D				(0 << 0)
+#	define BKGD_MIX_ZERO				(1 << 0)
+#	define BKGD_MIX_ONE				(2 << 0)
+#	define MACH64_BKGD_MIX_D			(3 << 0)
+#	define BKGD_MIX_NOT_S				(4 << 0)
+#	define BKGD_MIX_D_XOR_S				(5 << 0)
+#	define BKGD_MIX_NOT_D_XOR_S			(6 << 0)
+#	define MACH64_BKGD_MIX_S			(7 << 0)
+#	define BKGD_MIX_NOT_D_OR_NOT_S			(8 << 0)
+#	define BKGD_MIX_D_OR_NOT_S			(9 << 0)
+#	define BKGD_MIX_NOT_D_OR_S			(10 << 0)
+#	define BKGD_MIX_D_OR_S				(11 << 0)
+#	define BKGD_MIX_D_AND_S				(12 << 0)
+#	define BKGD_MIX_NOT_D_AND_S			(13 << 0)
+#	define BKGD_MIX_D_AND_NOT_S			(14 << 0)
+#	define BKGD_MIX_NOT_D_AND_NOT_S			(15 << 0)
+#	define BKGD_MIX_D_PLUS_S_DIV2			(23 << 0)
+#	define FRGD_MIX_NOT_D				(0 << 16)
+#	define FRGD_MIX_ZERO				(1 << 16)
+#	define FRGD_MIX_ONE				(2 << 16)
+#	define FRGD_MIX_D				(3 << 16)
+#	define FRGD_MIX_NOT_S				(4 << 16)
+#	define FRGD_MIX_D_XOR_S				(5 << 16)
+#	define FRGD_MIX_NOT_D_XOR_S			(6 << 16)
+#	define MACH64_FRGD_MIX_S			(7 << 16)
+#	define FRGD_MIX_NOT_D_OR_NOT_S			(8 << 16)
+#	define FRGD_MIX_D_OR_NOT_S			(9 << 16)
+#	define FRGD_MIX_NOT_D_OR_S			(10 << 16)
+#	define FRGD_MIX_D_OR_S				(11 << 16)
+#	define FRGD_MIX_D_AND_S				(12 << 16)
+#	define FRGD_MIX_NOT_D_AND_S			(13 << 16)
+#	define FRGD_MIX_D_AND_NOT_S			(14 << 16)
+#	define FRGD_MIX_NOT_D_AND_NOT_S			(15 << 16)
+#	define FRGD_MIX_D_PLUS_S_DIV2			(23 << 16)
+
+#define MACH64_DP_PIX_WIDTH			0x06d0
+#	define MACH64_COMPOSITE_PIX_WIDTH_MASK		(0xf << 4)
+#	define MACH64_HOST_TRIPLE_ENABLE		(1 << 13)
+#	define MACH64_BYTE_ORDER_MSB_TO_LSB		(0 << 24)
+#	define MACH64_BYTE_ORDER_LSB_TO_MSB		(1 << 24)
+#	define MACH64_SCALE_PIX_WIDTH_MASK		(0xf << 28)
+
+#define MACH64_DP_SRC				0x06d8
+#	define MACH64_BKGD_SRC_BKGD_CLR			(0 << 0)
+#	define MACH64_BKGD_SRC_FRGD_CLR			(1 << 0)
+#	define MACH64_BKGD_SRC_HOST			(2 << 0)
+#	define MACH64_BKGD_SRC_BLIT			(3 << 0)
+#	define MACH64_BKGD_SRC_PATTERN			(4 << 0)
+#	define MACH64_BKGD_SRC_3D			(5 << 0)
+#	define MACH64_FRGD_SRC_BKGD_CLR			(0 << 8)
+#	define MACH64_FRGD_SRC_FRGD_CLR			(1 << 8)
+#	define MACH64_FRGD_SRC_HOST			(2 << 8)
+#	define MACH64_FRGD_SRC_BLIT			(3 << 8)
+#	define MACH64_FRGD_SRC_PATTERN			(4 << 8)
+#	define MACH64_FRGD_SRC_3D			(5 << 8)
+#	define MACH64_MONO_SRC_ONE			(0 << 16)
+#	define MACH64_MONO_SRC_PATTERN			(1 << 16)
+#	define MACH64_MONO_SRC_HOST			(2 << 16)
+#	define MACH64_MONO_SRC_BLIT			(3 << 16)
+
+#define MACH64_DP_WRITE_MASK			0x06c8
+
+#define MACH64_DST_CNTL				0x0530
+#	define MACH64_DST_X_RIGHT_TO_LEFT		(0 << 0)
+#	define MACH64_DST_X_LEFT_TO_RIGHT		(1 << 0)
+#	define MACH64_DST_Y_BOTTOM_TO_TOP		(0 << 1)
+#	define MACH64_DST_Y_TOP_TO_BOTTOM		(1 << 1)
+#	define MACH64_DST_X_MAJOR			(0 << 2)
+#	define MACH64_DST_Y_MAJOR			(1 << 2)
+#	define MACH64_DST_X_TILE			(1 << 3)
+#	define MACH64_DST_Y_TILE			(1 << 4)
+#	define MACH64_DST_LAST_PEL			(1 << 5)
+#	define MACH64_DST_POLYGON_ENABLE		(1 << 6)
+#	define MACH64_DST_24_ROTATION_ENABLE		(1 << 7)
+
+#define MACH64_DST_HEIGHT_WIDTH			0x0518
+#define MACH64_DST_OFF_PITCH			0x0500
+#define MACH64_DST_WIDTH_HEIGHT			0x06ec
+#define MACH64_DST_X_Y				0x06e8
+#define MACH64_DST_Y_X				0x050c
+
+#define MACH64_FIFO_STAT			0x0710
+#	define MACH64_FIFO_SLOT_MASK			0x0000ffff
+#	define MACH64_FIFO_ERR				(1 << 31)
+
+#define MACH64_GEN_TEST_CNTL			0x04d0
+#define MACH64_GUI_CMDFIFO_DEBUG		0x0170
+#define MACH64_GUI_CMDFIFO_DATA			0x0174
+#define MACH64_GUI_CNTL				0x0178
+#define MACH64_GUI_STAT				0x0738
+#	define MACH64_GUI_ACTIVE			(1 << 0)
+#define MACH64_GUI_TRAJ_CNTL			0x0730
+
+#define MACH64_HOST_CNTL			0x0640
+#define MACH64_HOST_DATA0			0x0600
+#define MACH64_HW_DEBUG				0x047c
+
+#define MACH64_ONE_OVER_AREA			0x029c
+#define MACH64_ONE_OVER_AREA_UC			0x0300
+
+#define MACH64_PAT_REG0				0x0680
+#define MACH64_PAT_REG1				0x0684
+
+#define MACH64_SC_LEFT_RIGHT			0x06a8
+#define MACH64_SC_TOP_BOTTOM			0x06b4
+#define MACH64_SCALE_3D_CNTL			0x05fc
+#	define MACH64_SCALE_PIX_EXPAND_ZERO_EXTEND	(0 << 0)
+#	define MACH64_SCALE_PIX_EXPAND_DYNAMIC_RANGE	(1 << 0)
+#	define MACH64_SCALE_DITHER_ERROR_DIFFUSE	(0 << 1)
+#	define MACH64_SCALE_DITHER_2D_TABLE		(1 << 1)
+#	define MACH64_DITHER_EN				(1 << 2)
+#	define MACH64_DITHER_INIT_CURRENT		(0 << 3)
+#	define MACH64_DITHER_INIT_RESET			(1 << 3)
+#	define MACH64_ROUND_EN				(1 << 4)
+#	define MACH64_TEX_CACHE_DIS			(1 << 5)
+#	define MACH64_SCALE_3D_FCN_MASK			(3 << 6)
+#	define MACH64_SCALE_3D_FCN_NOP			(0 << 6)
+#	define MACH64_SCALE_3D_FCN_SCALE		(1 << 6)
+#	define MACH64_SCALE_3D_FCN_TEXTURE		(2 << 6)
+#	define MACH64_SCALE_3D_FCN_SHADE		(3 << 6)
+#	define MACH64_TEXTURE_DISABLE			(1 << 6)
+#	define MACH64_EDGE_ANTI_ALIAS			(1 << 8)
+#	define MACH64_TEX_CACHE_SPLIT			(1 << 9)
+#	define MACH64_APPLE_YUV_MODE			(1 << 10)
+#	define MACH64_ALPHA_FOG_EN_MASK			(3 << 11)
+#	define MACH64_ALPHA_FOG_DIS			(0 << 11)
+#	define MACH64_ALPHA_FOG_EN_ALPHA		(1 << 11)
+#	define MACH64_ALPHA_FOG_EN_FOG			(2 << 11)
+#	define MACH64_ALPHA_BLEND_SAT			(1 << 13)
+#	define MACH64_RED_DITHER_MAX			(1 << 14)
+#	define MACH64_SIGNED_DST_CLAMP			(1 << 15)
+#	define MACH64_ALPHA_BLEND_SRC_MASK		(7 << 16)
+#	define MACH64_ALPHA_BLEND_SRC_ZERO		(0 << 16)
+#	define MACH64_ALPHA_BLEND_SRC_ONE		(1 << 16)
+#	define MACH64_ALPHA_BLEND_SRC_DSTCOLOR		(2 << 16)
+#	define MACH64_ALPHA_BLEND_SRC_INVDSTCOLOR	(3 << 16)
+#	define MACH64_ALPHA_BLEND_SRC_SRCALPHA		(4 << 16)
+#	define MACH64_ALPHA_BLEND_SRC_INVSRCALPHA	(5 << 16)
+#	define MACH64_ALPHA_BLEND_SRC_DSTALPHA		(6 << 16)
+#	define MACH64_ALPHA_BLEND_SRC_INVDSTALPHA	(7 << 16)
+#	define MACH64_ALPHA_BLEND_DST_MASK		(7 << 19)
+#	define MACH64_ALPHA_BLEND_DST_ZERO		(0 << 19)
+#	define MACH64_ALPHA_BLEND_DST_ONE		(1 << 19)
+#	define MACH64_ALPHA_BLEND_DST_SRCCOLOR		(2 << 19)
+#	define MACH64_ALPHA_BLEND_DST_INVSRCCOLOR	(3 << 19)
+#	define MACH64_ALPHA_BLEND_DST_SRCALPHA		(4 << 19)
+#	define MACH64_ALPHA_BLEND_DST_INVSRCALPHA	(5 << 19)
+#	define MACH64_ALPHA_BLEND_DST_DSTALPHA		(6 << 19)
+#	define MACH64_ALPHA_BLEND_DST_INVDSTALPHA	(7 << 19)
+#	define MACH64_TEX_LIGHT_FCN_MASK		(3 << 22)
+#	define MACH64_TEX_LIGHT_FCN_REPLACE		(0 << 22)
+#	define MACH64_TEX_LIGHT_FCN_MODULATE		(1 << 22)
+#	define MACH64_TEX_LIGHT_FCN_ALPHA_DECAL		(2 << 22)
+#	define MACH64_MIP_MAP_DISABLE			(1 << 24)
+#	define MACH64_BILINEAR_TEX_EN			(1 << 25)
+#	define MACH64_TEX_BLEND_FCN_MASK		(3 << 26)
+#	define MACH64_TEX_BLEND_FCN_NEAREST		(0 << 26)
+#	define MACH64_TEX_BLEND_FCN_LINEAR		(2 << 26)
+#	define MACH64_TEX_BLEND_FCN_TRILINEAR		(3 << 26)
+#	define MACH64_TEX_AMASK_AEN			(1 << 28)
+#	define MACH64_TEX_AMASK_BLEND_EDGE		(1 << 29)
+#	define MACH64_TEX_MAP_AEN			(1 << 30)
+#	define MACH64_SRC_3D_HOST_FIFO			(1 << 31)
+#define MACH64_SCRATCH_REG0			0x0480
+#define MACH64_SCRATCH_REG1			0x0484
+#define MACH64_SECONDARY_TEX_OFF		0x0778
+#define MACH64_SETUP_CNTL			0x0304
+#	define MACH64_DONT_START_TRI			(1 << 0)
+#	define MACH64_DONT_START_ANY			(1 << 2)
+#	define MACH64_FLAT_SHADE_MASK			(3 << 3)
+#	define MACH64_FLAT_SHADE_OFF			(0 << 3)
+#	define MACH64_FLAT_SHADE_VERTEX_1		(1 << 3)
+#	define MACH64_FLAT_SHADE_VERTEX_2		(2 << 3)
+#	define MACH64_FLAT_SHADE_VERTEX_3		(3 << 3)
+#	define MACH64_SOLID_MODE_OFF			(0 << 5)
+#	define MACH64_SOLID_MODE_ON			(1 << 5)
+#	define MACH64_LOG_MAX_INC_ADJ			(1 << 6)
+#	define MACH64_SET_UP_CONTINUE			(1 << 31)
+#define MACH64_SRC_CNTL				0x05b4
+#define MACH64_SRC_HEIGHT1			0x0594
+#define MACH64_SRC_HEIGHT2			0x05ac
+#define MACH64_SRC_HEIGHT1_WIDTH1		0x0598
+#define MACH64_SRC_HEIGHT2_WIDTH2		0x05b0
+#define MACH64_SRC_OFF_PITCH			0x0580
+#define MACH64_SRC_WIDTH1			0x0590
+#define MACH64_SRC_Y_X				0x058c
+
+#define MACH64_TEX_0_OFF			0x05c0
+#define MACH64_TEX_CNTL				0x0774
+#	define MACH64_LOD_BIAS_SHIFT			0
+#	define MACH64_LOD_BIAS_MASK			(0xf << 0)
+#	define MACH64_COMP_FACTOR_SHIFT			4
+#	define MACH64_COMP_FACTOR_MASK			(0xf << 4)
+#	define MACH64_TEXTURE_COMPOSITE			(1 << 8)
+#	define MACH64_COMP_COMBINE_BLEND		(0 << 9)
+#	define MACH64_COMP_COMBINE_MODULATE		(1 << 9)
+#	define MACH64_COMP_BLEND_NEAREST		(0 << 11)
+#	define MACH64_COMP_BLEND_BILINEAR		(1 << 11)
+#	define MACH64_COMP_FILTER_NEAREST		(0 << 12)
+#	define MACH64_COMP_FILTER_BILINEAR		(1 << 12)
+#	define MACH64_COMP_ALPHA			(1 << 13)
+#	define MACH64_TEXTURE_TILING			(1 << 14)
+#	define MACH64_COMPOSITE_TEX_TILING		(1 << 15)
+#	define MACH64_TEX_COLLISION_DISABLE		(1 << 16)
+#	define MACH64_TEXTURE_CLAMP_S			(1 << 17)
+#	define MACH64_TEXTURE_CLAMP_T			(1 << 18)
+#	define MACH64_TEX_ST_MULT_W			(0 << 19)
+#	define MACH64_TEX_ST_DIRECT			(1 << 19)
+#	define MACH64_TEX_SRC_LOCAL			(0 << 20)
+#	define MACH64_TEX_SRC_AGP			(1 << 20)
+#	define MACH64_TEX_UNCOMPRESSED			(0 << 21)
+#	define MACH64_TEX_VQ_COMPRESSED			(1 << 21)
+#	define MACH64_COMP_TEX_UNCOMPRESSED		(0 << 22)
+#	define MACH64_COMP_TEX_VQ_COMPRESSED		(1 << 22)
+#	define MACH64_TEX_CACHE_FLUSH			(1 << 23)
+#	define MACH64_SEC_TEX_CLAMP_S			(1 << 24)
+#	define MACH64_SEC_TEX_CLAMP_T			(1 << 25)
+#	define MACH64_TEX_WRAP_S			(1 << 28)
+#	define MACH64_TEX_WRAP_T			(1 << 29)
+#	define MACH64_TEX_CACHE_SIZE_4K			(1 << 30)
+#	define MACH64_TEX_CACHE_SIZE_2K			(1 << 30)	/* NOTE(review): identical to _4K; one of the two is presumably (0 << 30) - confirm against the register reference */
+#	define MACH64_SECONDARY_STW			(1 << 31)
+#define MACH64_TEX_PALETTE			0x077c
+#define MACH64_TEX_PALETTE_INDEX		0x0740
+#define MACH64_TEX_SIZE_PITCH			0x0770
+
+#define MACH64_VERTEX_1_ARGB			0x0254
+#define MACH64_VERTEX_1_S			0x0240
+#define MACH64_VERTEX_1_SECONDARY_S		0x0328
+#define MACH64_VERTEX_1_SECONDARY_T		0x032c
+#define MACH64_VERTEX_1_SECONDARY_W		0x0330
+#define MACH64_VERTEX_1_SPEC_ARGB		0x024c
+#define MACH64_VERTEX_1_T			0x0244
+#define MACH64_VERTEX_1_W			0x0248
+#define MACH64_VERTEX_1_X_Y			0x0258
+#define MACH64_VERTEX_1_Z			0x0250
+#define MACH64_VERTEX_2_ARGB			0x0274
+#define MACH64_VERTEX_2_S			0x0260
+#define MACH64_VERTEX_2_SECONDARY_S		0x0334
+#define MACH64_VERTEX_2_SECONDARY_T		0x0338
+#define MACH64_VERTEX_2_SECONDARY_W		0x033c
+#define MACH64_VERTEX_2_SPEC_ARGB		0x026c
+#define MACH64_VERTEX_2_T			0x0264
+#define MACH64_VERTEX_2_W			0x0268
+#define MACH64_VERTEX_2_X_Y			0x0278
+#define MACH64_VERTEX_2_Z			0x0270
+#define MACH64_VERTEX_3_ARGB			0x0294
+#define MACH64_VERTEX_3_S			0x0280
+#define MACH64_VERTEX_3_SECONDARY_S		0x02a0
+#define MACH64_VERTEX_3_SECONDARY_T		0x02a4
+#define MACH64_VERTEX_3_SECONDARY_W		0x02a8
+#define MACH64_VERTEX_3_SPEC_ARGB		0x028c
+#define MACH64_VERTEX_3_T			0x0284
+#define MACH64_VERTEX_3_W			0x0288
+#define MACH64_VERTEX_3_X_Y			0x0298
+#define MACH64_VERTEX_3_Z			0x0290
+
+#define MACH64_Z_CNTL				0x054c
+#	define MACH64_Z_EN				(1 << 0)
+#	define MACH64_Z_SRC_2D				(1 << 1)
+#	define MACH64_Z_TEST_MASK			(7 << 4)
+#	define MACH64_Z_TEST_NEVER			(0 << 4)
+#	define MACH64_Z_TEST_LESS			(1 << 4)
+#	define MACH64_Z_TEST_LEQUAL			(2 << 4)
+#	define MACH64_Z_TEST_EQUAL			(3 << 4)
+#	define MACH64_Z_TEST_GEQUAL			(4 << 4)
+#	define MACH64_Z_TEST_GREATER			(5 << 4)
+#	define MACH64_Z_TEST_NOTEQUAL			(6 << 4)
+#	define MACH64_Z_TEST_ALWAYS			(7 << 4)
+#	define MACH64_Z_MASK_EN				(1 << 8)
+#define MACH64_Z_OFF_PITCH			0x0548
+
+
+
+#define MACH64_DATATYPE_CI8				2
+#define MACH64_DATATYPE_ARGB1555			3
+#define MACH64_DATATYPE_RGB565				4
+#define MACH64_DATATYPE_ARGB8888			6
+#define MACH64_DATATYPE_RGB332				7
+#define MACH64_DATATYPE_Y8				8
+#define MACH64_DATATYPE_RGB8				9
+#define MACH64_DATATYPE_VYUY422				11
+#define MACH64_DATATYPE_YVYU422				12
+#define MACH64_DATATYPE_AYUV444				14
+#define MACH64_DATATYPE_ARGB4444			15
+
+#define MACH64_LAST_FRAME_REG			MACH64_PAT_REG0
+#define MACH64_LAST_DISPATCH_REG		MACH64_PAT_REG1
+
+#endif /* __MACH64_REG_H__ */
diff --git a/src/mach64_screen.c b/src/mach64_screen.c
new file mode 100644
index 0000000..4e9e216
--- /dev/null
+++ b/src/mach64_screen.c
@@ -0,0 +1,557 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "mach64_context.h"
+#include "mach64_ioctl.h"
+#include "mach64_tris.h"
+#include "mach64_vb.h"
+#include "mach64_span.h"
+
+#include "context.h"
+#include "imports.h"
+#include "framebuffer.h"
+#include "renderbuffer.h"
+
+#include "utils.h"
+#include "vblank.h"
+
+#include "GL/internal/dri_interface.h"
+
+/* Mach64 configuration
+ */
+#include "xmlpool.h"
+
+PUBLIC const char __driConfigOptions[] =
+DRI_CONF_BEGIN
+    DRI_CONF_SECTION_PERFORMANCE
+        DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
+    DRI_CONF_SECTION_END
+    DRI_CONF_SECTION_DEBUG
+        DRI_CONF_NO_RAST(false)
+#if ENABLE_PERF_BOXES
+        DRI_CONF_PERFORMANCE_BOXES(false)
+#endif
+    DRI_CONF_SECTION_END
+DRI_CONF_END;
+#if ENABLE_PERF_BOXES	/* option count handed to driParseOptionInfo() with the table above */
+static const GLuint __driNConfigOptions = 3;	/* vblank mode, no_rast, performance boxes */
+#else
+static const GLuint __driNConfigOptions = 2;	/* vblank mode, no_rast */
+#endif
+
+extern const struct dri_extension card_extensions[];
+
+static __GLcontextModes * fill_in_modes( __GLcontextModes * modes,
+					 unsigned pixel_bits,
+					 unsigned depth_bits,
+					 unsigned stencil_bits,
+					 const GLenum * db_modes,
+					 unsigned num_db_modes,
+					 int visType )
+{
+    /* Channel layouts, one row per pixel size: row 0 is RGB565, row 1 is RGB888. */
+    static const u_int8_t chan_bits[2][4] = {
+	{ 5, 6, 5, 0 },
+	{ 8, 8, 8, 0 }
+    };
+    static const u_int32_t chan_masks[2][4] = {
+	{ 0x0000F800, 0x000007E0, 0x0000001F, 0x00000000 },
+	{ 0x00FF0000, 0x0000FF00, 0x000000FF, 0x00000000 }
+    };
+    /* 1..16 bits per pixel pick row 0, 17..32 pick row 1. */
+    const unsigned row = ((pixel_bits + 15) / 16) - 1;
+    unsigned db;
+    unsigned accum;
+
+    /* Two list nodes per back-buffer mode: accum == 0 has no accumulation
+     * buffer, accum == 1 adds a 16-bit RGB one and is rated GLX_SLOW_CONFIG. */
+    for ( db = 0 ; db < num_db_modes ; db++ ) {
+	const GLboolean double_buffered = ( db_modes[db] != GLX_NONE );
+
+	for ( accum = 0 ; accum < 2 ; accum++ ) {
+	    const unsigned accum_bits = 16 * accum;
+
+	    modes->redBits   = chan_bits[row][0];
+	    modes->redMask   = chan_masks[row][0];
+	    modes->greenBits = chan_bits[row][1];
+	    modes->greenMask = chan_masks[row][1];
+	    modes->blueBits  = chan_bits[row][2];
+	    modes->blueMask  = chan_masks[row][2];
+	    modes->alphaBits = chan_bits[row][3];
+	    modes->alphaMask = chan_masks[row][3];
+	    modes->rgbBits   = modes->redBits + modes->greenBits
+		+ modes->blueBits + modes->alphaBits;
+
+	    modes->accumRedBits   = accum_bits;
+	    modes->accumGreenBits = accum_bits;
+	    modes->accumBlueBits  = accum_bits;
+	    modes->accumAlphaBits = 0;
+	    modes->visualRating = accum ? GLX_SLOW_CONFIG : GLX_NONE;
+	    modes->drawableType = GLX_WINDOW_BIT | GLX_PIXMAP_BIT;
+	    modes->visualType = visType;
+	    modes->renderType = GLX_RGBA_BIT;
+	    modes->rgbMode = GL_TRUE;
+	    modes->depthBits = depth_bits;
+	    modes->stencilBits = stencil_bits;
+
+	    /* swapMethod is only written for double-buffered entries. */
+	    modes->doubleBufferMode = double_buffered;
+	    if ( double_buffered )
+		modes->swapMethod = db_modes[db];
+
+	    modes = modes->next;
+	}
+    }
+
+    return modes;
+}
+
+/* Build the driver's list of visual configs; returns NULL if the list cannot be allocated. */
+static __GLcontextModes *
+mach64FillInModes( unsigned pixel_bits, unsigned depth_bits,
+		 unsigned stencil_bits, GLboolean have_back_buffer )
+{
+    __GLcontextModes * modes;
+    __GLcontextModes * m;
+    unsigned num_modes;
+    unsigned depth_buffer_factor;
+    unsigned back_buffer_factor;
+    unsigned i;
+
+    /* Right now GLX_SWAP_COPY_OML isn't supported, but it would be easy
+     * enough to add support.  Basically, if a context is created with an
+     * fbconfig where the swap method is GLX_SWAP_COPY_OML, pageflipping
+     * will never be used.
+     */
+    static const GLenum back_buffer_modes[] = {
+	GLX_NONE, GLX_SWAP_UNDEFINED_OML /*, GLX_SWAP_COPY_OML */
+    };
+
+    int depth_buffer_modes[2][2];	/* [variant][0] = depth bits, [variant][1] = stencil bits */
+
+    depth_buffer_modes[0][0] = depth_bits;
+    depth_buffer_modes[1][0] = depth_bits;
+    
+    /* Just like with the accumulation buffer, always provide some modes
+     * with a stencil buffer.  It will be a sw fallback, but some apps won't
+     * care about that.
+     */
+    depth_buffer_modes[0][1] = 0;
+    depth_buffer_modes[1][1] = (stencil_bits == 0) ? 8 : stencil_bits;
+
+    depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 2 : 1;
+    back_buffer_factor  = (have_back_buffer) ? 2 : 1;
+
+    num_modes = depth_buffer_factor * back_buffer_factor * 4;	/* x2 accum variants, x2 visual types */
+
+    modes = (*dri_interface->createContextModes)( num_modes, sizeof( __GLcontextModes ) );
+    if ( modes == NULL ) return NULL;	/* fill_in_modes() would dereference the empty list */
+    m = modes;
+    for ( i = 0 ; i < depth_buffer_factor ; i++ ) {
+	m = fill_in_modes( m, pixel_bits, 
+			   depth_buffer_modes[i][0], depth_buffer_modes[i][1],
+			   back_buffer_modes, back_buffer_factor,
+			   GLX_TRUE_COLOR );
+    }
+
+    for ( i = 0 ; i < depth_buffer_factor ; i++ ) {
+	m = fill_in_modes( m, pixel_bits, 
+			   depth_buffer_modes[i][0], depth_buffer_modes[i][1],
+			   back_buffer_modes, back_buffer_factor,
+			   GLX_DIRECT_COLOR );
+    }
+
+    /* Mark the visual as slow if there are "fake" stencil bits.
+     */
+    for ( m = modes ; m != NULL ; m = m->next ) {
+       if ( (m->stencilBits != 0) && (m->stencilBits != stencil_bits) ){
+	    m->visualRating = GLX_SLOW_CONFIG;
+	}
+    }
+
+    return modes;
+}
+
+
+/* Create the device specific screen private data struct: validate the DDX
+ * private record, query the IRQ, then map the registers, the DRM buffers and
+ * (non-PCI only) the AGP texture region.  Returns NULL on any failure. */
+static mach64ScreenRec *
+mach64CreateScreen( __DRIscreenPrivate *sPriv )
+{
+   mach64ScreenPtr mach64Screen;
+   ATIDRIPtr serverInfo = (ATIDRIPtr)sPriv->pDevPriv;
+   PFNGLXSCRENABLEEXTENSIONPROC glx_enable_extension =
+     (PFNGLXSCRENABLEEXTENSIONPROC) (*dri_interface->getProcAddress("glxEnableExtension"));
+   void * const psc = sPriv->psc->screenConfigs;
+
+   if (sPriv->devPrivSize != sizeof(ATIDRIRec)) {
+      fprintf(stderr,"\nERROR!  sizeof(ATIDRIRec) does not match passed size from device driver\n");
+      return NULL;
+   }
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_DRI ) 
+      fprintf( stderr, "%s\n", __FUNCTION__ );
+
+   /* Allocate the private area */
+   mach64Screen = (mach64ScreenPtr) CALLOC( sizeof(*mach64Screen) );
+   if ( !mach64Screen ) return NULL;
+
+   /* parse information in __driConfigOptions */
+   driParseOptionInfo (&mach64Screen->optionCache,
+		       __driConfigOptions, __driNConfigOptions);
+
+   mach64Screen->IsPCI = serverInfo->IsPCI;
+
+   {
+      drm_mach64_getparam_t gp;
+      int ret;
+
+      gp.param = MACH64_PARAM_IRQ_NR;
+      gp.value = (void *) &mach64Screen->irq;
+      ret = drmCommandWriteRead( sPriv->fd, DRM_MACH64_GETPARAM,
+				    &gp, sizeof(gp));
+      if (ret) {
+         fprintf(stderr, "DRM_MACH64_GETPARAM (MACH64_PARAM_IRQ_NR): %d\n", ret);
+         goto fail_options;
+      }
+   }
+
+   mach64Screen->mmio.handle = serverInfo->regs;
+   mach64Screen->mmio.size   = serverInfo->regsSize;
+   if ( drmMap( sPriv->fd,
+		mach64Screen->mmio.handle,
+		mach64Screen->mmio.size,
+		(drmAddressPtr)&mach64Screen->mmio.map ) != 0 ) {
+      goto fail_options;
+   }
+
+   mach64Screen->buffers = drmMapBufs( sPriv->fd );
+   if ( !mach64Screen->buffers ) {
+      goto fail_unmap_mmio;
+   }
+
+   if ( !mach64Screen->IsPCI ) {
+      mach64Screen->agpTextures.handle = serverInfo->agp;
+      mach64Screen->agpTextures.size   = serverInfo->agpSize;
+      if ( drmMap( sPriv->fd,
+		   mach64Screen->agpTextures.handle,
+		   mach64Screen->agpTextures.size,
+		   (drmAddressPtr)&mach64Screen->agpTextures.map ) ) {
+	 goto fail_unmap_bufs;
+      }
+   }
+
+   mach64Screen->AGPMode	= serverInfo->AGPMode;
+   mach64Screen->chipset	= serverInfo->chipset;
+   mach64Screen->width		= serverInfo->width;
+   mach64Screen->height		= serverInfo->height;
+   mach64Screen->mem		= serverInfo->mem;
+   mach64Screen->cpp		= serverInfo->cpp;
+
+   mach64Screen->frontOffset	= serverInfo->frontOffset;
+   mach64Screen->frontPitch	= serverInfo->frontPitch;
+   mach64Screen->backOffset	= serverInfo->backOffset;
+   mach64Screen->backPitch	= serverInfo->backPitch;
+   mach64Screen->depthOffset	= serverInfo->depthOffset;
+   mach64Screen->depthPitch	= serverInfo->depthPitch;
+
+   mach64Screen->texOffset[MACH64_CARD_HEAP] = serverInfo->textureOffset;
+   mach64Screen->texSize[MACH64_CARD_HEAP] = serverInfo->textureSize;
+   mach64Screen->logTexGranularity[MACH64_CARD_HEAP] =
+      serverInfo->logTextureGranularity;
+
+   if ( mach64Screen->IsPCI ) {
+      mach64Screen->numTexHeaps = MACH64_NR_TEX_HEAPS - 1;
+      mach64Screen->firstTexHeap = MACH64_CARD_HEAP;
+      mach64Screen->texOffset[MACH64_AGP_HEAP] = 0;
+      mach64Screen->texSize[MACH64_AGP_HEAP] = 0;
+      mach64Screen->logTexGranularity[MACH64_AGP_HEAP] = 0;
+   } else {
+      if (serverInfo->textureSize > 0) {
+	 mach64Screen->numTexHeaps = MACH64_NR_TEX_HEAPS;
+	 mach64Screen->firstTexHeap = MACH64_CARD_HEAP;
+      } else {
+	 mach64Screen->numTexHeaps = MACH64_NR_TEX_HEAPS - 1;
+	 mach64Screen->firstTexHeap = MACH64_AGP_HEAP;
+      }
+      mach64Screen->texOffset[MACH64_AGP_HEAP] = serverInfo->agpTextureOffset;
+      mach64Screen->texSize[MACH64_AGP_HEAP] = serverInfo->agpSize;
+      mach64Screen->logTexGranularity[MACH64_AGP_HEAP] = serverInfo->logAgpTextureGranularity;
+   }
+
+   mach64Screen->driScreen = sPriv;
+
+   if ( glx_enable_extension != NULL ) {
+      if ( mach64Screen->irq != 0 ) {
+	 (*glx_enable_extension)( psc, "GLX_SGI_swap_control" );
+	 (*glx_enable_extension)( psc, "GLX_SGI_video_sync" );
+	 (*glx_enable_extension)( psc, "GLX_MESA_swap_control" );
+      }
+
+      (*glx_enable_extension)( psc, "GLX_MESA_swap_frame_usage" );
+   }
+
+   return mach64Screen;
+
+ fail_unmap_bufs:	/* error unwinding: each label falls through to the earlier setup steps */
+   drmUnmapBufs( mach64Screen->buffers );
+ fail_unmap_mmio:
+   drmUnmap( (drmAddress)mach64Screen->mmio.map, mach64Screen->mmio.size );
+ fail_options:
+   driDestroyOptionInfo( &mach64Screen->optionCache );
+   FREE( mach64Screen );
+   return NULL;
+}
+
+/* Destroy the device specific screen private data struct: unmap the AGP,
+ * buffer and register mappings, release the option info and free the struct. */
+static void
+mach64DestroyScreen( __DRIscreenPrivate *driScreen )
+{
+   mach64ScreenRec *mach64Screen = (mach64ScreenRec *) driScreen->private;
+
+   if ( !mach64Screen )
+      return;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_DRI ) 
+      fprintf( stderr, "%s\n", __FUNCTION__ );
+
+   if ( !mach64Screen->IsPCI ) {
+      drmUnmap( (drmAddress)mach64Screen->agpTextures.map,
+		mach64Screen->agpTextures.size );
+   }
+
+   drmUnmapBufs( mach64Screen->buffers );
+   drmUnmap( (drmAddress)mach64Screen->mmio.map, mach64Screen->mmio.size );
+   driDestroyOptionInfo( &mach64Screen->optionCache );	/* filled by driParseOptionInfo() */
+   FREE( mach64Screen );
+   driScreen->private = NULL;
+}
+
+
+/* Create the Mesa framebuffer for a window drawable and attach the hardware
+ * front, back and depth renderbuffers plus software stencil/accum buffers.
+ * Pixmaps are not implemented.  Returns GL_FALSE on failure. */
+static GLboolean
+mach64CreateBuffer( __DRIscreenPrivate *driScrnPriv,
+		    __DRIdrawablePrivate *driDrawPriv,
+		    const __GLcontextModes *mesaVis,
+		    GLboolean isPixmap )
+{
+   mach64ScreenPtr screen = (mach64ScreenPtr) driScrnPriv->private;
+   struct gl_framebuffer *fb;
+
+   if (isPixmap)
+      return GL_FALSE; /* not implemented */
+
+   fb = _mesa_create_framebuffer(mesaVis);
+   if (!fb)
+      return GL_FALSE; /* framebuffer allocation failed */
+
+   {
+      driRenderbuffer *frontRb
+         = driNewRenderbuffer(GL_RGBA,
+                              NULL,
+                              screen->cpp,
+                              screen->frontOffset, screen->frontPitch,
+                              driDrawPriv);
+      mach64SetSpanFunctions(frontRb, mesaVis);
+      _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontRb->Base);
+   }
+
+   if (mesaVis->doubleBufferMode) {
+      driRenderbuffer *backRb
+         = driNewRenderbuffer(GL_RGBA,
+                              NULL,
+                              screen->cpp,
+                              screen->backOffset, screen->backPitch,
+                              driDrawPriv);
+      mach64SetSpanFunctions(backRb, mesaVis);
+      _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backRb->Base);
+   }
+
+   if (mesaVis->depthBits == 16) {
+      driRenderbuffer *depthRb
+         = driNewRenderbuffer(GL_DEPTH_COMPONENT16,
+                              NULL, screen->cpp,
+                              screen->depthOffset, screen->depthPitch,
+                              driDrawPriv);
+      mach64SetSpanFunctions(depthRb, mesaVis);
+      _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
+   }
+   else if (mesaVis->depthBits == 24) {
+      /* XXX I don't think 24-bit Z is supported - so this isn't used */
+      driRenderbuffer *depthRb
+         = driNewRenderbuffer(GL_DEPTH_COMPONENT24,
+                              NULL,
+                              screen->cpp,
+                              screen->depthOffset, screen->depthPitch,
+                              driDrawPriv);
+      mach64SetSpanFunctions(depthRb, mesaVis);
+      _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
+   }
+
+   _mesa_add_soft_renderbuffers(fb,
+                                GL_FALSE, /* color */
+                                GL_FALSE, /* depth */
+                                mesaVis->stencilBits > 0,
+                                mesaVis->accumRedBits > 0,
+                                GL_FALSE, /* alpha */
+                                GL_FALSE /* aux */);
+   driDrawPriv->driverPrivate = (void *) fb;
+   return GL_TRUE;
+}
+
+/* Drop the framebuffer reference stored in driDrawPriv->driverPrivate. */
+static void
+mach64DestroyBuffer(__DRIdrawablePrivate *driDrawPriv)
+{
+   _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)));
+}
+
+
+/* Copy the back color buffer to the front color buffer (double-buffered visuals only) */
+static void
+mach64SwapBuffers(__DRIdrawablePrivate *dPriv)
+{
+   mach64ContextPtr mmesa;
+
+   if (!dPriv->driContextPriv || !dPriv->driContextPriv->driverPrivate) {
+      /* XXX this shouldn't be an error but we can't handle it for now */
+      _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__);
+      return;
+   }
+
+   mmesa = (mach64ContextPtr) dPriv->driContextPriv->driverPrivate;
+   if (mmesa->glCtx->Visual.doubleBufferMode) {
+      /* flush pending rendering commands before the copy */
+      _mesa_notifySwapBuffers( mmesa->glCtx );
+      mach64CopyBuffer( dPriv );
+   }
+}
+
+
+/* InitDriver hook: build the per-screen private data and store it in
+ * driScreen->private.  Returns GL_FALSE when the screen cannot be created. */
+static GLboolean
+mach64InitDriver( __DRIscreenPrivate *driScreen )
+{
+   void *screenPriv = (void *) mach64CreateScreen( driScreen );
+
+   driScreen->private = screenPriv;
+   if ( screenPriv != NULL )
+      return GL_TRUE;
+
+   mach64DestroyScreen( driScreen );
+   return GL_FALSE;
+}
+
+/* Driver entry-point table; passed to __driUtilCreateNewScreen() in __driCreateNewScreen_20050727(). */
+static struct __DriverAPIRec mach64API = {
+   .InitDriver      = mach64InitDriver,
+   .DestroyScreen   = mach64DestroyScreen,
+   .CreateContext   = mach64CreateContext,
+   .DestroyContext  = mach64DestroyContext,
+   .CreateBuffer    = mach64CreateBuffer,
+   .DestroyBuffer   = mach64DestroyBuffer,
+   .SwapBuffers     = mach64SwapBuffers,
+   .MakeCurrent     = mach64MakeCurrent,
+   .UnbindContext   = mach64UnbindContext,
+   .GetSwapInfo     = NULL,
+   .GetMSC          = driGetMSC32,
+   .WaitForMSC      = driWaitForMSC32,
+   .WaitForSBC      = NULL,
+   .SwapBuffersMSC  = NULL
+};
+
+
+/**
+ * This is the bootstrap function for the driver.  libGL supplies all of the
+ * requisite information about the system, and the driver initializes itself.
+ * This routine also fills in the linked list pointed to by \c driver_modes
+ * with the \c __GLcontextModes that the driver can support for windows or
+ * pbuffers.
+ * 
+ * \return A pointer to a \c __DRIscreenPrivate on success, or \c NULL on 
+ *         failure.
+ */
+PUBLIC
+void * __driCreateNewScreen_20050727( __DRInativeDisplay *dpy, int scrn, __DRIscreen *psc,
+			     const __GLcontextModes * modes,
+			     const __DRIversion * ddx_version,
+			     const __DRIversion * dri_version,
+			     const __DRIversion * drm_version,
+			     const __DRIframebuffer * frame_buffer,
+			     drmAddress pSAREA, int fd, 
+			     int internal_api_version,
+			     const __DRIinterfaceMethods * interface,
+			     __GLcontextModes ** driver_modes )
+			     
+{
+   __DRIscreenPrivate *psp;
+   static const __DRIversion ddx_expected = { 6, 4, 0 };
+   static const __DRIversion dri_expected = { 4, 0, 0 };
+   static const __DRIversion drm_expected = { 2, 0, 0 };
+
+   dri_interface = interface;
+
+   if ( ! driCheckDriDdxDrmVersions2( "Mach64",
+				      dri_version, & dri_expected,
+				      ddx_version, & ddx_expected,
+				      drm_version, & drm_expected ) ) {
+      return NULL;
+   }
+
+   psp = __driUtilCreateNewScreen(dpy, scrn, psc, NULL,
+				  ddx_version, dri_version, drm_version,
+				  frame_buffer, pSAREA, fd,
+				  internal_api_version, &mach64API);
+   if ( psp != NULL ) {
+      ATIDRIPtr dri_priv = (ATIDRIPtr) psp->pDevPriv;
+      *driver_modes = mach64FillInModes( dri_priv->cpp * 8,	/* pixel_bits */
+					 16,	/* depth_bits */
+					 0,	/* stencil_bits */
+					 1);	/* have_back_buffer */
+
+      /* Calling driInitExtensions here, with a NULL context pointer, does not actually
+       * enable the extensions.  It just makes sure that all the dispatch offsets for all
+       * the extensions that *might* be enabled are known.  This is needed because the
+       * dispatch offsets need to be known when _mesa_context_create is called, but we can't
+       * enable the extensions until we have a context pointer.
+       *
+       * Hello chicken.  Hello egg.  How are you two today?
+       */
+      driInitExtensions( NULL, card_extensions, GL_FALSE );
+   }
+
+   return (void *) psp;
+}
diff --git a/src/mach64_screen.h b/src/mach64_screen.h
new file mode 100644
index 0000000..5305058
--- /dev/null
+++ b/src/mach64_screen.h
@@ -0,0 +1,78 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#ifndef __MACH64_SCREEN_H__
+#define __MACH64_SCREEN_H__
+
+#include "xmlconfig.h"
+
+typedef struct {			/* One DRM region: handle, size and mapping */
+   drm_handle_t handle;			/* Handle to the DRM region */
+   drmSize size;			/* Size of the DRM region */
+   drmAddress *map;			/* Mapping of the DRM region */
+} mach64RegionRec, *mach64RegionPtr;
+
+typedef struct {			/* Per-screen driver record */
+   int chipset;
+   int width;				/* Screen width; the scissor is clamped to width-1 */
+   int height;				/* Screen height; the scissor is clamped to height-1 */
+   int mem;
+   int cpp;				/* Passed to mach64PackColor; presumably bytes per pixel -- TODO confirm */
+
+   unsigned int	frontOffset;
+   unsigned int frontPitch;
+   unsigned int	backOffset;
+   unsigned int backPitch;
+
+   unsigned int	depthOffset;
+   unsigned int depthPitch;
+
+   int IsPCI;
+   int AGPMode;
+   unsigned int irq;			/* IRQ number (0 means none) */
+
+   /* Shared Texture data */
+   int firstTexHeap, numTexHeaps;
+   int texOffset[MACH64_NR_TEX_HEAPS];		/* one entry per texture heap */
+   int texSize[MACH64_NR_TEX_HEAPS];		/* one entry per texture heap */
+   int logTexGranularity[MACH64_NR_TEX_HEAPS];	/* one entry per texture heap */
+
+   mach64RegionRec mmio;
+   mach64RegionRec agpTextures;
+
+   drmBufMapPtr buffers;
+
+   __DRIscreenPrivate *driScreen;
+
+   driOptionCache optionCache;
+} mach64ScreenRec, *mach64ScreenPtr;
+
+#endif /* __MACH64_SCREEN_H__ */
diff --git a/src/mach64_span.c b/src/mach64_span.c
new file mode 100644
index 0000000..3830a28
--- /dev/null
+++ b/src/mach64_span.c
@@ -0,0 +1,169 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "mach64_context.h"
+#include "mach64_ioctl.h"
+#include "mach64_state.h"
+#include "mach64_span.h"
+
+#include "swrast/swrast.h"
+
+#define DBG 0
+
+#define LOCAL_VARS	/* locals used by GET_PTR and Y_FLIP; needs ctx and rb in scope */ \
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);			\
+   __DRIscreenPrivate *sPriv = mmesa->driScreen;			\
+   __DRIdrawablePrivate *dPriv = mmesa->driDrawable;			\
+   driRenderbuffer *drb = (driRenderbuffer *) rb;			\
+   GLuint height = dPriv->h;		/* read by Y_FLIP */		\
+   GLushort p;								\
+   (void) p;				/* keeps p from being reported as unused */
+
+#define LOCAL_DEPTH_VARS	/* locals for READ/WRITE_DEPTH; needs ctx and rb in scope */ \
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);			\
+   __DRIdrawablePrivate *dPriv = mmesa->driDrawable;			\
+   __DRIscreenPrivate *driScreen = mmesa->driScreen;			\
+   driRenderbuffer *drb = (driRenderbuffer *) rb;			\
+   GLuint height = dPriv->h;	/* buf below: drawable origin, 2 bytes per element */ \
+   char *buf = (char *)(driScreen->pFB + drb->offset +			\
+			(dPriv->x + dPriv->y * drb->pitch) * 2)
+
+#define LOCAL_STENCIL_VARS	LOCAL_DEPTH_VARS	/* stencil reuses the depth locals */
+
+#define Y_FLIP( _y )	(height - (_y) - 1)	/* mirrors _y within `height` rows; argument parenthesized so expressions flip correctly */
+
+#define HW_LOCK()	/* expands to nothing */
+
+/* FIXME could/should we use dPriv->numClipRects like the other drivers? */
+#define HW_CLIPLOOP()							\
+   do {									\
+      int _nc = mmesa->numClipRects;					\
+      /* visit every clip rect, last first, relative to drawX/drawY */	\
+      while ( _nc-- ) {							\
+	 int minx = mmesa->pClipRects[_nc].x1 - mmesa->drawX;		\
+	 int miny = mmesa->pClipRects[_nc].y1 - mmesa->drawY;		\
+	 int maxx = mmesa->pClipRects[_nc].x2 - mmesa->drawX;		\
+	 int maxy = mmesa->pClipRects[_nc].y2 - mmesa->drawY;
+
+#define HW_ENDCLIPLOOP()	/* closes the braces opened by HW_CLIPLOOP */ \
+      }									\
+   } while (0)
+
+#define HW_UNLOCK()	/* expands to nothing */
+
+
+
+/* ================================================================
+ * Color buffer
+ */
+
+/* 16 bit, RGB565 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+/* TAG(InitPointers) yields mach64InitPointers_RGB565; GET_PTR addresses pixel (X,Y) of the drawable. */
+#define TAG(x)    mach64##x##_RGB565
+#define TAG2(x,y) mach64##x##_RGB565##y
+#define GET_PTR(X,Y) (sPriv->pFB + drb->offset		\
+     + ((dPriv->y + (Y)) * drb->pitch + (dPriv->x + (X))) * drb->cpp)
+#include "spantmp2.h"
+
+
+/* 32 bit, ARGB8888 color spanline and pixel functions
+ */
+/* FIXME the old code always read back alpha as 0xff, i.e. fully opaque.
+   Was there a reason to do so?  If so, that won't work with this template... */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+/* TAG(InitPointers) yields mach64InitPointers_ARGB8888; GET_PTR addresses pixel (X,Y) of the drawable. */
+#define TAG(x)    mach64##x##_ARGB8888
+#define TAG2(x,y) mach64##x##_ARGB8888##y
+#define GET_PTR(X,Y) (sPriv->pFB + drb->offset		\
+     + ((dPriv->y + (Y)) * drb->pitch + (dPriv->x + (X))) * drb->cpp)
+#include "spantmp2.h"
+
+
+/* ================================================================
+ * Depth buffer
+ */
+
+/* 16 bit depthbuffer functions.
+ */
+#define WRITE_DEPTH( _x, _y, d )	/* store d as a GLushort at (_x,_y) in buf */ \
+   *(GLushort *)(buf + ((_x) + (_y) * drb->pitch) * 2) = d;
+/* READ_DEPTH loads the GLushort at (_x,_y) in buf into d. */
+#define READ_DEPTH( d, _x, _y )						\
+   d = *(GLushort *)(buf + ((_x) + (_y) * drb->pitch) * 2);
+/* TAG(InitDepthPointers) yields mach64InitDepthPointers_z16. */
+#define TAG(x) mach64##x##_z16
+#include "depthtmp.h"
+
+
+static void mach64SpanRenderStart( GLcontext *ctx )
+{  /* Installed as swdd->SpanRenderStart by mach64DDInitSpanFuncs. */
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   LOCK_HARDWARE( mmesa );	/* matched by UNLOCK_HARDWARE in mach64SpanRenderFinish */
+   FINISH_DMA_LOCKED( mmesa );
+}
+
+static void mach64SpanRenderFinish( GLcontext *ctx )
+{  /* Installed as swdd->SpanRenderFinish by mach64DDInitSpanFuncs. */
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   _swrast_flush( ctx );		/* runs before the lock is dropped */
+   UNLOCK_HARDWARE( mmesa );	/* pairs with LOCK_HARDWARE in mach64SpanRenderStart */
+}
+
+void mach64DDInitSpanFuncs( GLcontext *ctx )
+{  /* Hook the span start/finish callbacks into the swrast driver table. */
+   struct swrast_device_driver *swrast_drv = _swrast_GetDeviceDriverReference(ctx);
+   swrast_drv->SpanRenderFinish = mach64SpanRenderFinish;
+   swrast_drv->SpanRenderStart  = mach64SpanRenderStart;
+}
+
+
+/**
+ * Install the span accessors matching drb's format: RGB565 or ARGB8888 for
+ * GL_RGBA (picked from the visual's bit sizes), z16 for GL_DEPTH_COMPONENT16.
+ */
+void mach64SetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis)
+{
+   switch (drb->Base.InternalFormat) {
+   case GL_DEPTH_COMPONENT16:
+      mach64InitDepthPointers_z16(&drb->Base);
+      break;
+   case GL_RGBA:
+      if (vis->redBits == 5 && vis->greenBits == 6 && vis->blueBits == 5)
+         mach64InitPointers_RGB565(&drb->Base);
+      else
+         mach64InitPointers_ARGB8888(&drb->Base);
+      break;
+   }
+}
diff --git a/src/mach64_span.h b/src/mach64_span.h
new file mode 100644
index 0000000..0f4c766
--- /dev/null
+++ b/src/mach64_span.h
@@ -0,0 +1,41 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#ifndef __MACH64_SPAN_H__
+#define __MACH64_SPAN_H__
+
+#include "drirenderbuffer.h"
+
+extern void mach64DDInitSpanFuncs( GLcontext *ctx );	/* installs SpanRenderStart/Finish */
+/* Picks the span accessors for rb from its InternalFormat and the visual. */
+extern void
+mach64SetSpanFunctions(driRenderbuffer *rb, const GLvisual *vis);
+
+#endif
diff --git a/src/mach64_state.c b/src/mach64_state.c
new file mode 100644
index 0000000..667a394
--- /dev/null
+++ b/src/mach64_state.c
@@ -0,0 +1,1189 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "mach64_context.h"
+#include "mach64_state.h"
+#include "mach64_ioctl.h"
+#include "mach64_tris.h"
+#include "mach64_vb.h"
+#include "mach64_tex.h"
+
+#include "context.h"
+#include "enums.h"
+#include "colormac.h"
+#include "swrast/swrast.h"
+#include "vbo/vbo.h"
+#include "tnl/tnl.h"
+#include "swrast_setup/swrast_setup.h"
+
+#include "tnl/t_pipeline.h"
+
+
+/* =============================================================
+ * Alpha blending
+ */
+
+/* Recompute the alpha-test bits of alpha_tst_cntl and the blend bits of
+ * scale_3d_cntl from ctx->Color, requesting a blend-func fallback for
+ * factors with no case below.  Dirty bits are raised only for the setup
+ * values that actually changed.
+ */
+static void mach64UpdateAlphaMode( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLuint alpha_cntl = mmesa->setup.alpha_tst_cntl;
+   GLuint scale_cntl = mmesa->setup.scale_3d_cntl;
+   GLuint write_mask = mmesa->setup.dp_write_mask;
+
+   alpha_cntl &= ~MACH64_ALPHA_TEST_EN;
+   if ( ctx->Color.AlphaEnabled ) {
+      GLubyte alpha_ref;
+      CLAMPED_FLOAT_TO_UBYTE(alpha_ref, ctx->Color.AlphaRef);
+      alpha_cntl &= ~(MACH64_ALPHA_TEST_MASK | MACH64_REF_ALPHA_MASK);
+
+      switch ( ctx->Color.AlphaFunc ) {
+      case GL_NEVER:
+	 alpha_cntl |= MACH64_ALPHA_TEST_NEVER;
+	 break;
+      case GL_LESS:
+	 alpha_cntl |= MACH64_ALPHA_TEST_LESS;
+	 break;
+      case GL_LEQUAL:
+	 alpha_cntl |= MACH64_ALPHA_TEST_LEQUAL;
+	 break;
+      case GL_EQUAL:
+	 alpha_cntl |= MACH64_ALPHA_TEST_EQUAL;
+	 break;
+      case GL_GEQUAL:
+	 alpha_cntl |= MACH64_ALPHA_TEST_GEQUAL;
+	 break;
+      case GL_GREATER:
+	 alpha_cntl |= MACH64_ALPHA_TEST_GREATER;
+	 break;
+      case GL_NOTEQUAL:
+	 alpha_cntl |= MACH64_ALPHA_TEST_NOTEQUAL;
+	 break;
+      case GL_ALWAYS:
+	 alpha_cntl |= MACH64_ALPHA_TEST_ALWAYS;
+	 break;
+      }
+      alpha_cntl |= (alpha_ref << MACH64_REF_ALPHA_SHIFT);
+      alpha_cntl |=  MACH64_ALPHA_TEST_EN;
+   }
+
+   FALLBACK( mmesa, MACH64_FALLBACK_BLEND_FUNC, GL_FALSE );
+
+   scale_cntl &= ~MACH64_ALPHA_FOG_EN_ALPHA;
+   if ( ctx->Color.BlendEnabled ) {
+      scale_cntl &= ~(MACH64_ALPHA_BLEND_SRC_MASK |
+		      MACH64_ALPHA_BLEND_DST_MASK |
+		      MACH64_ALPHA_BLEND_SAT);
+
+      switch ( ctx->Color.BlendSrcRGB ) {
+      case GL_ZERO:
+	 scale_cntl |= MACH64_ALPHA_BLEND_SRC_ZERO;
+	 break;
+      case GL_ONE:
+	 scale_cntl |= MACH64_ALPHA_BLEND_SRC_ONE;
+	 break;
+      case GL_DST_COLOR:
+	 scale_cntl |= MACH64_ALPHA_BLEND_SRC_DSTCOLOR;
+	 break;
+      case GL_ONE_MINUS_DST_COLOR:
+	 scale_cntl |= MACH64_ALPHA_BLEND_SRC_INVDSTCOLOR;
+	 break;
+      case GL_SRC_ALPHA:
+	 scale_cntl |= MACH64_ALPHA_BLEND_SRC_SRCALPHA;
+	 break;
+      case GL_ONE_MINUS_SRC_ALPHA:
+	 scale_cntl |= MACH64_ALPHA_BLEND_SRC_INVSRCALPHA;
+	 break;
+      case GL_DST_ALPHA:
+	 scale_cntl |= MACH64_ALPHA_BLEND_SRC_DSTALPHA;
+	 break;
+      case GL_ONE_MINUS_DST_ALPHA:
+	 scale_cntl |= MACH64_ALPHA_BLEND_SRC_INVDSTALPHA;
+	 break;
+      case GL_SRC_ALPHA_SATURATE:
+	 scale_cntl |= (MACH64_ALPHA_BLEND_SRC_SRCALPHA |
+			MACH64_ALPHA_BLEND_SAT);
+	 break;
+      default:
+         FALLBACK( mmesa, MACH64_FALLBACK_BLEND_FUNC, GL_TRUE );
+      }
+
+      switch ( ctx->Color.BlendDstRGB ) {
+      case GL_ZERO:
+	 scale_cntl |= MACH64_ALPHA_BLEND_DST_ZERO;
+	 break;
+      case GL_ONE:
+	 scale_cntl |= MACH64_ALPHA_BLEND_DST_ONE;
+	 break;
+      case GL_SRC_COLOR:
+	 scale_cntl |= MACH64_ALPHA_BLEND_DST_SRCCOLOR;
+	 break;
+      case GL_ONE_MINUS_SRC_COLOR:
+	 scale_cntl |= MACH64_ALPHA_BLEND_DST_INVSRCCOLOR;
+	 break;
+      case GL_SRC_ALPHA:
+	 scale_cntl |= MACH64_ALPHA_BLEND_DST_SRCALPHA;
+	 break;
+      case GL_ONE_MINUS_SRC_ALPHA:
+	 scale_cntl |= MACH64_ALPHA_BLEND_DST_INVSRCALPHA;
+	 break;
+      case GL_DST_ALPHA:
+	 scale_cntl |= MACH64_ALPHA_BLEND_DST_DSTALPHA;
+	 break;
+      case GL_ONE_MINUS_DST_ALPHA:
+	 scale_cntl |= MACH64_ALPHA_BLEND_DST_INVDSTALPHA;
+	 break;
+      default:
+         FALLBACK( mmesa, MACH64_FALLBACK_BLEND_FUNC, GL_TRUE );
+      }
+
+      write_mask = 0xffffffff; /* Can't color mask and blend at the same time */
+      scale_cntl &= ~MACH64_ALPHA_FOG_EN_FOG; /* Can't fog and blend at the same time */
+      scale_cntl |=  MACH64_ALPHA_FOG_EN_ALPHA;
+   }
+
+   if ( mmesa->setup.alpha_tst_cntl != alpha_cntl ) {
+      mmesa->setup.alpha_tst_cntl = alpha_cntl;
+      mmesa->dirty |= MACH64_UPLOAD_Z_ALPHA_CNTL;
+   }
+   if ( mmesa->setup.scale_3d_cntl != scale_cntl ) {
+      mmesa->setup.scale_3d_cntl = scale_cntl;
+      mmesa->dirty |= MACH64_UPLOAD_SCALE_3D_CNTL;
+   }
+   if ( mmesa->setup.dp_write_mask != write_mask ) {
+      mmesa->setup.dp_write_mask = write_mask;
+      mmesa->dirty |= MACH64_UPLOAD_DP_WRITE_MASK;
+   }
+}
+
+static void mach64DDAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* func and ref are not read here; mach64UpdateAlphaMode takes them from ctx->Color. */
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_ALPHA;
+}
+
+/* Request software fallbacks for a logic op other than GL_COPY and for
+ * any blend equation except GL_FUNC_ADD, then flag MACH64_NEW_ALPHA.
+ */
+static void mach64DDBlendEquationSeparate( GLcontext *ctx,
+					   GLenum modeRGB, GLenum modeA )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLboolean needs_fallback;
+
+   assert( modeRGB == modeA );
+   FLUSH_BATCH( mmesa );
+
+   /* BlendEquation affects ColorLogicOpEnabled. */
+   needs_fallback = ctx->Color.ColorLogicOpEnabled && ctx->Color.LogicOp != GL_COPY;
+   FALLBACK( mmesa, MACH64_FALLBACK_LOGICOP, needs_fallback );
+
+   /* Can only do blend addition, not min, max, subtract, etc. */
+   FALLBACK( mmesa, MACH64_FALLBACK_BLEND_EQ, modeRGB != GL_FUNC_ADD );
+   mmesa->new_state |= MACH64_NEW_ALPHA;
+}
+
+static void mach64DDBlendFuncSeparate( GLcontext *ctx,
+				       GLenum sfactorRGB, GLenum dfactorRGB,
+				       GLenum sfactorA, GLenum dfactorA )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* The factor arguments are not read here; only MACH64_NEW_ALPHA is flagged. */
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_ALPHA;
+}
+
+
+/* =============================================================
+ * Depth testing
+ */
+
+/* Rebuild the depth-test and depth-write bits of z_cntl from ctx->Depth
+ * and raise MACH64_UPLOAD_Z_ALPHA_CNTL if the value changed.
+ */
+static void mach64UpdateZMode( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLuint z_cntl = mmesa->setup.z_cntl;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_MSG )
+      fprintf( stderr, "%s:\n", __FUNCTION__ );
+
+   z_cntl &= ~MACH64_Z_EN;
+   if ( ctx->Depth.Test ) {
+      z_cntl &= ~MACH64_Z_TEST_MASK;
+
+      switch ( ctx->Depth.Func ) {
+      case GL_NEVER:
+	 z_cntl |= MACH64_Z_TEST_NEVER;
+	 break;
+      case GL_ALWAYS:
+	 z_cntl |= MACH64_Z_TEST_ALWAYS;
+	 break;
+      case GL_LESS:
+	 z_cntl |= MACH64_Z_TEST_LESS;
+	 break;
+      case GL_LEQUAL:
+	 z_cntl |= MACH64_Z_TEST_LEQUAL;
+	 break;
+      case GL_EQUAL:
+	 z_cntl |= MACH64_Z_TEST_EQUAL;
+	 break;
+      case GL_GEQUAL:
+	 z_cntl |= MACH64_Z_TEST_GEQUAL;
+	 break;
+      case GL_GREATER:
+	 z_cntl |= MACH64_Z_TEST_GREATER;
+	 break;
+      case GL_NOTEQUAL:
+	 z_cntl |= MACH64_Z_TEST_NOTEQUAL;
+	 break;
+      }
+
+      z_cntl |=  MACH64_Z_EN;
+   }
+
+   if ( ctx->Depth.Mask )
+      z_cntl |=  MACH64_Z_MASK_EN;
+   else
+      z_cntl &= ~MACH64_Z_MASK_EN;
+
+   if ( mmesa->setup.z_cntl != z_cntl ) {
+      mmesa->setup.z_cntl = z_cntl;
+      mmesa->dirty |= MACH64_UPLOAD_Z_ALPHA_CNTL;
+   }
+}
+
+static void mach64DDDepthFunc( GLcontext *ctx, GLenum func )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* func is not read here; mach64UpdateZMode takes it from ctx->Depth.Func. */
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_DEPTH;
+}
+
+static void mach64DDDepthMask( GLcontext *ctx, GLboolean flag )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* flag is not read here; mach64UpdateZMode takes it from ctx->Depth.Mask. */
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_DEPTH;
+}
+
+static void mach64DDClearDepth( GLcontext *ctx, GLclampd d )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* Stores d scaled by 0xffff in mmesa->ClearDepth; no flush or state flag. */
+   /* Always have a 16-bit depth buffer.
+    */
+   mmesa->ClearDepth = d * 0xffff;
+}
+
+
+/* =============================================================
+ * Fog
+ */
+
+/* Program fog into scale_3d_cntl and dp_fog_clr from ctx->Fog.  Nothing is
+ * touched while blending is enabled.
+ */
+static void mach64UpdateFogAttrib( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   CARD32 scale_cntl = mmesa->setup.scale_3d_cntl;
+   CARD32 fog_clr;
+   GLubyte rgba[4];
+   int i;
+
+   /* Can't fog if blending is on */
+   if ( ctx->Color.BlendEnabled )
+      return;
+
+   if ( ctx->Fog.Enabled ) {
+      scale_cntl |= MACH64_ALPHA_FOG_EN_FOG;
+      scale_cntl &= ~(MACH64_ALPHA_BLEND_SRC_MASK |
+		      MACH64_ALPHA_BLEND_DST_MASK |
+		      MACH64_ALPHA_BLEND_SAT);
+      /* From Utah-glx: "fog color is now dest and fog factor is alpha, so
+       * use GL_SRC_ALPHA GL_ONE_MINUS_SRC_ALPHA"
+       */
+      scale_cntl |= (MACH64_ALPHA_BLEND_SRC_SRCALPHA |
+		     MACH64_ALPHA_BLEND_DST_INVSRCALPHA);
+      /* From Utah-glx: "can't use texture alpha when fogging" */
+      scale_cntl &= ~MACH64_TEX_MAP_AEN;
+   } else {
+      scale_cntl &= ~(MACH64_ALPHA_BLEND_SRC_MASK |
+		      MACH64_ALPHA_BLEND_DST_MASK |
+		      MACH64_ALPHA_BLEND_SAT);
+      scale_cntl |= (MACH64_ALPHA_BLEND_SRC_ONE |
+		     MACH64_ALPHA_BLEND_DST_ZERO);
+      scale_cntl &= ~MACH64_ALPHA_FOG_EN_FOG;
+   }
+
+   for ( i = 0 ; i < 4 ; i++ )
+      rgba[i] = FLOAT_TO_UBYTE( ctx->Fog.Color[i] );
+
+   fog_clr = mach64PackColor( 4, rgba[0], rgba[1], rgba[2], rgba[3] );
+
+   if ( mmesa->setup.dp_fog_clr != fog_clr ) {
+      mmesa->setup.dp_fog_clr = fog_clr;
+      mmesa->dirty |= MACH64_UPLOAD_DP_FOG_CLR;
+   }
+   if ( mmesa->setup.scale_3d_cntl != scale_cntl ) {
+      mmesa->setup.scale_3d_cntl = scale_cntl;
+      mmesa->dirty |= MACH64_UPLOAD_SCALE_3D_CNTL;
+   }
+}
+
+static void mach64DDFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* pname and param are not read here; mach64UpdateFogAttrib reads ctx->Fog. */
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_FOG;
+}
+
+
+/* =============================================================
+ * Clipping
+ */
+
+/* Intersect the drawable bounds with the GL scissor (when enabled), convert
+ * to screen coordinates, clamp to the screen, and store the result in
+ * setup.sc_top_bottom / setup.sc_left_right.  No-op without a drawable.
+ */
+static void mach64UpdateClipping( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   mach64ScreenPtr mach64Screen = mmesa->mach64Screen;
+   __DRIdrawablePrivate *drawable = mmesa->driDrawable;
+   int left = 0, top = 0, right, bottom;
+
+   if ( !drawable )
+      return;
+
+   right = drawable->w - 1;
+   bottom = drawable->h - 1;
+
+   if ( ctx->Scissor.Enabled ) {
+      /* Scissor edges, with Y flipped against the drawable height. */
+      const int sc_left = ctx->Scissor.X;
+      const int sc_top = drawable->h - ctx->Scissor.Y - ctx->Scissor.Height;
+      const int sc_right = ctx->Scissor.X + ctx->Scissor.Width - 1;
+      const int sc_bottom = drawable->h - ctx->Scissor.Y - 1;
+
+      if ( sc_left > left ) left = sc_left;
+      if ( sc_top > top ) top = sc_top;
+      if ( sc_right < right ) right = sc_right;
+      if ( sc_bottom < bottom ) bottom = sc_bottom;
+   }
+
+   /* offset by the drawable position */
+   left += drawable->x;
+   top += drawable->y;
+   right += drawable->x;
+   bottom += drawable->y;
+
+   /* clamp to screen borders */
+   if (left < 0) left = 0;
+   if (top < 0) top = 0;
+   if (right < 0) right = 0;
+   if (bottom < 0) bottom = 0;
+   if (right > mach64Screen->width-1) right = mach64Screen->width-1;
+   if (bottom > mach64Screen->height-1) bottom = mach64Screen->height-1;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_MSG ) {
+      fprintf( stderr, "%s: drawable %3d %3d %3d %3d\n",
+	       __FUNCTION__,
+	       drawable->x,
+	       drawable->y,
+	       drawable->w,
+	       drawable->h );
+      fprintf( stderr, "%s:  scissor %3d %3d %3d %3d\n",
+	       __FUNCTION__,
+	       ctx->Scissor.X,
+	       ctx->Scissor.Y,
+	       ctx->Scissor.Width,
+	       ctx->Scissor.Height );
+      fprintf( stderr, "%s:    final %3d %3d %3d %3d\n",
+	       __FUNCTION__, left, top, right, bottom );
+      fprintf( stderr, "\n" );
+   }
+
+   mmesa->setup.sc_top_bottom = ((top << 0) | (bottom << 16));
+   mmesa->setup.sc_left_right = ((left << 0) | (right << 16));
+
+   /* UPLOAD_MISC reduces the dirty state: we just need to emit the
+    * scissor to the SAREA.  The cliprects must be dirtied as well,
+    * since the scissor and the cliprects are intersected to update
+    * the single hardware scissor.
+    */
+   mmesa->dirty |= MACH64_UPLOAD_MISC | MACH64_UPLOAD_CLIPRECTS;
+}
+
+static void mach64DDScissor( GLcontext *ctx,
+			     GLint x, GLint y, GLsizei w, GLsizei h )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* x, y, w, h are not read here; mach64UpdateClipping reads ctx->Scissor. */
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_CLIP;
+}
+
+
+/* =============================================================
+ * Culling
+ */
+
+/* Derive mmesa->backface_sign from the polygon cull state: 0 when culling
+ * is off or both faces are culled, otherwise +1 or -1 depending on the
+ * culled face and the front-face winding.
+ */
+static void mach64UpdateCull( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLfloat sign = 0;
+
+   if ( ctx->Polygon.CullFlag ) {
+      const GLboolean ccw = ( ctx->Polygon.FrontFace == GL_CCW );
+
+      switch ( ctx->Polygon.CullFaceMode ) {
+      case GL_BACK:
+	 sign = ccw ? -1 : 1;
+	 break;
+      case GL_FRONT:
+	 sign = ccw ? 1 : -1;
+	 break;
+      case GL_FRONT_AND_BACK:
+      default:
+	 sign = 0;
+	 break;
+      }
+   }
+
+   mmesa->backface_sign = sign;
+}
+
+static void mach64DDCullFace( GLcontext *ctx, GLenum mode )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* mode is not read here; mach64UpdateCull reads ctx->Polygon.CullFaceMode. */
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_CULL;
+}
+
+static void mach64DDFrontFace( GLcontext *ctx, GLenum mode )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* mode is not read here; mach64UpdateCull reads ctx->Polygon.FrontFace. */
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_CULL;
+}
+
+
+/* =============================================================
+ * Masks
+ */
+
+/* Refresh dp_write_mask from ctx->Color.ColorMask (all ones while blending). */
+static void mach64UpdateMasks( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLuint write_mask;
+
+   /* mach64 can't color mask with alpha blending enabled */
+   if ( ctx->Color.BlendEnabled )
+      write_mask = 0xffffffff;
+   else
+      write_mask = mach64PackColor( mmesa->mach64Screen->cpp,
+				    ctx->Color.ColorMask[RCOMP], ctx->Color.ColorMask[GCOMP],
+				    ctx->Color.ColorMask[BCOMP], ctx->Color.ColorMask[ACOMP] );
+
+   if ( mmesa->setup.dp_write_mask != write_mask ) {
+      mmesa->setup.dp_write_mask = write_mask;
+      mmesa->dirty |= MACH64_UPLOAD_DP_WRITE_MASK;
+   }
+}
+
+static void mach64DDColorMask( GLcontext *ctx,
+			       GLboolean r, GLboolean g,
+			       GLboolean b, GLboolean a )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* r, g, b, a are not read here; mach64UpdateMasks reads ctx->Color.ColorMask. */
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_MASKS;
+}
+
+
+/* =============================================================
+ * Rendering attributes
+ *
+ * We really don't want to recalculate all this every time we bind a
+ * texture.  These things shouldn't change all that often, so it makes
+ * sense to break them out of the core texture state update routines.
+ */
+
+/* Set MACH64_SPECULAR_LIGHT_EN in alpha_tst_cntl only when lighting is
+ * enabled with GL_SEPARATE_SPECULAR_COLOR.  On change, dirty
+ * MACH64_UPLOAD_Z_ALPHA_CNTL and flag MACH64_NEW_CONTEXT.
+ */
+static void mach64UpdateSpecularLighting( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLuint alpha_cntl = mmesa->setup.alpha_tst_cntl & ~MACH64_SPECULAR_LIGHT_EN;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_MSG )
+      fprintf( stderr, "%s:\n", __FUNCTION__ );
+
+   if ( ctx->Light.Enabled &&
+        ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR )
+      alpha_cntl |= MACH64_SPECULAR_LIGHT_EN;
+
+   if ( mmesa->setup.alpha_tst_cntl != alpha_cntl ) {
+      mmesa->setup.alpha_tst_cntl = alpha_cntl;
+      mmesa->dirty |= MACH64_UPLOAD_Z_ALPHA_CNTL;
+      mmesa->new_state |= MACH64_NEW_CONTEXT;
+   }
+}
+
+static void mach64DDLightModelfv( GLcontext *ctx, GLenum pname,
+				  const GLfloat *param )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* Only GL_LIGHT_MODEL_COLOR_CONTROL is acted on; param itself is not read. */
+   if ( pname == GL_LIGHT_MODEL_COLOR_CONTROL ) {
+      FLUSH_BATCH( mmesa );
+      mach64UpdateSpecularLighting(ctx);
+   }
+}
+
+/* Select flat or smooth shading in setup_cntl.  Modes other than GL_FLAT
+ * and GL_SMOOTH are ignored; nothing happens if the value is unchanged.
+ */
+static void mach64DDShadeModel( GLcontext *ctx, GLenum mode )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLuint shade_bits;
+   GLuint setup_cntl;
+
+   if ( mode == GL_FLAT )
+      shade_bits = MACH64_FLAT_SHADE_VERTEX_3;
+   else if ( mode == GL_SMOOTH )
+      shade_bits = MACH64_FLAT_SHADE_OFF;
+   else
+      return;
+
+   setup_cntl = (mmesa->setup.setup_cntl & ~MACH64_FLAT_SHADE_MASK) | shade_bits;
+   if ( setup_cntl == mmesa->setup.setup_cntl )
+      return;
+
+   /* FLUSH_BATCH runs before the new value is stored. */
+   FLUSH_BATCH( mmesa );
+   mmesa->setup.setup_cntl = setup_cntl;
+   mmesa->dirty |= MACH64_UPLOAD_SETUP_CNTL;
+}
+
+
+/* =============================================================
+ * Viewport
+ */
+
+
+/* Build hw_viewport from Mesa's window map: negate the Y terms and add the
+ * drawable height, add the draw origin and subpixel bias, scale the Z terms
+ * by depth_scale.  See also mach64_translate_vertex. */
+void mach64CalcViewport( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   const GLfloat *wmap = ctx->Viewport._WindowMap.m;
+   GLfloat *hw = mmesa->hw_viewport;
+
+   hw[MAT_SX] =   wmap[MAT_SX];
+   hw[MAT_TX] =   wmap[MAT_TX] + (GLfloat)mmesa->drawX + SUBPIXEL_X;
+   hw[MAT_SY] = - wmap[MAT_SY];
+   hw[MAT_TY] = - wmap[MAT_TY] + mmesa->driDrawable->h + (GLfloat)mmesa->drawY + SUBPIXEL_Y;
+   hw[MAT_SZ] =   wmap[MAT_SZ] * mmesa->depth_scale;
+   hw[MAT_TZ] =   wmap[MAT_TZ] * mmesa->depth_scale;
+   mmesa->SetupNewInputs = ~0;
+}
+
+static void mach64Viewport( GLcontext *ctx,
+			  GLint x, GLint y,
+			  GLsizei width, GLsizei height )
+{  /* Arguments are unused; mach64CalcViewport reads ctx->Viewport._WindowMap. */
+   mach64CalcViewport( ctx );
+}
+
+static void mach64DepthRange( GLcontext *ctx,
+			    GLclampd nearval, GLclampd farval )
+{  /* nearval/farval are unused; mach64CalcViewport reads ctx->Viewport._WindowMap. */
+   mach64CalcViewport( ctx );
+}
+
+
+/* =============================================================
+ * Miscellaneous
+ */
+
+/* Store the clear color, packed by mach64PackColor with the screen's cpp. */
+static void mach64DDClearColor( GLcontext *ctx,
+				const GLfloat color[4] )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLubyte rgba[4];
+   int i;
+
+   for ( i = 0 ; i < 4 ; i++ ) {
+      CLAMPED_FLOAT_TO_UBYTE(rgba[i], color[i]);
+   }
+   mmesa->ClearColor = mach64PackColor( mmesa->mach64Screen->cpp,
+					rgba[0], rgba[1], rgba[2], rgba[3] );
+}
+
+static void mach64DDLogicOpCode( GLcontext *ctx, GLenum opcode )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* Acts only while color logic op is enabled: fallback for any opcode but GL_COPY. */
+   if ( ctx->Color.ColorLogicOpEnabled ) {
+      FLUSH_BATCH( mmesa );
+
+      FALLBACK( mmesa, MACH64_FALLBACK_LOGICOP, opcode != GL_COPY);
+   }
+}
+
+/* Point the context at the cliprect list and draw origin for the given
+ * buffer.  The back buffer uses the front list when the drawable has no
+ * back cliprects.  Modes other than front/back left are ignored.
+ */
+void mach64SetCliprects( GLcontext *ctx, GLenum mode )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   __DRIdrawablePrivate *dPriv = mmesa->driDrawable;
+   GLboolean use_back;
+
+   if ( mode == GL_BACK_LEFT )
+      use_back = ( dPriv->numBackClipRects != 0 );
+   else if ( mode == GL_FRONT_LEFT )
+      use_back = GL_FALSE;
+   else
+      return;
+
+   if ( use_back ) {
+      mmesa->numClipRects = dPriv->numBackClipRects;
+      mmesa->pClipRects = dPriv->pBackClipRects;
+      mmesa->drawX = dPriv->backX;
+      mmesa->drawY = dPriv->backY;
+   } else {
+      mmesa->numClipRects = dPriv->numClipRects;
+      mmesa->pClipRects = dPriv->pClipRects;
+      mmesa->drawX = dPriv->x;
+      mmesa->drawY = dPriv->y;
+   }
+
+   mach64UpdateClipping( ctx );
+
+   mmesa->dirty |= MACH64_UPLOAD_CLIPRECTS;
+}
+
+/* Route rendering to the front or back buffer according to the first
+ * color draw-buffer mask; any other selection forces a software fallback.
+ * dst_off_pitch is rebuilt from drawPitch/drawOffset in every case.
+ */
+static void mach64DDDrawBuffer( GLcontext *ctx, GLenum mode )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLuint dest_mask;
+
+   FLUSH_BATCH( mmesa );
+
+   /* The draw-buffer mask is easier to cope with than <mode>. */
+   dest_mask = ctx->DrawBuffer->_ColorDrawBufferMask[0];
+   if ( dest_mask == BUFFER_BIT_FRONT_LEFT ) {
+      FALLBACK( mmesa, MACH64_FALLBACK_DRAW_BUFFER, GL_FALSE );
+      mach64SetCliprects( ctx, GL_FRONT_LEFT );
+      if (MACH64_DEBUG & DEBUG_VERBOSE_MSG)
+	 fprintf(stderr,"%s: BUFFER_BIT_FRONT_LEFT\n", __FUNCTION__);
+   } else if ( dest_mask == BUFFER_BIT_BACK_LEFT ) {
+      FALLBACK( mmesa, MACH64_FALLBACK_DRAW_BUFFER, GL_FALSE );
+      mach64SetCliprects( ctx, GL_BACK_LEFT );
+      if (MACH64_DEBUG & DEBUG_VERBOSE_MSG)
+	 fprintf(stderr,"%s: BUFFER_BIT_BACK_LEFT\n", __FUNCTION__);
+   } else {
+      /* GL_NONE or GL_FRONT_AND_BACK or stereo left&right, etc */
+      FALLBACK( mmesa, MACH64_FALLBACK_DRAW_BUFFER, GL_TRUE );
+      if (MACH64_DEBUG & DEBUG_VERBOSE_MSG)
+	 fprintf(stderr,"%s: fallback (mode=%d)\n", __FUNCTION__, mode);
+   }
+
+   mmesa->setup.dst_off_pitch = (((mmesa->drawPitch/8) << 22) |
+				 (mmesa->drawOffset >> 3));
+
+   mmesa->dirty |= MACH64_UPLOAD_DST_OFF_PITCH;
+}
+
+static void mach64DDReadBuffer( GLcontext *ctx, GLenum mode )
+{  /* Intentionally empty: ctx and mode are ignored. */
+   /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */
+}
+
+/* =============================================================
+ * State enable/disable
+ */
+
+/* Capability enable/disable handler: runs FLUSH_BATCH and either flags the
+ * affected state group or toggles a fallback.  Other caps are ignored. */
+static void mach64DDEnable( GLcontext *ctx, GLenum cap, GLboolean state )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API )
+      fprintf( stderr, "%s( %s = %s )\n",
+	       __FUNCTION__, _mesa_lookup_enum_by_nr( cap ),
+	       state ? "GL_TRUE" : "GL_FALSE" );
+
+   switch ( cap ) {
+   case GL_ALPHA_TEST:
+      FLUSH_BATCH( mmesa );
+      mmesa->new_state |= MACH64_NEW_ALPHA;
+      break;
+
+   case GL_BLEND:
+      FLUSH_BATCH( mmesa );
+      mmesa->new_state |= MACH64_NEW_ALPHA;
+
+      /* enable(GL_BLEND) affects ColorLogicOpEnabled. */
+      FALLBACK( mmesa, MACH64_FALLBACK_LOGICOP,
+		(ctx->Color.ColorLogicOpEnabled &&
+		 ctx->Color.LogicOp != GL_COPY));
+      break;
+
+   case GL_CULL_FACE:
+      FLUSH_BATCH( mmesa );
+      mmesa->new_state |= MACH64_NEW_CULL;
+      break;
+
+   case GL_DEPTH_TEST:
+      FLUSH_BATCH( mmesa );
+      mmesa->new_state |= MACH64_NEW_DEPTH;
+      break;
+
+   case GL_DITHER:
+      do {
+	 GLuint s = mmesa->setup.scale_3d_cntl;
+	 FLUSH_BATCH( mmesa );
+	 if ( ctx->Color.DitherFlag ) {
+	    /* Dithering causes problems w/ 24bpp depth */
+	    if ( mmesa->mach64Screen->cpp == 4 )
+	       s |=  MACH64_ROUND_EN;
+	    else
+	       s |=  MACH64_DITHER_EN;
+	 } else {
+	    s &= ~MACH64_DITHER_EN;
+	    s &= ~MACH64_ROUND_EN;
+	 }
+
+	 if ( mmesa->setup.scale_3d_cntl != s ) {
+	    mmesa->setup.scale_3d_cntl = s;
+	    mmesa->dirty |= ( MACH64_UPLOAD_SCALE_3D_CNTL );
+	 }
+      } while (0);
+      break;
+
+   case GL_FOG:
+      FLUSH_BATCH( mmesa );
+      mmesa->new_state |= MACH64_NEW_FOG;
+      break;
+
+   case GL_INDEX_LOGIC_OP:
+   case GL_COLOR_LOGIC_OP:
+      FLUSH_BATCH( mmesa );
+      FALLBACK( mmesa, MACH64_FALLBACK_LOGICOP,
+		state && ctx->Color.LogicOp != GL_COPY );
+      break;
+
+   case GL_LIGHTING:
+      FLUSH_BATCH( mmesa );	/* the update may rewrite setup.alpha_tst_cntl, so flush first as mach64DDLightModelfv does */
+      mach64UpdateSpecularLighting(ctx);
+      break;
+
+   case GL_SCISSOR_TEST:
+      FLUSH_BATCH( mmesa );
+      mmesa->scissor = state;
+      mmesa->new_state |= MACH64_NEW_CLIP;
+      break;
+
+   case GL_STENCIL_TEST:
+      FLUSH_BATCH( mmesa );
+      FALLBACK( mmesa, MACH64_FALLBACK_STENCIL, state );
+      break;
+
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_3D:
+      FLUSH_BATCH( mmesa );
+      mmesa->new_state |= MACH64_NEW_TEXTURE;
+      break;
+
+   default:
+      return;
+   }
+}
+
+/* =============================================================
+ * Render mode
+ */
+
+/* ctx->Driver.RenderMode hook: the MACH64_FALLBACK_RENDER_MODE fallback is
+ * active for every mode other than GL_RENDER.
+ */
+static void mach64DDRenderMode( GLcontext *ctx, GLenum mode )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   FALLBACK( mmesa, MACH64_FALLBACK_RENDER_MODE, (GL_RENDER != mode) );
+}
+
+/* =============================================================
+ * State initialization, management
+ */
+
+static void mach64DDPrintDirty( const char *msg, GLuint state )
+{
+   fprintf( stderr,
+	    "%s: (0x%x) %s%s%s%s%s%s%s%s%s%s%s%s\n",
+	    msg,
+	    state,
+	    (state & MACH64_UPLOAD_DST_OFF_PITCH) ? "dst_off_pitch, " : "",
+	    (state & MACH64_UPLOAD_Z_ALPHA_CNTL)  ? "z_alpha_cntl, " : "",
+	    (state & MACH64_UPLOAD_SCALE_3D_CNTL) ? "scale_3d_cntl, " : "",
+	    (state & MACH64_UPLOAD_DP_FOG_CLR)    ? "dp_fog_clr, " : "",
+	    (state & MACH64_UPLOAD_DP_WRITE_MASK) ? "dp_write_mask, " : "",
+	    (state & MACH64_UPLOAD_DP_PIX_WIDTH)  ? "dp_pix_width, " : "",
+	    (state & MACH64_UPLOAD_SETUP_CNTL)    ? "setup_cntl, " : "",
+	    (state & MACH64_UPLOAD_MISC)          ? "misc, " : "",
+	    (state & MACH64_UPLOAD_TEXTURE)       ? "texture, " : "",
+	    (state & MACH64_UPLOAD_TEX0IMAGE)     ? "tex0 image, " : "",
+	    (state & MACH64_UPLOAD_TEX1IMAGE)     ? "tex1 image, " : "",
+	    (state & MACH64_UPLOAD_CLIPRECTS)     ? "cliprects, " : "" );
+}
+
+/*
+ * Load the current context's state into the hardware: upload any dirty
+ * texture images, copy the context registers into the SAREA, emit the
+ * texture state and merge the context's dirty bits into the SAREA's.
+ *
+ * NOTE: Be VERY careful about ensuring the context state is marked for
+ * upload, the only place it shouldn't be uploaded is when the setup
+ * state has changed in ReducedPrimitiveChange as this comes right after
+ * a state update.
+ *
+ * Blits of any type should always upload the context and masks after
+ * they are done.
+ */
+void mach64EmitHwStateLocked( mach64ContextPtr mmesa )
+{
+   drm_mach64_sarea_t *sarea = mmesa->sarea;
+   mach64TexObjPtr tex0 = mmesa->CurrentTexObj[0];
+   mach64TexObjPtr tex1 = mmesa->CurrentTexObj[1];
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_MSG )
+      mach64DDPrintDirty( __FUNCTION__, mmesa->dirty );
+
+   if ( tex0 && tex1 && mmesa->mach64Screen->numTexHeaps > 1 ) {
+      /* Two bound textures and more than one heap: upload them together
+       * when they live in different heaps or either image is dirty.
+       */
+      if ( tex0->heap != tex1->heap ||
+           (mmesa->dirty & (MACH64_UPLOAD_TEX0IMAGE |
+                            MACH64_UPLOAD_TEX1IMAGE)) ) {
+         mach64UploadMultiTexImages( mmesa, tex0, tex1 );
+      }
+   } else {
+      /* mmesa->dirty is re-read for each unit on purpose; it is not
+       * cached across the upload calls.
+       */
+      if ( tex0 && (mmesa->dirty & MACH64_UPLOAD_TEX0IMAGE) )
+         mach64UploadTexImages( mmesa, tex0 );
+
+      if ( tex1 && (mmesa->dirty & MACH64_UPLOAD_TEX1IMAGE) )
+         mach64UploadTexImages( mmesa, tex1 );
+   }
+
+   if ( mmesa->dirty & (MACH64_UPLOAD_CONTEXT | MACH64_UPLOAD_MISC) ) {
+      memcpy( &sarea->context_state, &mmesa->setup,
+              MACH64_NR_CONTEXT_REGS * sizeof(GLuint) );
+   }
+
+   if ( mmesa->dirty & MACH64_UPLOAD_TEXTURE )
+      mach64EmitTexStateLocked( mmesa, tex0, tex1 );
+
+   sarea->vertsize = mmesa->vertex_size;
+
+   /* Turn off the texture cache flushing.
+    */
+   mmesa->setup.tex_cntl &= ~MACH64_TEX_CACHE_FLUSH;
+
+   sarea->dirty |= mmesa->dirty;
+
+   /* Everything except a pending cliprect upload has now been handed over. */
+   mmesa->dirty &= MACH64_UPLOAD_CLIPRECTS;
+}
+
+static void mach64DDPrintState( const char *msg, GLuint flags )
+{
+   fprintf( stderr,
+	    "%s: (0x%x) %s%s%s%s%s%s%s%s%s\n",
+	    msg,
+	    flags,
+	    (flags & MACH64_NEW_CONTEXT)	? "context, " : "",
+	    (flags & MACH64_NEW_ALPHA)		? "alpha, " : "",
+	    (flags & MACH64_NEW_DEPTH)		? "depth, " : "",
+	    (flags & MACH64_NEW_FOG)		? "fog, " : "",
+	    (flags & MACH64_NEW_CLIP)		? "clip, " : "",
+	    (flags & MACH64_NEW_TEXTURE)	? "texture, " : "",
+	    (flags & MACH64_NEW_CULL)		? "cull, " : "",
+	    (flags & MACH64_NEW_MASKS)		? "masks, " : "",
+	    (flags & MACH64_NEW_WINDOW)		? "window, " : "" );
+}
+
+/* Update the hardware state.
+ *
+ * Consumes the MACH64_NEW_* bits accumulated in mmesa->new_state: the
+ * pending batch is flushed, the mask is cleared, and the matching
+ * mach64Update*() / mach64CalcViewport() routine is run for each bit
+ * that was set.  Does nothing (beyond optional debug output) when no
+ * bits are pending.
+ */
+void mach64DDUpdateHWState( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   const int pending = mmesa->new_state;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_MSG )
+      fprintf( stderr, "%s:\n", __FUNCTION__ );
+
+   if ( !pending )
+      return;
+
+   FLUSH_BATCH( mmesa );
+
+   /* Clear the mask first; the update routines below work from the copy. */
+   mmesa->new_state = 0;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_MSG )
+      mach64DDPrintState( __FUNCTION__, pending );
+
+   /* Update the various parts of the context's state.
+    */
+   if ( pending & MACH64_NEW_ALPHA )
+      mach64UpdateAlphaMode( ctx );
+
+   if ( pending & MACH64_NEW_DEPTH )
+      mach64UpdateZMode( ctx );
+
+   if ( pending & MACH64_NEW_FOG )
+      mach64UpdateFogAttrib( ctx );
+
+   if ( pending & MACH64_NEW_CLIP )
+      mach64UpdateClipping( ctx );
+
+   if ( pending & MACH64_NEW_WINDOW )
+      mach64CalcViewport( ctx );
+
+   if ( pending & MACH64_NEW_CULL )
+      mach64UpdateCull( ctx );
+
+   if ( pending & MACH64_NEW_MASKS )
+      mach64UpdateMasks( ctx );
+
+   if ( pending & MACH64_NEW_TEXTURE )
+      mach64UpdateTextureState( ctx );
+}
+
+
+/* ctx->Driver.UpdateState hook: forward the changed-state mask to the
+ * swrast, swsetup, vbo and tnl modules, then accumulate it in the
+ * driver context's NewGLState.
+ */
+static void mach64DDInvalidateState( GLcontext *ctx, GLuint new_state )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   _swrast_InvalidateState( ctx, new_state );
+   _swsetup_InvalidateState( ctx, new_state );
+   _vbo_InvalidateState( ctx, new_state );
+   _tnl_InvalidateState( ctx, new_state );
+
+   mmesa->NewGLState |= new_state;
+}
+
+
+/* Initialize the context's hardware state.
+ *
+ * Fills mmesa->setup with default register values derived from the
+ * screen layout (offsets, pitches, cpp), resets the clear values and
+ * fallback mask, and marks every piece of state as new so that it is
+ * recomputed on the next update.  Exits the process when the screen's
+ * cpp is neither 2 nor 4.
+ */
+void mach64DDInitState( mach64ContextPtr mmesa )
+{
+   GLuint format;
+
+   /* Pixel datatype follows the screen's cpp: 2 -> RGB565, 4 -> ARGB8888. */
+   if ( mmesa->mach64Screen->cpp == 2 ) {
+      format = MACH64_DATATYPE_RGB565;
+   } else if ( mmesa->mach64Screen->cpp == 4 ) {
+      format = MACH64_DATATYPE_ARGB8888;
+   } else {
+      fprintf( stderr, "Error: Unsupported pixel depth... exiting\n" );
+      exit( -1 );
+   }
+
+   /* Always have a 16-bit depth buffer
+    * but Z coordinates are specified in 16.1 format to the setup engine.
+    */
+   mmesa->depth_scale = 2.0;
+
+   mmesa->ClearColor = 0x00000000;
+   mmesa->ClearDepth = 0x0000ffff;
+
+   mmesa->Fallback = 0;
+
+   /* Double-buffered visuals start out drawing to (and reading from) the
+    * back buffer, single-buffered ones use the front buffer.
+    */
+   if ( mmesa->glCtx->Visual.doubleBufferMode ) {
+      mmesa->readOffset = mmesa->mach64Screen->backOffset;
+      mmesa->readPitch  = mmesa->mach64Screen->backPitch;
+   } else {
+      mmesa->readOffset = mmesa->mach64Screen->frontOffset;
+      mmesa->readPitch  = mmesa->mach64Screen->frontPitch;
+   }
+   mmesa->drawOffset = mmesa->readOffset;
+   mmesa->drawPitch  = mmesa->readPitch;
+
+   /* Hardware state:
+    */
+   mmesa->setup.dst_off_pitch = (((mmesa->drawPitch/8) << 22) |
+                                 (mmesa->drawOffset >> 3));
+
+   mmesa->setup.z_off_pitch = (((mmesa->mach64Screen->depthPitch/8) << 22) |
+                               (mmesa->mach64Screen->depthOffset >> 3));
+
+   mmesa->setup.z_cntl = (MACH64_Z_TEST_LESS |
+                          MACH64_Z_MASK_EN);
+
+   mmesa->setup.alpha_tst_cntl = (MACH64_ALPHA_TEST_ALWAYS |
+                                  MACH64_ALPHA_DST_SRCALPHA |
+                                  MACH64_ALPHA_TST_SRC_TEXEL |
+                                  (0 << MACH64_REF_ALPHA_SHIFT));
+
+   mmesa->setup.scale_3d_cntl = (MACH64_SCALE_PIX_EXPAND_DYNAMIC_RANGE |
+                                 /*  MACH64_SCALE_DITHER_ERROR_DIFFUSE | */
+                                 MACH64_SCALE_DITHER_2D_TABLE |
+                                 /*  MACH64_DITHER_INIT_CURRENT | */
+                                 MACH64_DITHER_INIT_RESET |
+                                 MACH64_SCALE_3D_FCN_SHADE |
+                                 MACH64_ALPHA_FOG_DIS |
+                                 MACH64_ALPHA_BLEND_SRC_ONE |
+                                 MACH64_ALPHA_BLEND_DST_ZERO |
+                                 MACH64_TEX_LIGHT_FCN_MODULATE |
+                                 MACH64_MIP_MAP_DISABLE |
+                                 MACH64_BILINEAR_TEX_EN |
+                                 MACH64_TEX_BLEND_FCN_LINEAR);
+
+   /* GL spec says dithering initially enabled, but dithering causes
+    * problems w/ 24bpp depth
+    */
+   mmesa->setup.scale_3d_cntl |= ( mmesa->mach64Screen->cpp == 4 )
+                                    ? MACH64_ROUND_EN
+                                    : MACH64_DITHER_EN;
+
+   mmesa->setup.sc_left_right = 0x1fff0000;
+   mmesa->setup.sc_top_bottom = 0x3fff0000;
+
+   mmesa->setup.dp_fog_clr    = 0x00ffffff;
+   mmesa->setup.dp_write_mask = 0xffffffff;
+
+   /* The same datatype is replicated into several fields of the register. */
+   mmesa->setup.dp_pix_width = ((format << 0) |
+                                (format << 4) |
+                                (format << 8) |
+                                (format << 16) |
+                                (format << 28));
+
+   mmesa->setup.dp_mix = (MACH64_BKGD_MIX_S |
+                          MACH64_FRGD_MIX_S);
+   mmesa->setup.dp_src = (MACH64_BKGD_SRC_3D |
+                          MACH64_FRGD_SRC_3D |
+                          MACH64_MONO_SRC_ONE);
+
+   mmesa->setup.clr_cmp_cntl  = 0x00000000;
+   mmesa->setup.gui_traj_cntl = (MACH64_DST_X_LEFT_TO_RIGHT |
+                                 MACH64_DST_Y_TOP_TO_BOTTOM);
+
+   /* NOTE(review): this register used to be assigned
+    * (MACH64_FLAT_SHADE_OFF | MACH64_SOLID_MODE_OFF | MACH64_LOG_MAX_INC_ADJ)
+    * and then immediately overwritten with 0.  Only the effective value
+    * is kept here; confirm which of the two was actually intended.
+    */
+   mmesa->setup.setup_cntl = 0;
+
+   mmesa->setup.tex_size_pitch = 0x00000000;
+
+   mmesa->setup.tex_cntl = ((0 << MACH64_LOD_BIAS_SHIFT) |
+                            (0 << MACH64_COMP_FACTOR_SHIFT) |
+                            MACH64_COMP_COMBINE_MODULATE |
+                            MACH64_COMP_BLEND_NEAREST |
+                            MACH64_COMP_FILTER_NEAREST |
+                            /* MACH64_TEXTURE_TILING | */
+#ifdef MACH64_PREMULT_TEXCOORDS
+                            MACH64_TEX_ST_DIRECT |
+#endif
+                            MACH64_TEX_SRC_LOCAL |
+                            MACH64_TEX_UNCOMPRESSED |
+                            MACH64_TEX_CACHE_FLUSH |
+                            MACH64_TEX_CACHE_SIZE_4K);
+
+   mmesa->setup.secondary_tex_off = 0x00000000;
+   mmesa->setup.tex_offset = 0x00000000;
+
+   /* Force a full recomputation of the derived state on first use. */
+   mmesa->new_state = MACH64_NEW_ALL;
+}
+
+/* Initialize the driver's state functions: install the mach64 state
+ * callbacks into ctx->Driver and explicitly clear the hooks this driver
+ * does not implement.
+ */
+void mach64DDInitStateFuncs( GLcontext *ctx )
+{
+   /* State invalidation */
+   ctx->Driver.UpdateState            = mach64DDInvalidateState;
+
+   /* Clear values and buffer selection */
+   ctx->Driver.ClearColor             = mach64DDClearColor;
+   ctx->Driver.ClearDepth             = mach64DDClearDepth;
+   ctx->Driver.DrawBuffer             = mach64DDDrawBuffer;
+   ctx->Driver.ReadBuffer             = mach64DDReadBuffer;
+
+   /* Per-fragment state */
+   ctx->Driver.AlphaFunc              = mach64DDAlphaFunc;
+   ctx->Driver.BlendEquationSeparate  = mach64DDBlendEquationSeparate;
+   ctx->Driver.BlendFuncSeparate      = mach64DDBlendFuncSeparate;
+   ctx->Driver.ColorMask              = mach64DDColorMask;
+   ctx->Driver.DepthFunc              = mach64DDDepthFunc;
+   ctx->Driver.DepthMask              = mach64DDDepthMask;
+   ctx->Driver.Fogfv                  = mach64DDFogfv;
+   ctx->Driver.LogicOpcode            = mach64DDLogicOpCode;
+   ctx->Driver.Scissor                = mach64DDScissor;
+
+   /* Primitive setup, lighting model and mode switches */
+   ctx->Driver.CullFace               = mach64DDCullFace;
+   ctx->Driver.FrontFace              = mach64DDFrontFace;
+   ctx->Driver.ShadeModel             = mach64DDShadeModel;
+   ctx->Driver.LightModelfv           = mach64DDLightModelfv;
+   ctx->Driver.RenderMode             = mach64DDRenderMode;
+   ctx->Driver.Enable                 = mach64DDEnable;
+
+   /* Viewport transform */
+   ctx->Driver.DepthRange             = mach64DepthRange;
+   ctx->Driver.Viewport               = mach64Viewport;
+
+   /* Hooks with no driver-side handler */
+   ctx->Driver.ClearIndex             = NULL;
+   ctx->Driver.IndexMask              = NULL;
+   ctx->Driver.Hint                   = NULL;
+   ctx->Driver.Lightfv                = NULL;
+   ctx->Driver.PolygonMode            = NULL;
+   ctx->Driver.PolygonStipple         = NULL;
+}
diff --git a/src/mach64_state.h b/src/mach64_state.h
new file mode 100644
index 0000000..95bcab3
--- /dev/null
+++ b/src/mach64_state.h
@@ -0,0 +1,47 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ */
+
+#ifndef __MACH64_STATE_H__
+#define __MACH64_STATE_H__
+/* Public state-management entry points of the mach64 driver. */
+#include "mach64_context.h"
+/*
+ * Context setup: mach64DDInitState() initializes the context's hardware
+ * state, mach64DDInitStateFuncs() initializes the driver's state
+ * functions in ctx->Driver.
+ */
+extern void mach64DDInitState( mach64ContextPtr mmesa );
+extern void mach64DDInitStateFuncs( GLcontext *ctx );
+/*
+ * mach64SetCliprects() is called from the draw-buffer handling with the
+ * selected buffer enum; mach64CalcViewport() is run by
+ * mach64DDUpdateHWState() when MACH64_NEW_WINDOW is pending.
+ */
+extern void mach64SetCliprects( GLcontext *ctx, GLenum mode );
+extern void mach64CalcViewport( GLcontext *ctx );
+/*
+ * mach64DDUpdateHWState() updates the hardware state from the pending
+ * MACH64_NEW_* bits.
+ * NOTE(review): no definition of mach64DDUpdateState() is visible next to
+ * the other state functions -- confirm it exists or drop the prototype.
+ */
+extern void mach64DDUpdateState( GLcontext *ctx );
+extern void mach64DDUpdateHWState( GLcontext *ctx );
+/* Loads the current context's state into the hardware (via the SAREA). */
+extern void mach64EmitHwStateLocked( mach64ContextPtr mmesa );
+
+#endif /* __MACH64_STATE_H__ */
diff --git a/src/mach64_tex.c b/src/mach64_tex.c
new file mode 100644
index 0000000..5288d32
--- /dev/null
+++ b/src/mach64_tex.c
@@ -0,0 +1,567 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "mach64_context.h"
+#include "mach64_ioctl.h"
+#include "mach64_state.h"
+#include "mach64_vb.h"
+#include "mach64_tris.h"
+#include "mach64_tex.h"
+
+#include "context.h"
+#include "macros.h"
+#include "simple_list.h"
+#include "enums.h"
+#include "texstore.h"
+#include "texformat.h"
+#include "teximage.h"
+#include "texobj.h"
+#include "imports.h"
+
+
+/* Record the S and T wrap modes on the driver texture object as
+ * clamp flags: GL_REPEAT clears the flag, any of the three clamp modes
+ * sets it, and every other value leaves the flag untouched.
+ */
+static void mach64SetTexWrap( mach64TexObjPtr t,
+                              GLenum swrap, GLenum twrap )
+{
+   if ( swrap == GL_REPEAT ) {
+      t->ClampS = GL_FALSE;
+   } else if ( swrap == GL_CLAMP ||
+               swrap == GL_CLAMP_TO_EDGE ||
+               swrap == GL_CLAMP_TO_BORDER ) {
+      t->ClampS = GL_TRUE;
+   }
+
+   if ( twrap == GL_REPEAT ) {
+      t->ClampT = GL_FALSE;
+   } else if ( twrap == GL_CLAMP ||
+               twrap == GL_CLAMP_TO_EDGE ||
+               twrap == GL_CLAMP_TO_BORDER ) {
+      t->ClampT = GL_TRUE;
+   }
+}
+
+/* Record the min/mag filters on the driver texture object as bilinear
+ * flags.  The mipmap part of a minification filter is ignored: only the
+ * NEAREST vs LINEAR choice is kept.  Unrecognized values leave the
+ * corresponding flag untouched.
+ */
+static void mach64SetTexFilter( mach64TexObjPtr t,
+                                GLenum minf, GLenum magf )
+{
+   if ( minf == GL_NEAREST ||
+        minf == GL_NEAREST_MIPMAP_NEAREST ||
+        minf == GL_NEAREST_MIPMAP_LINEAR ) {
+      t->BilinearMin = GL_FALSE;
+   } else if ( minf == GL_LINEAR ||
+               minf == GL_LINEAR_MIPMAP_NEAREST ||
+               minf == GL_LINEAR_MIPMAP_LINEAR ) {
+      t->BilinearMin = GL_TRUE;
+   }
+
+   if ( magf == GL_NEAREST ) {
+      t->BilinearMag = GL_FALSE;
+   } else if ( magf == GL_LINEAR ) {
+      t->BilinearMag = GL_TRUE;
+   }
+}
+
+/* Placeholder for programming the texture border colour.  The only
+ * code in the body is compiled out, so the call currently has no effect.
+ */
+static void mach64SetTexBorderColor( mach64TexObjPtr t, GLubyte c[4] )
+{
+#if 0
+   GLuint border = mach64PackColor( 4, c[0], c[1], c[2], c[3] );
+#endif
+   (void) t;
+   (void) c;
+}
+
+
+/* Allocate the driver-private texture object for `texObj` and attach it
+ * through texObj->DriverData.
+ *
+ * Returns the new object, or NULL when the allocation fails (in which
+ * case texObj->DriverData is set to NULL as well).
+ */
+static mach64TexObjPtr
+mach64AllocTexObj( struct gl_texture_object *texObj )
+{
+   mach64TexObjPtr drv;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API )
+      fprintf( stderr, "%s( %p )\n", __FUNCTION__, texObj );
+
+   drv = (mach64TexObjPtr) CALLOC_STRUCT( mach64_texture_object );
+   texObj->DriverData = drv;
+
+   if ( drv ) {
+      /* Initialize non-image-dependent parts of the state:
+       */
+      drv->base.tObj = texObj;
+      drv->base.dirty_images[0] = (1 << 0);
+
+      drv->bufAddr = 0;
+
+      make_empty_list( (driTextureObject *) drv );
+
+      /* Mirror the GL object's current sampling parameters. */
+      mach64SetTexWrap( drv, texObj->WrapS, texObj->WrapT );
+      mach64SetTexFilter( drv, texObj->MinFilter, texObj->MagFilter );
+      mach64SetTexBorderColor( drv, texObj->_BorderChan );
+   }
+
+   return drv;
+}
+
+
+/* Called by the _mesa_store_teximage[123]d() functions.
+ *
+ * Maps the requested internal format to the texture format used for
+ * storage.  With a screen cpp of 4 every colour format is stored as
+ * ARGB8888; otherwise a 16-bit format (ARGB4444, ARGB1555 or RGB565) is
+ * picked per group.  Colour-index formats use CI8 and GL_YCBCR_MESA uses
+ * one of the two YCbCr layouts depending on `type`.  Returns NULL (after
+ * reporting a problem) for any other internal format.
+ */
+static const struct gl_texture_format *
+mach64ChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
+                           GLenum format, GLenum type )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   const GLboolean deep = ( mmesa->mach64Screen->cpp == 4 );
+   (void) format;
+
+   switch ( internalFormat ) {
+   /* ARGB8888 or ARGB4444 */
+   case GL_ALPHA:
+   case GL_ALPHA4:
+   case GL_ALPHA8:
+   case GL_ALPHA12:
+   case GL_ALPHA16:
+   case 2:
+   case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE4_ALPHA4:
+   case GL_LUMINANCE6_ALPHA2:
+   case GL_LUMINANCE8_ALPHA8:
+   case GL_LUMINANCE12_ALPHA4:
+   case GL_LUMINANCE12_ALPHA12:
+   case GL_LUMINANCE16_ALPHA16:
+   case 4:
+   case GL_RGBA:
+   case GL_RGBA2:
+   case GL_RGBA4:
+   case GL_RGBA8:
+   case GL_RGB10_A2:
+   case GL_RGBA12:
+   case GL_RGBA16:
+   case GL_INTENSITY:
+   case GL_INTENSITY4:
+   case GL_INTENSITY8:
+   case GL_INTENSITY12:
+   case GL_INTENSITY16:
+      /* For intensity, 8888 is inefficient but accurate. */
+      return deep ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444;
+
+   /* ARGB8888 or ARGB1555 */
+   case GL_RGB5_A1:
+   case 1:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE4:
+   case GL_LUMINANCE8:
+   case GL_LUMINANCE12:
+   case GL_LUMINANCE16:
+      /* For luminance, 8888 is inefficient but accurate. */
+      return deep ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb1555;
+
+   /* ARGB8888 or RGB565 */
+   case 3:
+   case GL_RGB:
+   case GL_R3_G3_B2:
+   case GL_RGB4:
+   case GL_RGB5:
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+      return deep ? &_mesa_texformat_argb8888 : &_mesa_texformat_rgb565;
+
+   case GL_COLOR_INDEX:
+   case GL_COLOR_INDEX1_EXT:
+   case GL_COLOR_INDEX2_EXT:
+   case GL_COLOR_INDEX4_EXT:
+   case GL_COLOR_INDEX8_EXT:
+   case GL_COLOR_INDEX12_EXT:
+   case GL_COLOR_INDEX16_EXT:
+      return &_mesa_texformat_ci8;
+
+   case GL_YCBCR_MESA:
+      if ( type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+           type == GL_UNSIGNED_BYTE ) {
+         return &_mesa_texformat_ycbcr;
+      }
+      return &_mesa_texformat_ycbcr_rev;
+
+   default:
+      break;
+   }
+
+   _mesa_problem( ctx, "unexpected format in %s", __FUNCTION__ );
+   return NULL;
+}
+
+/* TexImage1D driver hook.
+ *
+ * Makes sure the texture object has driver-private data (allocating it
+ * on first use, swapping the existing object out otherwise), stores the
+ * image through the core Mesa helper and flags the texture state as new.
+ * Records GL_OUT_OF_MEMORY and returns early if the driver data cannot
+ * be allocated.
+ */
+static void mach64TexImage1D( GLcontext *ctx, GLenum target, GLint level,
+                              GLint internalFormat,
+                              GLint width, GLint border,
+                              GLenum format, GLenum type, const GLvoid *pixels,
+                              const struct gl_pixelstore_attrib *packing,
+                              struct gl_texture_object *texObj,
+                              struct gl_texture_image *texImage )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   driTextureObject *drv = (driTextureObject *) texObj->DriverData;
+
+   if ( drv == NULL ) {
+      drv = (driTextureObject *) mach64AllocTexObj(texObj);
+      if ( drv == NULL ) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
+         return;
+      }
+   } else {
+      driSwapOutTextureObject( drv );
+   }
+
+   /* Note, this will call mach64ChooseTextureFormat */
+   _mesa_store_teximage1d( ctx, target, level, internalFormat,
+                           width, border, format, type,
+                           pixels, packing, texObj, texImage );
+
+   mmesa->new_state |= MACH64_NEW_TEXTURE;
+}
+
+/* TexSubImage1D driver hook.
+ *
+ * Driver-private data is expected to exist already (asserted); if it
+ * does not, it is allocated here.  The sub-image is stored through the
+ * core Mesa helper and the texture state is flagged as new.  Records
+ * GL_OUT_OF_MEMORY and returns early if the allocation fails.
+ */
+static void mach64TexSubImage1D( GLcontext *ctx,
+                                 GLenum target,
+                                 GLint level,
+                                 GLint xoffset,
+                                 GLsizei width,
+                                 GLenum format, GLenum type,
+                                 const GLvoid *pixels,
+                                 const struct gl_pixelstore_attrib *packing,
+                                 struct gl_texture_object *texObj,
+                                 struct gl_texture_image *texImage )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   driTextureObject *drv = (driTextureObject *) texObj->DriverData;
+
+   assert( drv ); /* this _should_ be true */
+
+   if ( drv == NULL ) {
+      drv = (driTextureObject *) mach64AllocTexObj(texObj);
+      if ( drv == NULL ) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
+         return;
+      }
+   } else {
+      driSwapOutTextureObject( drv );
+   }
+
+   _mesa_store_texsubimage1d( ctx, target, level, xoffset, width,
+                              format, type, pixels, packing, texObj,
+                              texImage );
+
+   mmesa->new_state |= MACH64_NEW_TEXTURE;
+}
+
+/* TexImage2D driver hook.
+ *
+ * Makes sure the texture object has driver-private data (allocating it
+ * on first use, swapping the existing object out otherwise), stores the
+ * image through the core Mesa helper and flags the texture state as new.
+ * Records GL_OUT_OF_MEMORY and returns early if the driver data cannot
+ * be allocated.
+ *
+ * The caller-supplied `packing` describes the layout of `pixels` and is
+ * forwarded unchanged, exactly as the 1D and sub-image hooks do.  The
+ * previous code passed &ctx->Unpack instead, which is only correct when
+ * the caller happens to pass the context's unpack state; any caller
+ * handing in differently packed data would have its image decoded with
+ * the wrong row length / alignment / skip settings.
+ */
+static void mach64TexImage2D( GLcontext *ctx, GLenum target, GLint level,
+			      GLint internalFormat,
+			      GLint width, GLint height, GLint border,
+			      GLenum format, GLenum type, const GLvoid *pixels,
+			      const struct gl_pixelstore_attrib *packing,
+			      struct gl_texture_object *texObj,
+			      struct gl_texture_image *texImage )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+
+   if ( t ) {
+      driSwapOutTextureObject( t );
+   }
+   else {
+      t = (driTextureObject *) mach64AllocTexObj(texObj);
+      if (!t) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+         return;
+      }
+   }
+
+   /* Note, this will call mach64ChooseTextureFormat */
+   _mesa_store_teximage2d( ctx, target, level, internalFormat,
+			   width, height, border, format, type, pixels,
+			   packing, texObj, texImage );
+
+   mmesa->new_state |= MACH64_NEW_TEXTURE;
+}
+
+/* TexSubImage2D driver hook.
+ *
+ * Driver-private data is expected to exist already (asserted); if it
+ * does not, it is allocated here.  The sub-image is stored through the
+ * core Mesa helper and the texture state is flagged as new.  Records
+ * GL_OUT_OF_MEMORY and returns early if the allocation fails.
+ */
+static void mach64TexSubImage2D( GLcontext *ctx,
+                                 GLenum target,
+                                 GLint level,
+                                 GLint xoffset, GLint yoffset,
+                                 GLsizei width, GLsizei height,
+                                 GLenum format, GLenum type,
+                                 const GLvoid *pixels,
+                                 const struct gl_pixelstore_attrib *packing,
+                                 struct gl_texture_object *texObj,
+                                 struct gl_texture_image *texImage )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   driTextureObject *drv = (driTextureObject *) texObj->DriverData;
+
+   assert( drv ); /* this _should_ be true */
+
+   if ( drv == NULL ) {
+      drv = (driTextureObject *) mach64AllocTexObj(texObj);
+      if ( drv == NULL ) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
+         return;
+      }
+   } else {
+      driSwapOutTextureObject( drv );
+   }
+
+   _mesa_store_texsubimage2d( ctx, target, level, xoffset, yoffset, width,
+                              height, format, type, pixels, packing, texObj,
+                              texImage );
+
+   mmesa->new_state |= MACH64_NEW_TEXTURE;
+}
+
+/* ================================================================
+ * Device Driver API texture functions
+ */
+
+/* TexEnv driver hook.
+ *
+ * Only GL_TEXTURE_ENV_MODE is acted upon: the pending batch is flushed
+ * and the texture and alpha state are flagged as new.  Handling of
+ * GL_TEXTURE_ENV_COLOR is compiled out; every other pname is ignored.
+ * `target` and `param` are not used.
+ */
+static void mach64DDTexEnv( GLcontext *ctx, GLenum target,
+                            GLenum pname, const GLfloat *param )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+#if 0
+   struct gl_texture_unit *texUnit;
+   GLubyte c[4];
+#endif
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      const char *pname_str = _mesa_lookup_enum_by_nr( pname );
+
+      fprintf( stderr, "%s( %s )\n", __FUNCTION__, pname_str );
+   }
+
+   switch ( pname ) {
+   case GL_TEXTURE_ENV_MODE:
+      FLUSH_BATCH( mmesa );
+      mmesa->new_state |= (MACH64_NEW_TEXTURE | MACH64_NEW_ALPHA);
+      break;
+
+   /* Disabled code, kept as-is.  NOTE(review): the condition below has an
+    * unbalanced ')' that must be fixed before this is ever enabled.
+    */
+#if 0
+   case GL_TEXTURE_ENV_COLOR:
+      texUnit = &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+      CLAMPED_FLOAT_TO_UBYTE( c[0], texUnit->EnvColor[0] );
+      CLAMPED_FLOAT_TO_UBYTE( c[1], texUnit->EnvColor[1] );
+      CLAMPED_FLOAT_TO_UBYTE( c[2], texUnit->EnvColor[2] );
+      CLAMPED_FLOAT_TO_UBYTE( c[3], texUnit->EnvColor[3] );
+      mmesa->env_color = mach64PackColor( 32, c[0], c[1], c[2], c[3] );
+      if ( mmesa->setup.constant_color_c != mmesa->env_color ) {
+	 FLUSH_BATCH( mmesa );
+	 mmesa->setup.constant_color_c = mmesa->env_color;
+
+	 mmesa->new_state |= MACH64_NEW_TEXTURE;
+
+	 /* More complex multitexture/multipass fallbacks for GL_BLEND
+	  * can be done later, but this allows a single pass GL_BLEND
+	  * in some cases (ie. Performer town demo).
+	  */
+	 mmesa->blend_flags &= ~MACH64_BLEND_ENV_COLOR;
+	 if ( mmesa->env_color != 0x00000000 &&
+	      mmesa->env_color != 0xff000000 &&
+	      mmesa->env_color != 0x00ffffff &&
+	      mmesa->env_color != 0xffffffff )) {
+	    mmesa->blend_flags |= MACH64_BLEND_ENV_COLOR;
+	 }
+      }
+      break;
+#endif
+
+   default:
+      break;
+   }
+}
+
+/* TexParameter driver hook.
+ *
+ * Only 1D and 2D targets are handled.  Driver-private data is created
+ * on demand (GL_OUT_OF_MEMORY is recorded if that fails).  For filter,
+ * wrap and border-colour changes the cached flags on the driver object
+ * are refreshed from `tObj`; a base-level change swaps the texture out.
+ * In every handled case the batch is flushed first if the texture is
+ * bound, and the texture state is flagged as new afterwards.  Other
+ * pnames are ignored.
+ */
+static void mach64DDTexParameter( GLcontext *ctx, GLenum target,
+                                  struct gl_texture_object *tObj,
+                                  GLenum pname, const GLfloat *params )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   mach64TexObjPtr drv = (mach64TexObjPtr) tObj->DriverData;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %s )\n",
+               __FUNCTION__, _mesa_lookup_enum_by_nr( pname ) );
+   }
+
+   if ( !( target == GL_TEXTURE_2D || target == GL_TEXTURE_1D ) )
+      return;
+
+   if ( drv == NULL ) {
+      drv = mach64AllocTexObj(tObj);
+      if ( drv == NULL ) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexParameter");
+         return;
+      }
+   }
+
+   switch ( pname ) {
+   case GL_TEXTURE_MIN_FILTER:
+   case GL_TEXTURE_MAG_FILTER:
+      if ( drv->base.bound ) {
+         FLUSH_BATCH( mmesa );
+      }
+      mach64SetTexFilter( drv, tObj->MinFilter, tObj->MagFilter );
+      break;
+
+   case GL_TEXTURE_WRAP_S:
+   case GL_TEXTURE_WRAP_T:
+      if ( drv->base.bound ) {
+         FLUSH_BATCH( mmesa );
+      }
+      mach64SetTexWrap( drv, tObj->WrapS, tObj->WrapT );
+      break;
+
+   case GL_TEXTURE_BORDER_COLOR:
+      if ( drv->base.bound ) {
+         FLUSH_BATCH( mmesa );
+      }
+      mach64SetTexBorderColor( drv, tObj->_BorderChan );
+      break;
+
+   case GL_TEXTURE_BASE_LEVEL:
+      /* From Radeon/Rage128:
+       * This isn't the most efficient solution but there doesn't appear to
+       * be a nice alternative.  Since there's no LOD clamping,
+       * we just have to rely on loading the right subset of mipmap levels
+       * to simulate a clamped LOD.
+       *
+       * For mach64 we're only concerned with the base level
+       * since that's the only texture we upload.
+       */
+      if ( drv->base.bound ) {
+         FLUSH_BATCH( mmesa );
+      }
+      driSwapOutTextureObject( (driTextureObject *) drv );
+      break;
+
+   default:
+      /* Parameter not tracked by the driver: no state change. */
+      return;
+   }
+
+   mmesa->new_state |= MACH64_NEW_TEXTURE;
+}
+
+/* BindTexture driver hook.
+ *
+ * Flushes the pending batch, detaches whatever driver texture object is
+ * currently recorded for the active texture unit (clearing that unit's
+ * bit in its `bound` mask) and flags the texture state as new.  The
+ * newly bound object itself is not recorded here.
+ */
+static void mach64DDBindTexture( GLcontext *ctx, GLenum target,
+                                 struct gl_texture_object *tObj )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLint unit = ctx->Texture.CurrentUnit;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API )
+      fprintf( stderr, "%s( %p ) unit=%d\n", __FUNCTION__, tObj, unit );
+
+   FLUSH_BATCH( mmesa );
+
+   if ( mmesa->CurrentTexObj[unit] != NULL ) {
+      mmesa->CurrentTexObj[unit]->base.bound &= ~(1 << unit);
+      mmesa->CurrentTexObj[unit] = NULL;
+   }
+
+   mmesa->new_state |= MACH64_NEW_TEXTURE;
+}
+
+/* DeleteTexture driver hook.
+ *
+ * Destroys the driver-private texture object, if there is one (flushing
+ * the batch and flagging the texture state first when it is bound), and
+ * then always releases the core Mesa texture object.
+ *
+ * The core object used to be freed only when driver data was present,
+ * so a texture whose driver data was never allocated (allocation in
+ * mach64AllocTexObj() can fail and leave DriverData NULL) was leaked.
+ */
+static void mach64DDDeleteTexture( GLcontext *ctx,
+				   struct gl_texture_object *tObj )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   driTextureObject * t = (driTextureObject *) tObj->DriverData;
+
+   if ( t ) {
+      if ( t->bound && mmesa ) {
+	 FLUSH_BATCH( mmesa );
+
+	 mmesa->new_state |= MACH64_NEW_TEXTURE;
+      }
+
+      driDestroyTextureObject( t );
+   }
+
+   /* Free mipmap images and the texture object itself, regardless of
+    * whether driver-private data was attached.
+    */
+   _mesa_delete_texture_object(ctx, tObj);
+}
+
+/**
+ * Allocate a new texture object.
+ * Called via ctx->Driver.NewTextureObject.
+ * Note: we could use containment here to 'derive' the driver-specific
+ * texture object from the core mesa gl_texture_object.  Not done at this time.
+ *
+ * Returns NULL when the core object cannot be allocated.  A failure to
+ * allocate the driver-private part is not fatal here: DriverData is left
+ * NULL and the image/parameter hooks allocate it on demand.
+ */
+static struct gl_texture_object *
+mach64NewTextureObject( GLcontext *ctx, GLuint name, GLenum target )
+{
+   struct gl_texture_object *obj;
+   obj = _mesa_new_texture_object(ctx, name, target);
+   if ( !obj ) {
+      /* mach64AllocTexObj() dereferences its argument; bail out first. */
+      return NULL;
+   }
+   mach64AllocTexObj( obj );
+   return obj;
+}
+
+/* Install the mach64 texture callbacks into the driver function table,
+ * clear the hooks this driver does not implement, and initialize the
+ * shared DRI texture formats.
+ */
+void mach64InitTextureFuncs( struct dd_function_table *functions )
+{
+   /* Texture object lifetime and binding */
+   functions->NewTextureObject     = mach64NewTextureObject;
+   functions->DeleteTexture        = mach64DDDeleteTexture;
+   functions->BindTexture          = mach64DDBindTexture;
+   functions->IsTextureResident    = driIsTextureResident;
+
+   /* Image specification */
+   functions->ChooseTextureFormat  = mach64ChooseTextureFormat;
+   functions->TexImage1D           = mach64TexImage1D;
+   functions->TexSubImage1D        = mach64TexSubImage1D;
+   functions->TexImage2D           = mach64TexImage2D;
+   functions->TexSubImage2D        = mach64TexSubImage2D;
+
+   /* Texture environment and parameters */
+   functions->TexEnv               = mach64DDTexEnv;
+   functions->TexParameter         = mach64DDTexParameter;
+
+   /* Hooks with no driver-side handler */
+   functions->UpdateTexturePalette = NULL;
+   functions->ActiveTexture        = NULL;
+   functions->PrioritizeTexture    = NULL;
+
+   driInitTextureFormats();
+}
diff --git a/src/mach64_tex.h b/src/mach64_tex.h
new file mode 100644
index 0000000..f6cf1cf
--- /dev/null
+++ b/src/mach64_tex.h
@@ -0,0 +1,89 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#ifndef __MACH64_TEX_H__
+#define __MACH64_TEX_H__
+
+/* Update the hardware texture state from the current GL texture state. */
+extern void mach64UpdateTextureState( GLcontext *ctx );
+
+/* Upload the images of a single texture, allocating texture memory for it
+ * first if necessary.
+ */
+extern void mach64UploadTexImages( mach64ContextPtr mach64ctx,
+				   mach64TexObjPtr t );
+
+/* Upload the images of two textures used together; both are placed in the
+ * same texture heap.
+ */
+extern void mach64UploadMultiTexImages( mach64ContextPtr mach64ctx,
+					mach64TexObjPtr t0, mach64TexObjPtr t1 );
+
+/* Destroy hardware state associated with texture `t'. */
+extern void mach64DestroyTexObj( mach64ContextPtr mach64ctx,
+				 mach64TexObjPtr t );
+
+/* Copy the texture registers for textures `t0'/`t1' (either may be NULL)
+ * into the shared area.
+ */
+extern void mach64EmitTexStateLocked( mach64ContextPtr mmesa,
+				      mach64TexObjPtr t0,
+				      mach64TexObjPtr t1 );
+
+/* Install the mach64 texture functions in the driver function table. */
+extern void mach64InitTextureFuncs( struct dd_function_table *functions );
+
+/* ================================================================
+ * Color conversion macros:
+ *
+ * Each macro takes 8-bit color components and packs them into the layout
+ * named by its suffix.  The narrower layouts keep only the top bits of
+ * each component.  Arguments are evaluated exactly once.
+ */
+
+#define MACH64PACKCOLOR332(r, g, b)					\
+   (((r) & 0xe0) | (((g) & 0xe0) >> 3) | (((b) & 0xc0) >> 6))
+
+#define MACH64PACKCOLOR1555(r, g, b, a)					\
+   ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) |	\
+    ((a) ? 0x8000 : 0))
+
+#define MACH64PACKCOLOR565(r, g, b)					\
+   ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3))
+
+#define MACH64PACKCOLOR888(r, g, b)					\
+   (((r) << 16) | ((g) << 8) | (b))
+
+/* The alpha byte lands in bits 24..31.  It is shifted as an unsigned value
+ * because an 8-bit alpha >= 0x80 promoted to (signed) int would be shifted
+ * into the sign bit, which is undefined behaviour in C.
+ */
+#define MACH64PACKCOLOR8888(r, g, b, a)					\
+   (((GLuint)(a) << 24) | ((r) << 16) | ((g) << 8) | (b))
+
+#define MACH64PACKCOLOR4444(r, g, b, a)					\
+   ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4))
+
+/* Pack an 8-bit-per-component color for a pixel size of `cpp' bytes:
+ * RGB565 when cpp is 2, ARGB8888 when cpp is 4, and 0 for anything else.
+ */
+static __inline__ GLuint mach64PackColor( GLuint cpp,
+					  GLubyte r, GLubyte g,
+					  GLubyte b, GLubyte a )
+{
+   if ( cpp == 2 )
+      return MACH64PACKCOLOR565( r, g, b );
+
+   if ( cpp == 4 )
+      return MACH64PACKCOLOR8888( r, g, b, a );
+
+   /* Unsupported pixel size */
+   return 0;
+}
+
+#endif
diff --git a/src/mach64_texmem.c b/src/mach64_texmem.c
new file mode 100644
index 0000000..3b7b93b
--- /dev/null
+++ b/src/mach64_texmem.c
@@ -0,0 +1,506 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
+ *                                                Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * ATI, PRECISION INSIGHT AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Leif Delgass <ldelgass@retinalburn.net>
+ *   Jose Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "mach64_context.h"
+#include "mach64_state.h"
+#include "mach64_ioctl.h"
+#include "mach64_vb.h"
+#include "mach64_tris.h"
+#include "mach64_tex.h"
+
+#include "context.h"
+#include "macros.h"
+#include "simple_list.h"
+#include "texformat.h"
+#include "imports.h"
+
+
+/* Drop the references the context holds to texture `t'.
+ * Does nothing when no context is supplied.
+ */
+void mach64DestroyTexObj( mach64ContextPtr mmesa, mach64TexObjPtr t )
+{
+   unsigned   unit;
+
+   if ( mmesa == NULL )
+      return;
+
+   /* Clear every texture unit slot that still points at this object */
+   for ( unit = 0 ; unit < mmesa->glCtx->Const.MaxTextureUnits ; unit++ )
+   {
+      if ( mmesa->CurrentTexObj[ unit ] != t )
+         continue;
+
+      assert( t->base.bound & (1 << unit) );
+      mmesa->CurrentTexObj[ unit ] = NULL;
+   }
+}
+
+/* Upload the texture image associated with texture `t' at level `level'
+ * into the mapped AGP texture region.
+ *
+ * The image data is copied with memcpy() to the offset of the texture's
+ * memory block inside the AGP texture map.  The x/y/width/height arguments
+ * are currently overridden: the whole image is always copied (see the FIXME
+ * below).
+ *
+ * Returns without uploading if `level' is out of range or has no image.
+ */
+static void mach64UploadAGPSubImage( mach64ContextPtr mmesa,
+				     mach64TexObjPtr t, int level,
+				     int x, int y, int width, int height )
+{
+   mach64ScreenRec *mach64Screen = mmesa->mach64Screen;
+   struct gl_texture_image *image;
+   GLuint bytes;
+   int dwords;
+
+   /* Ensure we have a valid texture to upload.  MaxTextureLevels is a
+    * count of levels, so the highest valid index is MaxTextureLevels - 1.
+    */
+   if ( ( level < 0 ) || ( level >= mmesa->glCtx->Const.MaxTextureLevels ) )
+     return;
+
+   image = t->base.tObj->Image[0][level];
+   if ( !image )
+      return;
+
+#if 1
+   /* FIXME: The subimage index calcs are wrong... */
+   x = 0;
+   y = 0;
+   width = image->Width;
+   height = image->Height;
+#endif
+
+   /* Derive the dword count from the byte count rather than dividing by a
+    * texels-per-dword factor: the value is the same for 1, 2 and 4 byte
+    * texels, and no division by zero is possible for other texel sizes.
+    */
+   bytes = width * height * image->TexFormat->TexelBytes;
+   dwords = bytes / 4;
+
+#if ENABLE_PERF_BOXES
+   /* Bump the performance counter */
+   mmesa->c_agpTextureBytes += (dwords << 2);
+#endif
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "mach64UploadSubImage: %d,%d of %d,%d at %d,%d\n",
+	       width, height, image->Width, image->Height, x, y );
+      fprintf( stderr, "            blit ofs: 0x%07x pitch: 0x%x dwords: %d\n",
+	       (GLuint)t->bufAddr, (GLint)width, dwords );
+   }
+
+   assert(image->Data);
+
+   {
+      CARD32 *dst = (CARD32 *)((char *)mach64Screen->agpTextures.map + t->base.memBlock->ofs);
+      const GLubyte *src = (const GLubyte *) image->Data +
+	 (y * image->Width + x) * image->TexFormat->TexelBytes;
+      memcpy(dst, src, bytes);
+   }
+
+}
+
+/* Upload the texture image associated with texture `t' at level `level'
+ * to the texture's buffer address using host data blits.
+ *
+ * The image is handed to mach64FireBlitLocked() in chunks of at most
+ * `rows' rows.  The x/y/width/height arguments are currently overridden:
+ * the whole image is always uploaded (see the FIXME below).
+ *
+ * Returns without uploading if `level' is out of range, has no image, the
+ * image is empty, or the texel size is not 1, 2 or 4 bytes.
+ */
+static void mach64UploadLocalSubImage( mach64ContextPtr mmesa,
+				  mach64TexObjPtr t, int level,
+				  int x, int y, int width, int height )
+{
+   struct gl_texture_image *image;
+   int texelsPerDword;
+   int imageWidth, imageHeight;
+   int remaining, rows;
+   int format, dwords;
+   const int maxdwords = (MACH64_BUFFER_MAX_DWORDS - (MACH64_HOSTDATA_BLIT_OFFSET / 4));
+   CARD32 pitch, offset;
+
+   /* Ensure we have a valid texture to upload.  MaxTextureLevels is a
+    * count of levels, so the highest valid index is MaxTextureLevels - 1.
+    */
+   if ( ( level < 0 ) || ( level >= mmesa->glCtx->Const.MaxTextureLevels ) )
+      return;
+
+   image = t->base.tObj->Image[0][level];
+   if ( !image )
+      return;
+
+   /* Nothing to upload; this also keeps the divisions by the image width
+    * below well defined.
+    */
+   if ( image->Width == 0 || image->Height == 0 )
+      return;
+
+   switch ( image->TexFormat->TexelBytes ) {
+   case 1: texelsPerDword = 4; break;
+   case 2: texelsPerDword = 2; break;
+   case 4: texelsPerDword = 1; break;
+   default:
+      /* Any other texel size would lead to a division by zero below */
+      fprintf( stderr, "%s: unsupported texel size %d\n", __FUNCTION__,
+	       (int) image->TexFormat->TexelBytes );
+      return;
+   }
+
+#if 1
+   /* FIXME: The subimage index calcs are wrong... */
+   x = 0;
+   y = 0;
+   width = image->Width;
+   height = image->Height;
+#endif
+
+   imageWidth  = image->Width;
+   imageHeight = image->Height;
+
+   format = t->textureFormat;
+
+   /* The texel upload routines have a minimum width, so force the size
+    * if needed.
+    */
+   if ( imageWidth < texelsPerDword ) {
+      int factor;
+
+      factor = texelsPerDword / imageWidth;
+      imageWidth = texelsPerDword;
+      imageHeight /= factor;
+      if ( imageHeight == 0 ) {
+	 /* In this case, the texel converter will actually walk a
+	  * texel or two off the end of the image, but normal malloc
+	  * alignment should prevent it from ever causing a fault.
+	  */
+	 imageHeight = 1;
+      }
+   }
+
+   /* We can't upload to a pitch less than 64 texels so we will need to
+    * linearly upload all modified rows for textures smaller than this.
+    * This makes the x/y/width/height different for the blitter and the
+    * texture walker.
+    */
+   if ( imageWidth >= 64 ) {
+      /* The texture walker and the blitter look identical */
+      pitch = imageWidth >> 3;
+   } else {
+      int factor;
+      int y2;
+      int start, end;
+
+      start = (y * imageWidth) & ~63;
+      end = (y + height) * imageWidth;
+
+      if ( end - start < 64 ) {
+	 /* Handle the case where the total number of texels
+	  * uploaded is < 64.
+	  */
+	 x = 0;
+	 y = start / 64;
+	 width = end - start;
+	 height = 1;
+      } else {
+	 /* Upload some number of full 64 texel blit rows */
+	 factor = 64 / imageWidth;
+
+	 y2 = y + height - 1;
+	 y /= factor;
+	 y2 /= factor;
+
+	 x = 0;
+	 width = 64;
+	 height = y2 - y + 1;
+      }
+
+      /* Fixed pitch of 64 */
+      pitch = 8;
+   }
+
+   dwords = width * height / texelsPerDword;
+   offset = t->bufAddr;
+
+#if ENABLE_PERF_BOXES
+   /* Bump the performance counter */
+   mmesa->c_textureBytes += (dwords << 2);
+#endif
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "mach64UploadSubImage: %d,%d of %d,%d at %d,%d\n",
+	       width, height, image->Width, image->Height, x, y );
+      fprintf( stderr, "            blit ofs: 0x%07x pitch: 0x%x dwords: %d\n",
+	       (GLuint)offset, (GLint)width, dwords );
+   }
+
+   /* Subdivide the texture if required (account for the registers added by the drm) */
+   if ( dwords <= maxdwords ) {
+      rows = height;
+   } else {
+      rows = (maxdwords * texelsPerDword) / (2 * width);
+   }
+
+   /* The loop below advances by `rows' per iteration; never let it stall */
+   if ( rows < 1 )
+      rows = 1;
+
+   for ( remaining = height ;
+	 remaining > 0 ;
+	 remaining -= rows, y += rows )
+   {
+       height = MIN2(remaining, rows);
+
+       assert(image->Data);
+
+       {
+          const GLubyte *src = (const GLubyte *) image->Data +
+             (y * image->Width + x) * image->TexFormat->TexelBytes;
+
+          mach64FireBlitLocked( mmesa, (void *)src, offset, pitch, format,
+				x, y, width, height );
+       }
+
+   }
+
+   mmesa->new_state |= MACH64_NEW_CONTEXT;
+   mmesa->dirty |= MACH64_UPLOAD_CONTEXT | MACH64_UPLOAD_MISC;
+}
+
+
+/* Make sure texture `t' has texture memory and that its base level image
+ * has been uploaded.  Allocating memory might require removing our own
+ * and/or other client's texture objects to make room.  The process exits
+ * if no heap can hold the texture.
+ */
+void mach64UploadTexImages( mach64ContextPtr mmesa, mach64TexObjPtr t )
+{
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %p, %p )\n",
+	       __FUNCTION__, mmesa->glCtx, t );
+   }
+
+   assert(t);
+   assert(t->base.tObj);
+
+   if ( t->base.memBlock == NULL ) {
+      /* NULL heaps are skipped */
+      const int newHeap = driAllocateTexture( mmesa->texture_heaps,
+					      MACH64_NR_TEX_HEAPS,
+					      (driTextureObject *) t );
+
+      if ( newHeap == -1 ) {
+	 fprintf( stderr, "%s: upload texture failure, sz=%d\n", __FUNCTION__,
+		  t->base.totalSize );
+	 exit(-1);
+      }
+
+      t->heap = newHeap;
+
+      /* Base offset of the texture image: heap base plus block offset */
+      t->bufAddr = mmesa->mach64Screen->texOffset[newHeap] + t->base.memBlock->ofs;
+
+      /* The texture moved, so the hardware state has to be reloaded */
+      mmesa->dirty |= MACH64_UPLOAD_SCALE_3D_CNTL | MACH64_UPLOAD_TEXTURE;
+   }
+
+   /* Mark the texture memory as recently used */
+   driUpdateTextureLRU( (driTextureObject *) t );
+
+   /* Only the base level is uploaded, and only when it is flagged dirty */
+   if ( t->base.dirty_images[0] ) {
+      const GLint level = t->base.tObj->BaseLevel;
+      struct gl_texture_image *const base = t->base.tObj->Image[0][level];
+
+      if ( t->heap != MACH64_AGP_HEAP ) {
+	 mach64UploadLocalSubImage( mmesa, t, level, 0, 0,
+				    base->Width, base->Height );
+      } else {
+	 /* Need to make sure any vertex buffers in the queue complete */
+	 mach64WaitForIdleLocked( mmesa );
+	 mach64UploadAGPSubImage( mmesa, t, level, 0, 0,
+				  base->Width, base->Height );
+      }
+
+      t->base.dirty_images[0] = 0;
+      mmesa->setup.tex_cntl |= MACH64_TEX_CACHE_FLUSH;
+   }
+
+   mmesa->dirty |= MACH64_UPLOAD_TEXTURE;
+}
+
+
+/* Allocate memory from the same texture heap `heap' for both textures
+ * `u0' and `u1'.
+ *
+ * When `alloc_u0' is false, u0 is left where it is and only u1 is
+ * (re)allocated; the final asserts then require that u0 already lives in
+ * `heap'.  A texture that is about to be allocated here is first swapped
+ * out of whatever heap it currently occupies.
+ *
+ * Returns `heap' on success, or -1 as soon as one allocation fails (in
+ * which case the failing texture's `heap' field is left at -1).
+ */
+static int mach64AllocateMultiTex( mach64ContextPtr mmesa,
+				   mach64TexObjPtr u0,
+				   mach64TexObjPtr u1,
+				   int heap, GLboolean alloc_u0 )
+{
+   /* Both objects should be bound */
+   assert( u0->base.bound && u1->base.bound );
+
+   if ( alloc_u0 ) {
+      /* Evict u0 from its current heap */
+      if ( u0->base.memBlock ) {
+	 assert( u0->heap != heap );
+	 driSwapOutTextureObject( (driTextureObject *) u0 );
+      }
+
+      /* Try to allocate u0 in the chosen heap.  Only that one heap is
+       * offered to the allocator (array of length 1).
+       */
+      u0->heap = driAllocateTexture( &mmesa->texture_heaps[heap], 1,
+				     (driTextureObject *) u0 );
+
+      if ( u0->heap == -1 ) {
+	 return -1;
+      }
+   }
+
+   /* Evict u1 from its current heap */
+   if ( u1->base.memBlock ) {
+      assert( u1->heap != heap );
+      driSwapOutTextureObject( (driTextureObject *) u1 );
+   }
+
+   /* Try to allocate u1 in the same heap as u0 */
+   u1->heap = driAllocateTexture( &mmesa->texture_heaps[heap], 1,
+				  (driTextureObject *) u1 );
+
+   if ( u1->heap == -1 ) {
+      return -1;
+   }
+
+   /* Bound objects are not evicted */
+   assert( u0->base.memBlock && u1->base.memBlock );
+   assert( u0->heap == u1->heap );
+
+   return heap;
+}
+
+/* The mach64 needs to have both primary and secondary textures in either
+ * local or AGP memory, so we need a "buddy system" to make sure that allocation
+ * succeeds or fails for both textures.
+ *
+ * If the two textures are not already resident in the same heap, a common
+ * heap is chosen and both are (re)allocated there via
+ * mach64AllocateMultiTex(); the process exits if that is impossible.
+ * Afterwards the dirty base level image of each texture is uploaded.
+ */
+void mach64UploadMultiTexImages( mach64ContextPtr mmesa, 
+				 mach64TexObjPtr t0,
+				 mach64TexObjPtr t1 )
+{
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %p, %p %p )\n",
+	       __FUNCTION__, mmesa->glCtx, t0, t1 );
+   }
+
+   assert(t0 && t1);
+   assert(t0->base.tObj && t1->base.tObj);
+
+   if ( !t0->base.memBlock || !t1->base.memBlock || t0->heap != t1->heap ) {
+      /* u0 is the texture that stays put (if any), u1 the one that joins it */
+      mach64TexObjPtr u0 = NULL;
+      mach64TexObjPtr u1 = NULL;
+      unsigned totalSize = t0->base.totalSize + t1->base.totalSize;
+
+      int heap, ret;
+
+      /* Check if one of the textures is already swapped in a heap and the
+       * other texture fits in that heap.
+       */
+      if ( t0->base.memBlock && totalSize <= t0->base.heap->size ) {
+	 u0 = t0;
+	 u1 = t1;
+      } else if ( t1->base.memBlock && totalSize <= t1->base.heap->size ) {
+	 u0 = t1;
+	 u1 = t0;
+      }
+
+      if ( u0 ) {
+	 heap = u0->heap;
+
+	 /* Keep u0 in place and only bring u1 into its heap */
+	 ret = mach64AllocateMultiTex( mmesa, u0, u1, heap, GL_FALSE );
+      } else {
+	 /* Both textures are swapped out or collocation is impossible */
+	 u0 = t0;
+	 u1 = t1;
+
+	 /* Choose the heap appropriately */
+	 heap = MACH64_CARD_HEAP;
+
+	 if ( totalSize > mmesa->texture_heaps[heap]->size ) {
+	    heap = MACH64_AGP_HEAP;
+	 }
+
+	 ret = mach64AllocateMultiTex( mmesa, u0, u1, heap, GL_TRUE );
+      }
+
+      if ( ret == -1 && heap == MACH64_CARD_HEAP ) {
+	 /* Try AGP if local memory failed */
+	 heap = MACH64_AGP_HEAP;
+
+	 ret = mach64AllocateMultiTex( mmesa, u0, u1, heap, GL_TRUE );
+      }
+
+      if ( ret == -1 ) {
+	 /* FIXME:
+	  * Swap out all textures from the AGP heap and re-run allocation, this
+	  * should succeed in all cases.
+	  */
+	 fprintf( stderr, "%s: upload multi-texture failure, sz0=%d sz1=%d\n",
+		  __FUNCTION__, t0->base.totalSize, t1->base.totalSize );
+	 exit(-1);
+      }
+
+      /* Set the base offset of the texture image */
+      t0->bufAddr = mmesa->mach64Screen->texOffset[heap] + t0->base.memBlock->ofs;
+      t1->bufAddr = mmesa->mach64Screen->texOffset[heap] + t1->base.memBlock->ofs;
+
+      /* Force loading the new state into the hardware */
+      mmesa->dirty |= (MACH64_UPLOAD_SCALE_3D_CNTL |
+		       MACH64_UPLOAD_TEXTURE);
+   }
+
+   /* Let the world know we've used this memory recently */
+   driUpdateTextureLRU( (driTextureObject *) t0 );
+   driUpdateTextureLRU( (driTextureObject *) t1 );
+
+   /* Upload any images that are new.  Only the base level of each texture
+    * is uploaded.
+    */
+   if ( t0->base.dirty_images[0] ) {
+      const GLint j0 = t0->base.tObj->BaseLevel;
+      if (t0->heap == MACH64_AGP_HEAP) {
+	 /* Need to make sure any vertex buffers in the queue complete */
+	 mach64WaitForIdleLocked( mmesa );
+	 mach64UploadAGPSubImage( mmesa, t0, j0, 0, 0,
+				    t0->base.tObj->Image[0][j0]->Width,
+				    t0->base.tObj->Image[0][j0]->Height );
+      } else {
+	 mach64UploadLocalSubImage( mmesa, t0, j0, 0, 0,
+				    t0->base.tObj->Image[0][j0]->Width,
+				    t0->base.tObj->Image[0][j0]->Height );
+      }
+      mmesa->setup.tex_cntl |= MACH64_TEX_CACHE_FLUSH;
+      t0->base.dirty_images[0] = 0;
+   }
+   if ( t1->base.dirty_images[0] ) {
+      const GLint j1 = t1->base.tObj->BaseLevel;
+      if (t1->heap == MACH64_AGP_HEAP) {
+	 /* Need to make sure any vertex buffers in the queue complete */
+	 mach64WaitForIdleLocked( mmesa );
+	 mach64UploadAGPSubImage( mmesa, t1, j1, 0, 0,
+			       t1->base.tObj->Image[0][j1]->Width,
+			       t1->base.tObj->Image[0][j1]->Height );
+      } else {
+	 mach64UploadLocalSubImage( mmesa, t1, j1, 0, 0,
+			       t1->base.tObj->Image[0][j1]->Width,
+			       t1->base.tObj->Image[0][j1]->Height );
+      }
+      
+      mmesa->setup.tex_cntl |= MACH64_TEX_CACHE_FLUSH;
+      t1->base.dirty_images[0] = 0;
+   }
+
+   mmesa->dirty |= MACH64_UPLOAD_TEXTURE;
+}
diff --git a/src/mach64_texstate.c b/src/mach64_texstate.c
new file mode 100644
index 0000000..3ace370
--- /dev/null
+++ b/src/mach64_texstate.c
@@ -0,0 +1,525 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "context.h"
+#include "macros.h"
+#include "texformat.h"
+
+#include "mach64_context.h"
+#include "mach64_ioctl.h"
+#include "mach64_state.h"
+#include "mach64_vb.h"
+#include "mach64_tris.h"
+#include "mach64_tex.h"
+
+/* Fill in the driver texture object attached to `tObj' from the texture's
+ * base level image: hardware data type, total size (rounded up to a
+ * multiple of 32 bytes), level range, alpha presence and log2 dimensions.
+ * An unknown Mesa format is reported through _mesa_problem() and leaves
+ * the hardware data type untouched.
+ */
+static void mach64SetTexImages( mach64ContextPtr mmesa,
+                              const struct gl_texture_object *tObj )
+{
+   mach64TexObjPtr tex = (mach64TexObjPtr) tObj->DriverData;
+   struct gl_texture_image *base = tObj->Image[0][tObj->BaseLevel];
+   int bytes;
+
+   assert(tex);
+   assert(base);
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API )
+      fprintf( stderr, "%s( %p )\n", __FUNCTION__, tObj );
+
+   /* Mesa texture format -> mach64 data type */
+   switch (base->TexFormat->MesaFormat) {
+   case MESA_FORMAT_ARGB8888:  tex->textureFormat = MACH64_DATATYPE_ARGB8888; break;
+   case MESA_FORMAT_ARGB4444:  tex->textureFormat = MACH64_DATATYPE_ARGB4444; break;
+   case MESA_FORMAT_RGB565:    tex->textureFormat = MACH64_DATATYPE_RGB565;   break;
+   case MESA_FORMAT_ARGB1555:  tex->textureFormat = MACH64_DATATYPE_ARGB1555; break;
+   case MESA_FORMAT_RGB332:    tex->textureFormat = MACH64_DATATYPE_RGB332;   break;
+   case MESA_FORMAT_RGB888:    tex->textureFormat = MACH64_DATATYPE_RGB8;     break;
+   case MESA_FORMAT_CI8:       tex->textureFormat = MACH64_DATATYPE_CI8;      break;
+   case MESA_FORMAT_YCBCR:     tex->textureFormat = MACH64_DATATYPE_YVYU422;  break;
+   case MESA_FORMAT_YCBCR_REV: tex->textureFormat = MACH64_DATATYPE_VYUY422;  break;
+   default:
+      _mesa_problem(mmesa->glCtx, "Bad texture format in %s", __FUNCTION__);
+   }
+
+   /* Does the base format carry an alpha component? */
+   switch ( base->_BaseFormat ) {
+   case GL_RGBA:
+   case GL_ALPHA:
+   case GL_LUMINANCE_ALPHA:
+      tex->hasAlpha = 1;
+      break;
+   default:
+      tex->hasAlpha = 0;
+      break;
+   }
+
+   /* Log2 dimensions of the base image */
+   tex->widthLog2 = base->WidthLog2;
+   tex->heightLog2 = base->HeightLog2;
+   tex->maxLog2 = base->MaxLog2;
+
+   /* Only the base level is accounted for */
+   tex->base.firstLevel = tObj->BaseLevel;
+   tex->base.lastLevel = tObj->BaseLevel;
+
+   /* Size of the base image, rounded up to the next multiple of 32 */
+   bytes = ( base->Height *
+	     base->Width *
+	     base->TexFormat->TexelBytes );
+   bytes = (bytes + 31) & ~31;
+   tex->base.totalSize = bytes;
+}
+
+/* Program the texture environment for hardware texture unit `unit'.
+ *
+ * The GL texture unit that feeds this hardware unit is looked up through
+ * mmesa->tmu_source[].  For unit 0 the texture lighting function bits of
+ * scale_3d_cntl are selected; for any other unit the composite function
+ * bits of tex_cntl are selected.  Env mode / base format combinations
+ * without a matching hardware setting request MACH64_FALLBACK_TEXTURE;
+ * combinations GL leaves undefined disable texturing or compositing.
+ *
+ * NOTE(review): the current texture object and its base level image are
+ * dereferenced unconditionally - presumably callers only get here for
+ * enabled units; confirm.
+ */
+static void mach64UpdateTextureEnv( GLcontext *ctx, int unit )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLint source = mmesa->tmu_source[unit];
+   const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[source];
+   const struct gl_texture_object *tObj = texUnit->_Current;
+   const GLenum format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat;
+   /* Work on a copy; it is only written back (unit 0) if it changed */
+   GLuint s = mmesa->setup.scale_3d_cntl;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %p, %d )\n",
+	       __FUNCTION__, ctx, unit );
+   }
+
+/*                 REPLACE  MODULATE   DECAL              GL_BLEND
+ *
+ * ALPHA           C = Cf   C = Cf     undef              C = Cf
+ *                 A = At   A = AfAt                      A = AfAt
+ *
+ * LUMINANCE       C = Ct   C = CfCt   undef              C = Cf(1-Ct)+CcCt 
+ *                 A = Af   A = Af                        A = Af
+ *
+ * LUMINANCE_ALPHA C = Ct   C = CfCt   undef              C = Cf(1-Ct)+CcCt
+ *                 A = At   A = AfAt                      A = AfAt
+ *
+ * INTENSITY       C = Ct   C = CfCt   undef              C = Cf(1-Ct)+CcCt
+ *                 A = At   A = AfAt                      A = Af(1-At)+AcAt
+ *
+ * RGB             C = Ct   C = CfCt   C = Ct             C = Cf(1-Ct)+CcCt
+ *                 A = Af   A = Af     A = Af             A = Af
+ *
+ * RGBA            C = Ct   C = CfCt   C = Cf(1-At)+CtAt  C = Cf(1-Ct)+CcCt
+ *                 A = At   A = AfAt   A = Af             A = AfAt 
+ */
+
+
+   if ( unit == 0 ) {
+      s &= ~MACH64_TEX_LIGHT_FCN_MASK;
+
+      /* Set the texture environment state 
+       * Need to verify these are working correctly, but the
+       * texenv Mesa demo seems to work.
+       */
+      switch ( texUnit->EnvMode ) {
+      case GL_REPLACE:
+	 switch ( format ) {
+	 case GL_ALPHA:
+	 case GL_LUMINANCE_ALPHA:
+	 case GL_INTENSITY:
+	    /* Not compliant - can't get At */
+	    FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+	    s |= MACH64_TEX_LIGHT_FCN_MODULATE;
+	    break;
+	 default:
+	    s |= MACH64_TEX_LIGHT_FCN_REPLACE;
+	 }
+	 break;
+      case GL_MODULATE:
+	 switch ( format ) {
+	 case GL_ALPHA:
+	    FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+	    s |= MACH64_TEX_LIGHT_FCN_MODULATE;
+	    break;
+	 case GL_RGB:
+	 case GL_LUMINANCE:
+	    /* These should be compliant */
+	    s |= MACH64_TEX_LIGHT_FCN_MODULATE;
+	    break;
+	 case GL_LUMINANCE_ALPHA:
+	 case GL_INTENSITY:
+	    FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+	    s |= MACH64_TEX_LIGHT_FCN_MODULATE;
+	    break;
+	 case GL_RGBA:
+	    /* Should fallback when blending enabled for complete compliance */
+	    s |= MACH64_TEX_LIGHT_FCN_MODULATE;
+	    break;
+	 default:
+	    s |= MACH64_TEX_LIGHT_FCN_MODULATE;
+	 }
+	 break;
+      case GL_DECAL:
+	 switch ( format ) {
+	 case GL_RGBA: 
+	    s |= MACH64_TEX_LIGHT_FCN_ALPHA_DECAL;
+	    break;
+	 case GL_RGB:
+	    s |= MACH64_TEX_LIGHT_FCN_REPLACE;
+	    break;
+	 case GL_ALPHA:
+	 case GL_LUMINANCE_ALPHA:
+	    /* undefined - disable texturing, pass fragment unmodified  */
+	    /* Also, pass fragment alpha instead of texture alpha */
+	    s &= ~MACH64_TEX_MAP_AEN;
+	    s |= MACH64_TEXTURE_DISABLE;
+	    s |= MACH64_TEX_LIGHT_FCN_MODULATE;
+	    break;
+	 case GL_LUMINANCE:
+	 case GL_INTENSITY:
+	    /* undefined - disable texturing, pass fragment unmodified  */
+	    s |= MACH64_TEXTURE_DISABLE;
+	    s |= MACH64_TEX_LIGHT_FCN_MODULATE;
+	    break;
+	 default:
+	    s |= MACH64_TEX_LIGHT_FCN_MODULATE;
+	 }
+	 break;
+      case GL_BLEND:
+	 /* GL_BLEND not supported by RagePRO, use software */
+	 FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+	 s |= MACH64_TEX_LIGHT_FCN_MODULATE;
+	 break;
+      case GL_ADD:
+      case GL_COMBINE:
+	 FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+	 s |= MACH64_TEX_LIGHT_FCN_MODULATE;
+	 break;
+      default:
+	 s |= MACH64_TEX_LIGHT_FCN_MODULATE;
+      }
+
+      /* Only flag the register for upload when its value really changed */
+      if ( mmesa->setup.scale_3d_cntl != s ) {
+	 mmesa->setup.scale_3d_cntl = s;
+	 mmesa->dirty |= MACH64_UPLOAD_SCALE_3D_CNTL;
+      }
+
+   } else {
+      /* NOTE(review): tex_cntl is modified in place here and no dirty bit
+       * is set in this branch - presumably the caller flags the texture
+       * state for upload; confirm.
+       */
+      /* blend = 0, modulate = 1 - initialize to blend */
+      mmesa->setup.tex_cntl &= ~MACH64_COMP_COMBINE_MODULATE;
+      /* Set the texture composite function for multitexturing*/
+      switch ( texUnit->EnvMode ) {
+      case GL_BLEND:
+	 /* GL_BLEND not supported by RagePRO, use software */
+	 FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+	 mmesa->setup.tex_cntl |= MACH64_COMP_COMBINE_MODULATE;
+	 break;
+      case GL_MODULATE:
+	 /* Should fallback when blending enabled for complete compliance */
+	 mmesa->setup.tex_cntl |= MACH64_COMP_COMBINE_MODULATE;
+	 break;
+      case GL_REPLACE:
+	 switch ( format ) {
+	 case GL_ALPHA:
+	    mmesa->setup.tex_cntl |= MACH64_COMP_COMBINE_MODULATE;
+	    break;
+	 default: /* not supported by RagePRO */
+	    FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+	    mmesa->setup.tex_cntl |= MACH64_COMP_COMBINE_MODULATE;
+	 }
+	 break;
+      case GL_DECAL:
+	 switch ( format ) {
+	 case GL_ALPHA:
+	 case GL_LUMINANCE:
+	 case GL_LUMINANCE_ALPHA:
+	 case GL_INTENSITY:
+	    /* undefined, disable compositing and pass fragment unmodified */
+	    mmesa->setup.tex_cntl &= ~MACH64_TEXTURE_COMPOSITE;
+	    break;
+	 default: /* not supported by RagePRO */
+	    FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+	    mmesa->setup.tex_cntl |= MACH64_COMP_COMBINE_MODULATE;
+	 }
+	 break;
+      case GL_ADD:
+      case GL_COMBINE:
+	 FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+	 mmesa->setup.tex_cntl |= MACH64_COMP_COMBINE_MODULATE;
+	 break;
+      default:
+	 mmesa->setup.tex_cntl |= MACH64_COMP_COMBINE_MODULATE;
+      }
+   }
+}
+
+
+/* Set up hardware texture unit `unit' (0 or 1) from the GL texture unit
+ * selected by mmesa->tmu_source[unit].
+ *
+ * For an enabled 1D/2D texture this refreshes the driver texture data if
+ * the base image is dirty, records the texture as bound to the unit and
+ * builds the format, filter, clamp and size bits in the dp_pix_width,
+ * scale_3d_cntl, tex_cntl and tex_size_pitch register images.  A bordered
+ * texture, or any other enabled texture target, requests
+ * MACH64_FALLBACK_TEXTURE instead.
+ */
+static void mach64UpdateTextureUnit( GLcontext *ctx, int unit )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   int source = mmesa->tmu_source[unit];
+   const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[source];
+   const struct gl_texture_object *tObj = ctx->Texture.Unit[source]._Current;
+   mach64TexObjPtr t = tObj->DriverData;
+   /* Work on copies; they are only written back if they changed */
+   GLuint d = mmesa->setup.dp_pix_width;
+   GLuint s = mmesa->setup.scale_3d_cntl;
+
+   assert(unit == 0 || unit == 1);  /* only two tex units */
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %p, %d ) enabled=0x%x 0x%x\n",
+	       __FUNCTION__, ctx, unit, ctx->Texture.Unit[0]._ReallyEnabled,
+	       ctx->Texture.Unit[1]._ReallyEnabled);
+   }
+
+   if (texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT)) {
+
+      assert(t);  /* should have driver tex data by now */
+
+      /* Fallback if there's a texture border */
+      if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 ) {
+         FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+         return;
+      }
+
+      /* Upload teximages */
+      if (t->base.dirty_images[0]) {
+         mach64SetTexImages( mmesa, tObj );
+	 mmesa->dirty |= (MACH64_UPLOAD_TEX0IMAGE << unit);
+      }
+
+      /* Bind to the given texture unit */
+      mmesa->CurrentTexObj[unit] = t;
+      t->base.bound |= (1 << unit);
+
+      if ( t->base.memBlock )
+         driUpdateTextureLRU( (driTextureObject *) t ); /* XXX: should be locked! */
+
+      /* register setup */
+      if ( unit == 0 ) {
+         /* Primary texture: data type goes into bits 28 and up */
+         d &= ~MACH64_SCALE_PIX_WIDTH_MASK;
+         d |= (t->textureFormat << 28);
+   
+         s &= ~(MACH64_TEXTURE_DISABLE |
+		MACH64_TEX_CACHE_SPLIT |
+		MACH64_TEX_BLEND_FCN_MASK |
+		MACH64_TEX_MAP_AEN);
+   
+         if ( mmesa->multitex ) {
+	    s |= MACH64_TEX_BLEND_FCN_TRILINEAR | MACH64_TEX_CACHE_SPLIT;
+         } else if ( t->BilinearMin ) {
+	    s |= MACH64_TEX_BLEND_FCN_LINEAR;
+         } else {
+	    s |= MACH64_TEX_BLEND_FCN_NEAREST;
+         }
+         if ( t->BilinearMag ) {
+	    s |=  MACH64_BILINEAR_TEX_EN;
+         } else {
+	    s &= ~MACH64_BILINEAR_TEX_EN;
+         }
+   
+         if ( t->hasAlpha ) {
+	    s |= MACH64_TEX_MAP_AEN;
+         }
+   
+         mmesa->setup.tex_cntl &= ~(MACH64_TEXTURE_CLAMP_S |
+				    MACH64_TEXTURE_CLAMP_T |
+				    MACH64_SECONDARY_STW);
+   
+         if ( t->ClampS ) {
+	    mmesa->setup.tex_cntl |= MACH64_TEXTURE_CLAMP_S;
+         }
+         if ( t->ClampT ) {
+	    mmesa->setup.tex_cntl |= MACH64_TEXTURE_CLAMP_T;
+         }
+   
+         /* Primary texture sizes occupy the low half of tex_size_pitch.
+          * The bits are OR-ed in, so the register image must have been
+          * cleared beforehand.
+          */
+         mmesa->setup.tex_size_pitch |= ((t->widthLog2  << 0) |
+					 (t->maxLog2    << 4) |
+					 (t->heightLog2 << 8));
+      } else {
+         
+         /* Enable texture mapping mode */
+         s &= ~MACH64_TEXTURE_DISABLE;
+   
+         /* Secondary texture: data type goes into bits 4 and up */
+         d &= ~MACH64_COMPOSITE_PIX_WIDTH_MASK;
+         d |= (t->textureFormat << 4);
+   
+         mmesa->setup.tex_cntl &= ~(MACH64_COMP_ALPHA |
+				    MACH64_SEC_TEX_CLAMP_S |
+				    MACH64_SEC_TEX_CLAMP_T);
+         mmesa->setup.tex_cntl |= (MACH64_TEXTURE_COMPOSITE |
+				   MACH64_SECONDARY_STW);
+   
+         if ( t->BilinearMin ) {
+	    mmesa->setup.tex_cntl |= MACH64_COMP_BLEND_BILINEAR;
+         } else {
+	    mmesa->setup.tex_cntl &= ~MACH64_COMP_BLEND_BILINEAR;
+         }
+         if ( t->BilinearMag ) {
+	    mmesa->setup.tex_cntl |=  MACH64_COMP_FILTER_BILINEAR;
+         } else {
+	    mmesa->setup.tex_cntl &= ~MACH64_COMP_FILTER_BILINEAR;
+         }
+         
+         if ( t->hasAlpha ) {
+	    mmesa->setup.tex_cntl |= MACH64_COMP_ALPHA;
+         }
+         if ( t->ClampS ) {
+	    mmesa->setup.tex_cntl |= MACH64_SEC_TEX_CLAMP_S;
+         }
+         if ( t->ClampT ) {
+	    mmesa->setup.tex_cntl |= MACH64_SEC_TEX_CLAMP_T;
+         }
+   
+         /* Secondary texture sizes occupy the high half of tex_size_pitch */
+         mmesa->setup.tex_size_pitch |= ((t->widthLog2  << 16) |
+					 (t->maxLog2    << 20) |
+					 (t->heightLog2 << 24));
+      }
+   
+      /* Only flag registers for upload when their value really changed */
+      if ( mmesa->setup.scale_3d_cntl != s ) {
+         mmesa->setup.scale_3d_cntl = s;
+         mmesa->dirty |= MACH64_UPLOAD_SCALE_3D_CNTL;
+      }
+   
+      if ( mmesa->setup.dp_pix_width != d ) {
+         mmesa->setup.dp_pix_width = d;
+         mmesa->dirty |= MACH64_UPLOAD_DP_PIX_WIDTH;
+      }  
+   }
+   else if (texUnit->_ReallyEnabled) {
+      /* 3D or cube map texture enabled - fallback */
+      FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+   }
+   else {
+      /* texture unit disabled */
+   }
+}
+
+
+/* Rebuild the hardware texture state from scratch: drop the current
+ * bindings and fallbacks, disable texturing, then set up each hardware
+ * unit again for whichever of GL texture units 0 and 1 are enabled.
+ */
+void mach64UpdateTextureState( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   int unit;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %p ) en=0x%x 0x%x\n",
+	       __FUNCTION__, ctx, ctx->Texture.Unit[0]._ReallyEnabled,
+	       ctx->Texture.Unit[1]._ReallyEnabled);
+   }
+
+   /* Start without any texturing fallback */
+   FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_FALSE );
+
+   /* Release whatever is bound to the two hardware units */
+   for ( unit = 0 ; unit < 2 ; unit++ ) {
+      if ( mmesa->CurrentTexObj[unit] )
+	 mmesa->CurrentTexObj[unit]->base.bound = 0;
+      mmesa->CurrentTexObj[unit] = NULL;
+   }
+
+   /* Texturing stays off until a unit is known to be good */
+   mmesa->setup.tex_size_pitch = 0x00000000;
+   mmesa->setup.tex_cntl       &= ~MACH64_TEXTURE_COMPOSITE;
+   mmesa->setup.scale_3d_cntl  &= ~MACH64_TEX_MAP_AEN;
+   mmesa->setup.scale_3d_cntl  |=  MACH64_TEXTURE_DISABLE;
+
+   /* Default: hardware unit N is fed by GL unit N, single texturing */
+   mmesa->multitex = 0;
+   mmesa->tmu_source[0] = 0;
+   mmesa->tmu_source[1] = 1;
+
+   switch ( ctx->Texture._EnabledUnits & 0x3 ) {
+   case 0x3:
+      /* units 0 and 1 enabled */
+      mmesa->multitex = 1;
+      mach64UpdateTextureUnit( ctx, 0 );
+      mach64UpdateTextureEnv( ctx, 0 );
+      mach64UpdateTextureUnit( ctx, 1 );
+      mach64UpdateTextureEnv( ctx, 1 );
+      break;
+   case 0x2:
+      /* only unit 1 enabled: feed it through hardware unit 0 */
+      mmesa->tmu_source[0] = 1;
+      mmesa->tmu_source[1] = 0;
+      mach64UpdateTextureUnit( ctx, 0 );
+      mach64UpdateTextureEnv( ctx, 0 );
+      break;
+   case 0x1:
+      /* only unit 0 enabled */
+      mach64UpdateTextureUnit( ctx, 0 );
+      mach64UpdateTextureEnv( ctx, 0 );
+      break;
+   default:
+      /* neither unit enabled */
+      break;
+   }
+
+   mmesa->dirty |= (MACH64_UPLOAD_SCALE_3D_CNTL |
+		    MACH64_UPLOAD_TEXTURE);
+}
+
+
+/* Due to the way we must program texture state into the Rage Pro,
+ * we must leave these calculations to the absolute last minute.
+ *
+ * Fills in the texture source and texture offsets for `t0'/`t1' (either
+ * may be NULL) and copies MACH64_NR_TEXTURE_REGS registers, starting at
+ * tex_size_pitch, into the shared area's context state.
+ */
+void mach64EmitTexStateLocked( mach64ContextPtr mmesa,
+			       mach64TexObjPtr t0,
+			       mach64TexObjPtr t1 )
+{
+   drm_mach64_context_regs_t *regs = &(mmesa->setup);
+   drm_mach64_sarea_t *sarea = mmesa->sarea;
+
+   /* for multitex, both textures must be local or AGP */
+   assert( !t0 || !t1 || t0->heap == t1->heap );
+
+   if ( t0 ) {
+      if (t0->heap != MACH64_CARD_HEAP) {
+#if ENABLE_PERF_BOXES
+	 mmesa->c_texsrc_agp++;
+#endif
+	 regs->tex_cntl |= MACH64_TEX_SRC_AGP;
+      } else {
+#if ENABLE_PERF_BOXES
+	 mmesa->c_texsrc_card++;
+#endif
+	 regs->tex_cntl &= ~MACH64_TEX_SRC_AGP;
+      }
+      regs->tex_offset = t0->bufAddr;
+   }
+
+   if ( t1 ) {
+      regs->secondary_tex_off = t1->bufAddr;
+   }
+
+   memcpy( &sarea->context_state.tex_size_pitch, &regs->tex_size_pitch,
+	   MACH64_NR_TEXTURE_REGS * sizeof(GLuint) );
+}
+
diff --git a/src/mach64_tris.c b/src/mach64_tris.c
new file mode 100644
index 0000000..08cc184
--- /dev/null
+++ b/src/mach64_tris.c
@@ -0,0 +1,1924 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "glheader.h"
+#include "mtypes.h"
+#include "colormac.h"
+#include "macros.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+#include "mach64_tris.h"
+#include "mach64_state.h"
+#include "mach64_context.h"
+#include "mach64_vb.h"
+#include "mach64_ioctl.h"
+
+static const GLuint hw_prim[GL_POLYGON+1] = {	/* one slot per GL primitive enum, in GL enum order */
+   MACH64_PRIM_POINTS,		/* GL_POINTS */
+   MACH64_PRIM_LINES,		/* GL_LINES */
+   MACH64_PRIM_LINE_LOOP,	/* GL_LINE_LOOP */
+   MACH64_PRIM_LINE_STRIP,	/* GL_LINE_STRIP */
+   MACH64_PRIM_TRIANGLES,	/* GL_TRIANGLES */
+   MACH64_PRIM_TRIANGLE_STRIP,	/* GL_TRIANGLE_STRIP */
+   MACH64_PRIM_TRIANGLE_FAN,	/* GL_TRIANGLE_FAN */
+   MACH64_PRIM_QUADS,		/* GL_QUADS */
+   MACH64_PRIM_QUAD_STRIP,	/* GL_QUAD_STRIP */
+   MACH64_PRIM_POLYGON,		/* GL_POLYGON */
+};
+
+static void mach64RasterPrimitive( GLcontext *ctx, GLuint hwprim );
+static void mach64RenderPrimitive( GLcontext *ctx, GLenum prim );
+
+
+/* FIXME: Remove this when native template is finished. */
+#define MACH64_PRINT_BUFFER 0
+
+/***********************************************************************
+ *                    Emit primitives as inline vertices               *
+ ***********************************************************************/
+
+#if defined(USE_X86_ASM)	/* x86 build: dwords are copied with movsl through esi/edi */
+#define DO_COPY_VERTEX( vb, vertsize, v, n, m )					\
+do {	/* emit the vertsize dwords ending at dword 9 of v as register run(s) for slot n; advances vb */	\
+   register const CARD32 *__p __asm__( "esi" ) = (CARD32 *)v + 10 - vertsize;	\
+   register int __s __asm__( "ecx" ) = vertsize;	/* dwords still to copy */	\
+   if ( vertsize > 7 ) {	/* the leading 3 dwords go out as their own SECONDARY_S run */	\
+      *vb++ = (2 << 16) | ADRINDEX( MACH64_VERTEX_##n##_SECONDARY_S );		\
+      __asm__ __volatile__( "movsl ; movsl ; movsl"				\
+			    : "=D" (vb), "=S" (__p)				\
+			    : "0" (vb), "1" (__p) );				\
+      __s -= 3;									\
+   }										\
+   *vb++ = ((__s - 1 + m) << 16) |	/* run header: (count - 1) << 16 | first register */	\
+   	   (ADRINDEX( MACH64_VERTEX_##n##_X_Y ) - (__s - 1) );	/* start so the __s copied dwords end on X_Y */	\
+   __asm__ __volatile__( "rep ; movsl"	/* copy the remaining __s dwords */	\
+			 : "=%c" (__s), "=D" (vb), "=S" (__p)			\
+			 : "0" (__s), "1" (vb), "2" (__p) );			\
+} while (0)
+#else	/* portable C version of the same copy */
+#define DO_COPY_VERTEX( vb, vertsize, v, n, m )				\
+do {	/* emit the vertsize dwords ending at dword 9 of v as register run(s) for slot n; advances vb */	\
+   CARD32 *__p = (CARD32 *)v + 10 - vertsize;				\
+   int __s = vertsize;	/* dwords still to copy */			\
+   if ( vertsize > 7 ) {	/* the leading 3 dwords go out as their own SECONDARY_S run */	\
+      LE32_OUT( vb++, (2 << 16) |					\
+	    	      ADRINDEX( MACH64_VERTEX_##n##_SECONDARY_S ) );	\
+      *vb++ = *__p++;							\
+      *vb++ = *__p++;							\
+      *vb++ = *__p++;							\
+      __s -= 3;								\
+   }									\
+   LE32_OUT( vb++, ((__s - 1 + m) << 16) |	/* run header: (count - 1) << 16 | first register */	\
+	           (ADRINDEX( MACH64_VERTEX_##n##_X_Y ) - (__s - 1)) );	/* start so the __s copied dwords end on X_Y */	\
+   while ( __s-- ) {	/* vertex dwords are copied verbatim; only headers go through LE32_OUT */	\
+      *vb++ = *__p++;							\
+   }									\
+} while (0)
+#endif
+/* COPY_VERTEX_OOA passes m = 1: the run header counts one more register, whose value the caller writes next. */
+#define COPY_VERTEX( vb, vertsize, v, n )	DO_COPY_VERTEX( vb, vertsize, v, n, 0 )
+#define COPY_VERTEX_OOA( vb, vertsize, v, n )	DO_COPY_VERTEX( vb, vertsize, v, n, 1 )
+
+/* Draw a quad as two triangles: (v0, v1, v3), then v2 takes over the VERTEX_1 slot for (v2, v1, v3). */
+static __inline void mach64_draw_quad( mach64ContextPtr mmesa,
+				       mach64VertexPtr v0,
+				       mach64VertexPtr v1,
+				       mach64VertexPtr v2,
+				       mach64VertexPtr v3 )
+{
+#if MACH64_NATIVE_VTXFMT	/* vertex dwords are copied into the DMA buffer verbatim (COPY_VERTEX) */
+   GLcontext *ctx = mmesa->glCtx;
+   const GLuint vertsize = mmesa->vertex_size;
+   GLint a;
+   GLfloat ooa;
+   GLuint xy;
+   const GLuint xyoffset = 9;	/* dword of the vertex holding the packed x/y pair */
+   GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
+   unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * 4 + 2;	/* 4 vertex runs (incl. headers) + 2 one-over-area dwords */
+   CARD32 *vb, *vbchk;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      fprintf(stderr,"Vertex 1:\n");
+      mach64_print_vertex( ctx, v0 );
+      fprintf(stderr,"Vertex 2:\n");
+      mach64_print_vertex( ctx, v1 );
+      fprintf(stderr,"Vertex 3:\n");
+      mach64_print_vertex( ctx, v2 );
+      fprintf(stderr,"Vertex 4:\n");
+      mach64_print_vertex( ctx, v3 );
+   }
+   /* unpack x (high 16 bits) and y (low 16 bits) of v0, v1 and v3 as signed shorts */
+   xy = LE32_IN( &v0->ui[xyoffset] );
+   xx[0] = (GLshort)( xy >> 16 );
+   yy[0] = (GLshort)( xy & 0xffff );
+   
+   xy = LE32_IN( &v1->ui[xyoffset] );
+   xx[1] = (GLshort)( xy >> 16 );
+   yy[1] = (GLshort)( xy & 0xffff );
+   
+   xy = LE32_IN( &v3->ui[xyoffset] );
+   xx[2] = (GLshort)( xy >> 16 );
+   yy[2] = (GLshort)( xy & 0xffff );
+   /* cross product of the edges of (v0, v1, v3); its sign gives the winding */
+   a = (xx[0] - xx[2]) * (yy[1] - yy[2]) -
+       (yy[0] - yy[2]) * (xx[1] - xx[2]);
+   /* cull when backface_sign is nonzero and opposite in sign to a; only this first triangle is tested */
+   if ( (mmesa->backface_sign &&
+	((a < 0 && !signbit( mmesa->backface_sign )) || 
+	(a > 0 && signbit( mmesa->backface_sign )))) ) {
+      /* cull quad */
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+	 fprintf(stderr,"Quad culled\n");
+      return;
+   }
+   /* 16 = 4 * 4 compensates for the 2 fractional bits in each factor of a; a == 0 is not filtered out */
+   ooa = 16.0 / a;
+   
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) );
+   vbchk = vb + vbsiz;
+   /* first triangle: v0, v1, v3 into slots 1..3, then the extra dword announced by COPY_VERTEX_OOA */
+   COPY_VERTEX( vb, vertsize, v0, 1 );
+   COPY_VERTEX( vb, vertsize, v1, 2 );
+   COPY_VERTEX_OOA( vb, vertsize, v3, 3 );
+   LE32_OUT( vb++, *(CARD32 *)&ooa );
+   /* second triangle: v2 replaces v0 (slot 0 of xx/yy, hardware slot 1); v1 and v3 are reused */
+   xy = LE32_IN( &v2->ui[xyoffset] );
+   xx[0] = (GLshort)( xy >> 16 );
+   yy[0] = (GLshort)( xy & 0xffff );
+	   
+   a = (xx[0] - xx[2]) * (yy[1] - yy[2]) -
+       (yy[0] - yy[2]) * (xx[1] - xx[2]);
+   
+   ooa = 16.0 / a;
+   
+   COPY_VERTEX_OOA( vb, vertsize, v2, 1 );
+   LE32_OUT( vb++, *(CARD32 *)&ooa );
+
+   assert( vb == vbchk );	/* everything allocated must have been written */
+   
+#if MACH64_PRINT_BUFFER
+   {
+      int i;
+      fprintf(stderr, "quad:\n");
+      for (i = 0; i < vbsiz; i++)
+	 fprintf(stderr, "  %08lx\n", *(vb - vbsiz + i));
+      fprintf(stderr, "\n");
+   }
+#endif
+#else	/* !MACH64_NATIVE_VTXFMT: register runs are assembled field by field from the vertex */
+   GLuint vertsize = mmesa->vertex_size;
+   GLint coloridx;
+   GLfloat ooa;
+   GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
+   unsigned vbsiz = 
+	 ((
+	    1 +				/* run header */
+	    (vertsize > 6 ? 2 : 0) +	/* S, T */
+	    (vertsize > 4 ? 2 : 0) +	/* W, SPEC_ARGB */
+	    3 +				/* Z, ARGB, X_Y */
+	    (mmesa->multitex ? 4 : 0)	/* header + SECONDARY_S/T/W */
+	 ) * 4 + 4);			/* 4 vertices + 2 (ONE_OVER_AREA header, value) pairs */
+   CARD32 *vb;
+   unsigned vbidx = 0;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      fprintf(stderr,"Vertex 1: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n", 
+	      v0->v.x, v0->v.y, v0->v.z, v0->v.w, v0->v.u0, v0->v.v0, v0->v.u1, v0->v.v1);
+      fprintf(stderr,"Vertex 2: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n", 
+	      v1->v.x, v1->v.y, v1->v.z, v1->v.w, v1->v.u0, v1->v.v0, v1->v.u1, v1->v.v1);
+      fprintf(stderr,"Vertex 3: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n", 
+	      v2->v.x, v2->v.y, v2->v.z, v2->v.w, v2->v.u0, v2->v.v0, v2->v.u1, v2->v.v1);
+      fprintf(stderr,"Vertex 4: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n", 
+	      v3->v.x, v3->v.y, v3->v.z, v3->v.w, v3->v.u0, v3->v.v0, v3->v.u1, v3->v.v1);
+   }
+
+#if MACH64_CLIENT_STATE_EMITS
+   /* Enable for interleaved client-side state emits */
+   LOCK_HARDWARE( mmesa );
+   if ( mmesa->dirty ) {
+      mach64EmitHwStateLocked( mmesa );
+   }
+   if ( mmesa->sarea->dirty ) {
+      mach64UploadHwStateLocked( mmesa );
+   }
+   UNLOCK_HARDWARE( mmesa );
+#endif
+   /* convert v0, v1, v3 to fixed point with 2 fractional bits (x4) */
+   xx[0] = (GLint)(v0->v.x * 4);
+   yy[0] = (GLint)(v0->v.y * 4);
+
+   xx[1] = (GLint)(v1->v.x * 4);
+   yy[1] = (GLint)(v1->v.y * 4);
+
+   xx[2] = (GLint)(v3->v.x * 4);
+   yy[2] = (GLint)(v3->v.y * 4);
+   /* edge cross product; 0.25 * 0.25 removes the x4 scaling of both factors (ooa holds the area term for now) */
+   ooa = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
+			(yy[0] - yy[2]) * (xx[1] - xx[2]));
+   
+   if ( ooa * mmesa->backface_sign < 0 ) {
+      /* cull quad */
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+	 fprintf(stderr,"Quad culled\n");
+      return;
+   }
+   
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * 4 );
+   /* invert the area term: from here on ooa really is one-over-area */
+   ooa = 1.0 / ooa;
+
+   coloridx = (vertsize > 4) ? 4: 3;	/* ARGB is read from ui[4] when vertsize > 4, else from ui[3] */
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */
+      LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) );         /* MACH64_VERTEX_1_Z */
+   vb[vbidx++] = v0->ui[coloridx];                            /* MACH64_VERTEX_1_ARGB (stored raw, no LE32_OUT) */
+   LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v0->ui[8] ); /* MACH64_VERTEX_1_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[9] ); /* MACH64_VERTEX_1_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_SECONDARY_W */
+   }
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_2_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_2_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v1->ui[6] ); /* MACH64_VERTEX_2_S */
+      LE32_OUT( &vb[vbidx++], v1->ui[7] ); /* MACH64_VERTEX_2_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_W */
+      LE32_OUT( &vb[vbidx++], v1->ui[5] ); /* MACH64_VERTEX_2_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v1->v.z) << 15) );         /* MACH64_VERTEX_2_Z */
+   vb[vbidx++] = v1->ui[coloridx];                            /* MACH64_VERTEX_2_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[1] << 16) | (yy[1] & 0xffff) ); /* MACH64_VERTEX_2_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v1->ui[8] ); /* MACH64_VERTEX_2_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v1->ui[9] ); /* MACH64_VERTEX_2_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_SECONDARY_W */
+   }
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_3_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_3_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v3->ui[6] ); /* MACH64_VERTEX_3_S */
+      LE32_OUT( &vb[vbidx++], v3->ui[7] ); /* MACH64_VERTEX_3_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v3->ui[3] ); /* MACH64_VERTEX_3_W */
+      LE32_OUT( &vb[vbidx++], v3->ui[5] ); /* MACH64_VERTEX_3_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v3->v.z) << 15) );         /* MACH64_VERTEX_3_Z */
+   vb[vbidx++] = v3->ui[coloridx];                             /* MACH64_VERTEX_3_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[2] << 16) | (yy[2] & 0xffff) ); /* MACH64_VERTEX_3_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v3->ui[8] ); /* MACH64_VERTEX_3_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v3->ui[9] ); /* MACH64_VERTEX_3_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v3->ui[3] ); /* MACH64_VERTEX_3_SECONDARY_W */
+   }
+   /* single-register write of 1/area for the first triangle (v0, v1, v3) */
+   LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
+   LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa );
+   /* second triangle: v2 replaces v0 in slot 0 of xx/yy and in the VERTEX_1 registers */
+   xx[0] = (GLint)(v2->v.x * 4);
+   yy[0] = (GLint)(v2->v.y * 4);
+
+   ooa = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
+			(yy[0] - yy[2]) * (xx[1] - xx[2]));
+   ooa = 1.0 / ooa;
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v2->ui[6] ); /* MACH64_VERTEX_1_S */
+      LE32_OUT( &vb[vbidx++], v2->ui[7] ); /* MACH64_VERTEX_1_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v2->ui[3] ); /* MACH64_VERTEX_1_W */
+      LE32_OUT( &vb[vbidx++], v2->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v2->v.z) << 15) );         /* MACH64_VERTEX_1_Z */
+   vb[vbidx++] = v2->ui[coloridx];                             /* MACH64_VERTEX_1_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v2->ui[8] ); /* MACH64_VERTEX_1_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v2->ui[9] ); /* MACH64_VERTEX_1_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v2->ui[3] ); /* MACH64_VERTEX_1_SECONDARY_W */
+   }
+
+   LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
+   LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa );
+
+   assert(vbsiz == vbidx);	/* everything allocated must have been written */
+
+#if MACH64_PRINT_BUFFER
+   {
+      int i;
+      fprintf(stderr, "quad:\n");
+      for (i = 0; i < vbsiz; i++)
+	 fprintf(stderr, "  %08lx\n", *(vb + i));
+      fprintf(stderr, "\n");
+   }
+#endif
+#endif
+}
+/* Draw one triangle (v0, v1, v2); nothing is emitted when the backface test culls it. */
+static __inline void mach64_draw_triangle( mach64ContextPtr mmesa,
+					   mach64VertexPtr v0,
+					   mach64VertexPtr v1,
+					   mach64VertexPtr v2 )
+{
+#if MACH64_NATIVE_VTXFMT	/* vertex dwords are copied into the DMA buffer verbatim (COPY_VERTEX) */
+   GLcontext *ctx = mmesa->glCtx;
+   GLuint vertsize = mmesa->vertex_size;
+   GLint a;
+   GLfloat ooa;
+   GLuint xy;
+   const GLuint xyoffset = 9;	/* dword of the vertex holding the packed x/y pair */
+   GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
+   unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * 3 + 1;	/* 3 vertex runs (incl. headers) + 1 one-over-area dword */
+   CARD32 *vb, *vbchk;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      fprintf(stderr,"Vertex 1:\n");
+      mach64_print_vertex( ctx, v0 );
+      fprintf(stderr,"Vertex 2:\n");
+      mach64_print_vertex( ctx, v1 );
+      fprintf(stderr,"Vertex 3:\n");
+      mach64_print_vertex( ctx, v2 );
+   }
+   /* unpack x (high 16 bits) and y (low 16 bits) of each vertex as signed shorts */
+   xy = LE32_IN( &v0->ui[xyoffset] );
+   xx[0] = (GLshort)( xy >> 16 );
+   yy[0] = (GLshort)( xy & 0xffff );
+   
+   xy = LE32_IN( &v1->ui[xyoffset] );
+   xx[1] = (GLshort)( xy >> 16 );
+   yy[1] = (GLshort)( xy & 0xffff );
+   
+   xy = LE32_IN( &v2->ui[xyoffset] );
+   xx[2] = (GLshort)( xy >> 16 );
+   yy[2] = (GLshort)( xy & 0xffff );
+   /* cross product of the triangle edges; its sign gives the winding */
+   a = (xx[0] - xx[2]) * (yy[1] - yy[2]) -
+       (yy[0] - yy[2]) * (xx[1] - xx[2]);
+   /* cull when backface_sign is nonzero and opposite in sign to a */
+   if ( mmesa->backface_sign &&
+	((a < 0 && !signbit( mmesa->backface_sign )) || 
+	(a > 0 && signbit( mmesa->backface_sign ))) ) {
+      /* cull triangle */
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+	 fprintf(stderr,"Triangle culled\n");
+      return;
+   }
+   /* 16 = 4 * 4 compensates for the 2 fractional bits in each factor of a; a == 0 is not filtered out */
+   ooa = 16.0 / a;
+   
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) );
+   vbchk = vb + vbsiz;
+   /* v0, v1, v2 into slots 1..3, then the extra dword announced by COPY_VERTEX_OOA */
+   COPY_VERTEX( vb, vertsize, v0, 1 );
+   COPY_VERTEX( vb, vertsize, v1, 2 );
+   COPY_VERTEX_OOA( vb, vertsize, v2, 3 );
+   LE32_OUT( vb++, *(CARD32 *)&ooa );
+
+   assert( vb == vbchk );	/* everything allocated must have been written */
+
+#if MACH64_PRINT_BUFFER
+   {
+      int i;
+      fprintf(stderr, "tri:\n");
+      for (i = 0; i < vbsiz; i++)
+	 fprintf(stderr, "  %08lx\n", *(vb - vbsiz + i));
+      fprintf(stderr, "\n");
+   }
+#endif
+#else	/* !MACH64_NATIVE_VTXFMT: register runs are assembled field by field from the vertex */
+   GLuint vertsize = mmesa->vertex_size;
+   GLint coloridx;
+   GLfloat ooa;
+   GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
+   unsigned vbsiz = 
+	 ((
+	    1 +				/* run header */
+	    (vertsize > 6 ? 2 : 0) +	/* S, T */
+	    (vertsize > 4 ? 2 : 0) +	/* W, SPEC_ARGB */
+	    3 +				/* Z, ARGB, X_Y */
+	    (mmesa->multitex ? 4 : 0)	/* header + SECONDARY_S/T/W */
+	 ) * 3 + 2);			/* 3 vertices + ONE_OVER_AREA header and value */
+   CARD32 *vb;
+   unsigned vbidx = 0;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      fprintf(stderr,"Vertex 1: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n", 
+	      v0->v.x, v0->v.y, v0->v.z, v0->v.w, v0->v.u0, v0->v.v0, v0->v.u1, v0->v.v1);
+      fprintf(stderr,"Vertex 2: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n", 
+	      v1->v.x, v1->v.y, v1->v.z, v1->v.w, v1->v.u0, v1->v.v0, v1->v.u1, v1->v.v1);
+      fprintf(stderr,"Vertex 3: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n", 
+	      v2->v.x, v2->v.y, v2->v.z, v2->v.w, v2->v.u0, v2->v.v0, v2->v.u1, v2->v.v1);
+   }
+
+#if MACH64_CLIENT_STATE_EMITS
+   /* Enable for interleaved client-side state emits */
+   LOCK_HARDWARE( mmesa );
+   if ( mmesa->dirty ) {
+      mach64EmitHwStateLocked( mmesa );
+   }
+   if ( mmesa->sarea->dirty ) {
+      mach64UploadHwStateLocked( mmesa );
+   }
+   UNLOCK_HARDWARE( mmesa );
+#endif
+   /* convert the three vertices to fixed point with 2 fractional bits (x4) */
+   xx[0] = (GLint)(v0->v.x * 4);
+   yy[0] = (GLint)(v0->v.y * 4);
+
+   xx[1] = (GLint)(v1->v.x * 4);
+   yy[1] = (GLint)(v1->v.y * 4);
+
+   xx[2] = (GLint)(v2->v.x * 4);
+   yy[2] = (GLint)(v2->v.y * 4);
+   /* edge cross product; 0.25 * 0.25 removes the x4 scaling of both factors (ooa holds the area term for now) */
+   ooa = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
+			(yy[0] - yy[2]) * (xx[1] - xx[2]));
+
+   if ( ooa * mmesa->backface_sign < 0 ) {
+      /* cull triangle */
+       if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+	 fprintf(stderr,"Triangle culled\n");
+      return;
+   }
+
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * 4 );
+   /* invert the area term: from here on ooa really is one-over-area */
+   ooa = 1.0 / ooa;
+
+   coloridx = (vertsize > 4) ? 4: 3;	/* ARGB is read from ui[4] when vertsize > 4, else from ui[3] */
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */
+      LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) );         /* MACH64_VERTEX_1_Z */
+   vb[vbidx++] = v0->ui[coloridx];                             /* MACH64_VERTEX_1_ARGB (stored raw, no LE32_OUT) */
+   LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v0->ui[8] ); /* MACH64_VERTEX_1_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[9] ); /* MACH64_VERTEX_1_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_SECONDARY_W */
+   }
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_2_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_2_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v1->ui[6] ); /* MACH64_VERTEX_2_S */
+      LE32_OUT( &vb[vbidx++], v1->ui[7] ); /* MACH64_VERTEX_2_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_W */
+      LE32_OUT( &vb[vbidx++], v1->ui[5] ); /* MACH64_VERTEX_2_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v1->v.z) << 15) );         /* MACH64_VERTEX_2_Z */
+   vb[vbidx++] = v1->ui[coloridx];                             /* MACH64_VERTEX_2_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[1] << 16) | (yy[1] & 0xffff) ); /* MACH64_VERTEX_2_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v1->ui[8] ); /* MACH64_VERTEX_2_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v1->ui[9] ); /* MACH64_VERTEX_2_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_SECONDARY_W */
+   }
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_3_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_3_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v2->ui[6] ); /* MACH64_VERTEX_3_S */
+      LE32_OUT( &vb[vbidx++], v2->ui[7] ); /* MACH64_VERTEX_3_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v2->ui[3] ); /* MACH64_VERTEX_3_W */
+      LE32_OUT( &vb[vbidx++], v2->ui[5] ); /* MACH64_VERTEX_3_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v2->v.z) << 15) );         /* MACH64_VERTEX_3_Z */
+   vb[vbidx++] = v2->ui[coloridx];                             /* MACH64_VERTEX_3_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[2] << 16) | (yy[2] & 0xffff) ); /* MACH64_VERTEX_3_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v2->ui[8] ); /* MACH64_VERTEX_3_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v2->ui[9] ); /* MACH64_VERTEX_3_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v2->ui[3] ); /* MACH64_VERTEX_3_SECONDARY_W */
+   }
+   /* single-register write of 1/area closes the triangle's register stream */
+   LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
+   LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa );
+
+   assert(vbsiz == vbidx);	/* everything allocated must have been written */
+
+#if MACH64_PRINT_BUFFER
+   {
+      int i;
+      fprintf(stderr, "tri:\n");
+      for (i = 0; i < vbsiz; ++i)
+	 fprintf(stderr, "  %08lx\n", *(vb + i));
+      fprintf(stderr, "\n");
+   }
+#endif
+#endif
+}
+/* Draw a line as a thin quad (two triangles) widened along its minor axis. */
+static __inline void mach64_draw_line( mach64ContextPtr mmesa,
+				     mach64VertexPtr v0,
+				     mach64VertexPtr v1 )
+{
+#if MACH64_NATIVE_VTXFMT	/* vertex dwords are copied into the DMA buffer verbatim (COPY_VERTEX) */
+   GLcontext *ctx = mmesa->glCtx;
+   const GLuint vertsize = mmesa->vertex_size;
+   GLint width = (GLint)(mmesa->glCtx->Line._Width * 2.0); /* 2 fractional bits for hardware */
+   GLfloat ooa;
+   GLuint *pxy0, *pxy1;
+   GLuint xy0old, xy0, xy1old, xy1;
+   const GLuint xyoffset = 9;	/* dword of the vertex holding the packed x/y pair */
+   GLint x0, y0, x1, y1;
+   GLint dx, dy, ix, iy;
+   unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * 4 + 2;	/* 4 vertex runs (incl. headers) + 2 one-over-area dwords */
+   CARD32 *vb, *vbchk;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      fprintf(stderr,"Vertex 1:\n");
+      mach64_print_vertex( ctx, v0 );
+      fprintf(stderr,"Vertex 2:\n");
+      mach64_print_vertex( ctx, v1 );
+   }
+  
+   if( !width )
+      width = 1;	/* round to the nearest supported width */
+      
+   pxy0 = &v0->ui[xyoffset];
+   xy0old = *pxy0;	/* saved so the vertex can be restored after emission */
+   xy0 = LE32_IN( &xy0old );
+   x0 = (GLshort)( xy0 >> 16 );
+   y0 = (GLshort)( xy0 & 0xffff );
+   
+   pxy1 = &v1->ui[xyoffset];
+   xy1old = *pxy1;
+   xy1 = LE32_IN( &xy1old );
+   x1 = (GLshort)( xy1 >> 16 );
+   y1 = (GLshort)( xy1 & 0xffff );
+   /* absolute extents decide which axis is the major one */
+   if ( (dx = x1 - x0) < 0 ) {
+      dx = -dx;
+   }
+   if ( (dy = y1 - y0) < 0 ) {
+      dy = -dy;
+   }
+   
+   /* adjust vertices depending on line direction */
+   if ( dx > dy ) {
+      ix = 0;
+      iy = width;
+      ooa = 8.0 / ((x1 - x0) * width);	/* 16 / (extent * 2 * width); infinite if the extent is 0 */
+   } else {
+      ix = width;
+      iy = 0;
+      ooa = 8.0 / ((y0 - y1) * width);
+   }
+
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) );
+   vbchk = vb + vbsiz;
+   /* patch x/y inside the vertices, copy each corner, and restore the originals afterwards */
+   LE32_OUT( pxy0, (( x0 - ix ) << 16) | (( y0 - iy ) & 0xffff) );
+   COPY_VERTEX( vb, vertsize, v0, 1 );
+   LE32_OUT( pxy1, (( x1 - ix ) << 16) | (( y1 - iy ) & 0xffff) );
+   COPY_VERTEX( vb, vertsize, v1, 2 );
+   LE32_OUT( pxy0, (( x0 + ix ) << 16) | (( y0 + iy ) & 0xffff) );
+   COPY_VERTEX_OOA( vb, vertsize, v0, 3 );
+   LE32_OUT( vb++, *(CARD32 *)&ooa );
+
+   ooa = -ooa;	/* the second triangle's area term has the opposite sign */
+   
+   LE32_OUT( pxy1, (( x1 + ix ) << 16) | (( y1 + iy ) & 0xffff) );
+   COPY_VERTEX_OOA( vb, vertsize, v1, 1 );
+   LE32_OUT( vb++, *(CARD32 *)&ooa );
+   assert( vb == vbchk );	/* same check as the quad/triangle paths: all allocated dwords written */
+   *pxy0 = xy0old;
+   *pxy1 = xy1old;
+#else /* !MACH64_NATIVE_VTXFMT */
+   GLuint vertsize = mmesa->vertex_size;
+   GLint coloridx;
+   float width = 1.0; /* Only support 1 pix lines now */
+   GLfloat ooa;
+   GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
+   unsigned vbsiz = 
+	 ((
+	    1 +				/* run header */
+	    (vertsize > 6 ? 2 : 0) +	/* S, T */
+	    (vertsize > 4 ? 2 : 0) +	/* W, SPEC_ARGB */
+	    3				/* Z, ARGB, X_Y */
+	    /* no multitex term: no SECONDARY_S/T/W run is written below (NOTE(review): native path copies them) */
+	 ) * 4 + 4);			/* 4 vertices + 2 (ONE_OVER_AREA header, value) pairs */
+   CARD32 *vb;
+   unsigned vbidx = 0;
+   
+   GLfloat hw, dx, dy, ix, iy;
+   GLfloat x0 = v0->v.x;
+   GLfloat y0 = v0->v.y;
+   GLfloat x1 = v1->v.x;
+   GLfloat y1 = v1->v.y;
+
+#if MACH64_CLIENT_STATE_EMITS
+   /* Enable for interleaved client-side state emits */
+   LOCK_HARDWARE( mmesa );
+   if ( mmesa->dirty ) {
+      mach64EmitHwStateLocked( mmesa );
+   }
+   if ( mmesa->sarea->dirty ) {
+      mach64UploadHwStateLocked( mmesa );
+   }
+   UNLOCK_HARDWARE( mmesa );
+#endif
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      fprintf(stderr,"Vertex 1: x: %.2f, y: %.2f, z: %.2f, w: %f\n", 
+	      v0->v.x, v0->v.y, v0->v.z, v0->v.w);
+      fprintf(stderr,"Vertex 2: x: %.2f, y: %.2f, z: %.2f, w: %f\n", 
+	      v1->v.x, v1->v.y, v1->v.z, v1->v.w);
+   }
+
+   hw = 0.5F * width;	/* half width: the quad extends hw to each side of the line */
+   if (hw > 0.1F && hw < 0.5F) {
+      hw = 0.5F;
+   }
+
+   /* adjust vertices depending on line direction */
+   dx = v0->v.x - v1->v.x;
+   dy = v0->v.y - v1->v.y;
+   if (dx * dx > dy * dy) {
+      /* X-major line */
+      ix = 0.0F;
+      iy = hw;
+      if (x1 < x0) {
+         x0 += 0.5F;
+         x1 += 0.5F;
+      }
+      y0 -= 0.5F;
+      y1 -= 0.5F;
+   }
+   else {
+      /* Y-major line */
+      ix = hw;
+      iy = 0.0F;
+      if (y1 > y0) {
+         y0 -= 0.5F;
+         y1 -= 0.5F;
+      }
+      x0 += 0.5F;
+      x1 += 0.5F;
+   }
+   /* three quad corners in fixed point (x4): v0 - offset, v1 - offset, v0 + offset */
+   xx[0] = (GLint)((x0 - ix) * 4);
+   yy[0] = (GLint)((y0 - iy) * 4);
+
+   xx[1] = (GLint)((x1 - ix) * 4);
+   yy[1] = (GLint)((y1 - iy) * 4);
+
+   xx[2] = (GLint)((x0 + ix) * 4);
+   yy[2] = (GLint)((y0 + iy) * 4);
+   /* edge cross product; 0.25 * 0.25 removes the x4 scaling of both factors (ooa holds the area term for now) */
+   ooa = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
+			(yy[0] - yy[2]) * (xx[1] - xx[2]));
+
+   if ( ooa * mmesa->backface_sign < 0 ) {
+      /* cull line -- NOTE(review): the sign follows the line direction; confirm backface_sign is 0 for lines */
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+	 fprintf(stderr,"Line culled\n");
+      return;
+   }
+
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * 4 );
+   /* invert the area term: from here on ooa really is one-over-area */
+   ooa = 1.0 / ooa;
+
+   coloridx = (vertsize > 4) ? 4: 3;	/* ARGB is read from ui[4] when vertsize > 4, else from ui[3] */
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */
+      LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) );         /* MACH64_VERTEX_1_Z */
+   vb[vbidx++] = v0->ui[coloridx];                             /* MACH64_VERTEX_1_ARGB (stored raw, no LE32_OUT) */
+   LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_2_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_2_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v1->ui[6] ); /* MACH64_VERTEX_2_S */
+      LE32_OUT( &vb[vbidx++], v1->ui[7] ); /* MACH64_VERTEX_2_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_W */
+      LE32_OUT( &vb[vbidx++], v1->ui[5] ); /* MACH64_VERTEX_2_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v1->v.z) << 15) );         /* MACH64_VERTEX_2_Z */
+   vb[vbidx++] = v1->ui[coloridx];                             /* MACH64_VERTEX_2_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[1] << 16) | (yy[1] & 0xffff) ); /* MACH64_VERTEX_2_X_Y */
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_3_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_3_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_3_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_3_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_3_W */
+      LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_3_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) );         /* MACH64_VERTEX_3_Z */
+   vb[vbidx++] = v0->ui[coloridx];                             /* MACH64_VERTEX_3_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[2] << 16) | (yy[2] & 0xffff) ); /* MACH64_VERTEX_3_X_Y */
+
+   LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
+   LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa );
+   /* fourth corner (v1 + offset) replaces slot 0 / VERTEX_1 for the second triangle */
+   xx[0] = (GLint)((x1 + ix) * 4);
+   yy[0] = (GLint)((y1 + iy) * 4);
+
+   ooa = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
+			(yy[0] - yy[2]) * (xx[1] - xx[2]));
+   ooa = 1.0 / ooa;
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v1->ui[6] ); /* MACH64_VERTEX_1_S */
+      LE32_OUT( &vb[vbidx++], v1->ui[7] ); /* MACH64_VERTEX_1_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_1_W */
+      LE32_OUT( &vb[vbidx++], v1->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v1->v.z) << 15) );         /* MACH64_VERTEX_1_Z */
+   vb[vbidx++] = v1->ui[coloridx];                             /* MACH64_VERTEX_1_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
+
+   LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
+   LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa );
+
+   assert(vbsiz == vbidx);	/* everything allocated must have been written */
+#endif
+}
+
+static __inline void mach64_draw_point( mach64ContextPtr mmesa,
+				      mach64VertexPtr v0 )
+{
+#if MACH64_NATIVE_VTXFMT
+   GLcontext *ctx = mmesa->glCtx;
+   const GLuint vertsize = mmesa->vertex_size;
+   GLint sz = (GLint)(mmesa->glCtx->Point._Size * 2.0); /* 2 fractional bits for hardware */
+   GLfloat ooa;
+   GLuint *pxy;
+   GLuint xyold, xy;
+   const GLuint xyoffset = 9;
+   GLint x, y;
+   unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * 4 + 2; /* 4 vertex copies + 2 area dwords */
+   CARD32 *vb, *vbchk;
+   /* Native path: the point is emitted as a quad made of two triangles around v0. */
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      fprintf(stderr,"Vertex 1:\n");
+      mach64_print_vertex( ctx, v0 );
+   }
+
+   if( !sz )
+      sz = 1;	/* round to the nearest supported size */
+   /* Unpack the packed X_Y dword of v0; it is patched per corner and restored at the end. */
+   pxy = &v0->ui[xyoffset];
+   xyold = *pxy;
+   xy = LE32_IN( &xyold );
+   x = (GLshort)( xy >> 16 );
+   y = (GLshort)( xy & 0xffff );
+
+   ooa = 4.0 / (sz * sz);
+
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) );
+   vbchk = vb + vbsiz;
+   /* First triangle: corners (-,-), (+,-), (-,+), followed by its ooa dword. */
+   LE32_OUT( pxy, (( x - sz ) << 16) | (( y - sz ) & 0xffff) );
+   COPY_VERTEX( vb, vertsize, v0, 1 );
+   LE32_OUT( pxy, (( x + sz ) << 16) | (( y - sz ) & 0xffff) );
+   COPY_VERTEX( vb, vertsize, v0, 2 );
+   LE32_OUT( pxy, (( x - sz ) << 16) | (( y + sz ) & 0xffff) );
+   COPY_VERTEX_OOA( vb, vertsize, v0, 3 );
+   LE32_OUT( vb++, *(CARD32 *)&ooa );
+
+   ooa = -ooa;
+   /* Second triangle: only vertex 1 is rewritten, now at the (+,+) corner. */
+   LE32_OUT( pxy, (( x + sz ) << 16) | (( y + sz ) & 0xffff) );
+   COPY_VERTEX_OOA( vb, vertsize, v0, 1 );
+   LE32_OUT( vb++, *(CARD32 *)&ooa );
+   *pxy = xyold;            /* put the caller's original coordinates back */
+   assert( vb == vbchk );   /* everything reserved above must have been written */
+#else /* !MACH64_NATIVE_VTXFMT */
+   GLuint vertsize = mmesa->vertex_size;
+   GLint coloridx;
+   float sz = 1.0; /* Only support 1 pix points now */
+   GLfloat ooa;
+   GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
+   unsigned vbsiz =
+	 ((
+	    1 +                              /* register-run header dword */
+	    (vertsize > 6 ? 2 : 0) +         /* S, T */
+	    (vertsize > 4 ? 2 : 0) +         /* W, SPEC_ARGB */
+	    3 +                              /* Z, ARGB, X_Y */
+	    (mmesa->multitex ? 4 : 0)
+	 ) * 4 + 4);
+   CARD32 *vb;
+   unsigned vbidx = 0;
+   /* NOTE(review): vbsiz adds 4 dwords/vertex for multitex, but no such writes follow - confirm. */
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      fprintf(stderr,"Vertex 1: x: %.2f, y: %.2f, z: %.2f, w: %f\n",
+	      v0->v.x, v0->v.y, v0->v.z, v0->v.w);
+   }
+
+#if MACH64_CLIENT_STATE_EMITS
+   /* Enable for interleaved client-side state emits */
+   LOCK_HARDWARE( mmesa );
+   if ( mmesa->dirty ) {
+      mach64EmitHwStateLocked( mmesa );
+   }
+   if ( mmesa->sarea->dirty ) {
+      mach64UploadHwStateLocked( mmesa );
+   }
+   UNLOCK_HARDWARE( mmesa );
+#endif
+   /* Quad corners in fixed point: (x-sz,y-sz), (x+sz,y-sz), (x-sz,y+sz). */
+   xx[0] = (GLint)((v0->v.x - sz) * 4);
+   yy[0] = (GLint)((v0->v.y - sz) * 4);
+
+   xx[1] = (GLint)((v0->v.x + sz) * 4);
+   yy[1] = (GLint)((v0->v.y - sz) * 4);
+
+   xx[2] = (GLint)((v0->v.x - sz) * 4);
+   yy[2] = (GLint)((v0->v.y + sz) * 4);
+   /* Edge cross product, scaled by 0.25 * 0.25 to undo the fixed-point factor. */
+   ooa = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
+			(yy[0] - yy[2]) * (xx[1] - xx[2]));
+
+   if ( ooa * mmesa->backface_sign < 0 ) {
+      /* cull quad */
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+	 fprintf(stderr,"Point culled\n");
+      return;
+   }
+
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * 4 );
+
+   ooa = 1.0 / ooa;
+
+   coloridx = (vertsize > 4) ? 4: 3;
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */
+      LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) );         /* MACH64_VERTEX_1_Z */
+   vb[vbidx++] = v0->ui[coloridx];                             /* MACH64_VERTEX_1_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_2_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_2_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_2_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_2_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_2_W */
+      LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_2_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) );         /* MACH64_VERTEX_2_Z */
+   vb[vbidx++] = v0->ui[coloridx];                             /* MACH64_VERTEX_2_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[1] << 16) | (yy[1] & 0xffff) ); /* MACH64_VERTEX_2_X_Y */
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_3_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_3_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_3_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_3_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_3_W */
+      LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_3_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) );         /* MACH64_VERTEX_3_Z */
+   vb[vbidx++] = v0->ui[coloridx];                             /* MACH64_VERTEX_3_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[2] << 16) | (yy[2] & 0xffff) ); /* MACH64_VERTEX_3_X_Y */
+
+   LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
+   LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa );
+   /* Second triangle: vertex 1 moves to the (x+sz,y+sz) corner; 2 and 3 are not re-sent. */
+   xx[0] = (GLint)((v0->v.x + sz) * 4);
+   yy[0] = (GLint)((v0->v.y + sz) * 4);
+
+   ooa = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
+			(yy[0] - yy[2]) * (xx[1] - xx[2]));
+   ooa = 1.0 / ooa;
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */
+      LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) );         /* MACH64_VERTEX_1_Z */
+   vb[vbidx++] = v0->ui[coloridx];                             /* MACH64_VERTEX_1_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
+
+   LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
+   LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa );
+
+   assert(vbsiz == vbidx);
+#endif
+}
+
+/***********************************************************************
+ *          Macros for t_dd_tritmp.h to draw basic primitives          *
+ ***********************************************************************/
+
+#define TRI( a, b, c )				\
+do {						\
+   if (DO_FALLBACK)				\
+      mmesa->draw_tri( mmesa, a, b, c );	\
+   else						\
+      mach64_draw_triangle( mmesa, a, b, c );	\
+} while (0)
+
+#define QUAD( a, b, c, d )			\
+do {						\
+   if (DO_FALLBACK) {				\
+      mmesa->draw_tri( mmesa, a, b, d );	\
+      mmesa->draw_tri( mmesa, b, c, d );	\
+   } else 					\
+      mach64_draw_quad( mmesa, a, b, c, d );	\
+} while (0)
+
+#define LINE( v0, v1 )				\
+do {						\
+   if (DO_FALLBACK)				\
+      mmesa->draw_line( mmesa, v0, v1 );	\
+   else 					\
+      mach64_draw_line( mmesa, v0, v1 );	\
+} while (0)
+
+#define POINT( v0 )				\
+do {						\
+   if (DO_FALLBACK)				\
+      mmesa->draw_point( mmesa, v0 );		\
+   else 					\
+      mach64_draw_point( mmesa, v0 );		\
+} while (0)
+
+
+/***********************************************************************
+ *              Build render functions from dd templates               *
+ ***********************************************************************/
+
+#define MACH64_OFFSET_BIT	0x01
+#define MACH64_TWOSIDE_BIT	0x02
+#define MACH64_UNFILLED_BIT	0x04
+#define MACH64_FALLBACK_BIT	0x08
+#define MACH64_MAX_TRIFUNC	0x10
+
+static struct {
+   tnl_points_func	        points;
+   tnl_line_func		line;
+   tnl_triangle_func	triangle;
+   tnl_quad_func		quad;
+} rast_tab[MACH64_MAX_TRIFUNC];
+
+
+#define DO_FALLBACK (IND & MACH64_FALLBACK_BIT)
+#define DO_OFFSET   (IND & MACH64_OFFSET_BIT)
+#define DO_UNFILLED (IND & MACH64_UNFILLED_BIT)
+#define DO_TWOSIDE  (IND & MACH64_TWOSIDE_BIT)
+#define DO_FLAT      0
+#define DO_TRI       1
+#define DO_QUAD      1
+#define DO_LINE      1
+#define DO_POINTS    1
+#define DO_FULL_QUAD 1
+
+#define HAVE_RGBA   1
+#define HAVE_SPEC   1
+#define HAVE_BACK_COLORS  0
+#define HAVE_HW_FLATSHADE 1
+#define VERTEX mach64Vertex
+#define TAB rast_tab
+
+#if MACH64_NATIVE_VTXFMT
+
+/* #define DEPTH_SCALE 65536.0 */
+#define DEPTH_SCALE 1
+#define UNFILLED_TRI unfilled_tri
+#define UNFILLED_QUAD unfilled_quad
+#define VERT_X(_v) ((GLfloat)(GLshort)(LE32_IN( &(_v)->ui[xyoffset] ) & 0xffff) / 4.0)
+#define VERT_Y(_v) ((GLfloat)(GLshort)(LE32_IN( &(_v)->ui[xyoffset] ) >> 16) / 4.0)
+#define VERT_Z(_v) ((GLfloat) LE32_IN( &(_v)->ui[zoffset] ))
+#define INSANE_VERTICES
+#define VERT_SET_Z(_v,val) LE32_OUT( &(_v)->ui[zoffset], (GLuint)(val) )
+#define VERT_Z_ADD(_v,val) LE32_OUT( &(_v)->ui[zoffset], LE32_IN( &(_v)->ui[zoffset] ) + (GLuint)(val) )
+#define AREA_IS_CCW( a ) ((a) < 0)
+#define GET_VERTEX(e) (mmesa->verts + ((e) * mmesa->vertex_size * sizeof(int)))
+
+#define MACH64_COLOR( dst, src )                \
+do {						\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[0], src[2]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[1], src[1]);				\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[2], src[0]);				\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[3], src[3]);				\
+} while (0)
+
+#define MACH64_SPEC( dst, src )			\
+do {						\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[0], src[2]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[1], src[1]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[2], src[0]);	\
+} while (0)
+
+#define VERT_SET_RGBA( v, c )    MACH64_COLOR( v->ub4[coloroffset], c )
+#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
+#define VERT_SAVE_RGBA( idx )    color[idx] = v[idx]->ui[coloroffset]
+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
+
+#define VERT_SET_SPEC( v, c )    if (havespec) MACH64_SPEC( v->ub4[specoffset], c )
+#define VERT_COPY_SPEC( v0, v1 ) if (havespec) COPY_3V( v0->ub4[specoffset], v1->ub4[specoffset] )
+#define VERT_SAVE_SPEC( idx )    if (havespec) spec[idx] = v[idx]->ui[specoffset]
+#define VERT_RESTORE_SPEC( idx ) if (havespec) v[idx]->ui[specoffset] = spec[idx]
+
+#define LOCAL_VARS(n)						\
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);		\
+   GLuint color[n], spec[n];					\
+   GLuint vertex_size = mmesa->vertex_size;			\
+   const GLuint xyoffset = 9;					\
+   const GLuint coloroffset = 8;				\
+   const GLuint zoffset = 7;					\
+   const GLuint specoffset = 6;					\
+   GLboolean havespec = vertex_size >= 4 ? 1 : 0;		\
+   (void) color; (void) spec; (void) vertex_size; 		\
+   (void) xyoffset; (void) coloroffset; (void) zoffset;		\
+   (void) specoffset; (void) havespec;
+
+#else
+
+#define DEPTH_SCALE 1.0
+#define UNFILLED_TRI unfilled_tri
+#define UNFILLED_QUAD unfilled_quad
+#define VERT_X(_v) _v->v.x
+#define VERT_Y(_v) _v->v.y
+#define VERT_Z(_v) _v->v.z
+#define AREA_IS_CCW( a ) (a > 0)
+#define GET_VERTEX(e) (mmesa->verts + ((e) * mmesa->vertex_size * sizeof(int)))
+
+#define MACH64_COLOR( dst, src )                \
+do {						\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[0], src[2]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[1], src[1]);				\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[2], src[0]);				\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[3], src[3]);				\
+} while (0)
+
+#define MACH64_SPEC( dst, src )			\
+do {						\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[0], src[2]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[1], src[1]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[2], src[0]);	\
+} while (0)
+
+#define VERT_SET_RGBA( v, c )    MACH64_COLOR( v->ub4[coloroffset], c )
+#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
+#define VERT_SAVE_RGBA( idx )    color[idx] = v[idx]->ui[coloroffset]
+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
+
+#define VERT_SET_SPEC( v, c )    if (havespec) MACH64_SPEC( v->ub4[5], c )
+#define VERT_COPY_SPEC( v0, v1 ) if (havespec) COPY_3V(v0->ub4[5], v1->ub4[5])
+#define VERT_SAVE_SPEC( idx )    if (havespec) spec[idx] = v[idx]->ui[5]
+#define VERT_RESTORE_SPEC( idx ) if (havespec) v[idx]->ui[5] = spec[idx]
+
+#define LOCAL_VARS(n)						\
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);		\
+   GLuint color[n], spec[n];					\
+   GLuint coloroffset = (mmesa->vertex_size == 4 ? 3 : 4);	\
+   GLboolean havespec = (mmesa->vertex_size == 4 ? 0 : 1);	\
+   (void) color; (void) spec; (void) coloroffset; (void) havespec;
+
+#endif
+
+/***********************************************************************
+ *                Helpers for rendering unfilled primitives            *
+ ***********************************************************************/
+
+#define RASTERIZE(x) if (mmesa->hw_primitive != hw_prim[x]) \
+                        mach64RasterPrimitive( ctx, hw_prim[x] )
+#define RENDER_PRIMITIVE mmesa->render_primitive
+#define IND MACH64_FALLBACK_BIT
+#define TAG(x) x
+#include "tnl_dd/t_dd_unfilled.h"
+#undef IND
+
+
+/***********************************************************************
+ *                      Generate GL render functions                   *
+ ***********************************************************************/
+
+
+#define IND (0)
+#define TAG(x) x
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_OFFSET_BIT)
+#define TAG(x) x##_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_TWOSIDE_BIT)
+#define TAG(x) x##_twoside
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_TWOSIDE_BIT|MACH64_OFFSET_BIT)
+#define TAG(x) x##_twoside_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_UNFILLED_BIT)
+#define TAG(x) x##_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_OFFSET_BIT|MACH64_UNFILLED_BIT)
+#define TAG(x) x##_offset_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_TWOSIDE_BIT|MACH64_UNFILLED_BIT)
+#define TAG(x) x##_twoside_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_TWOSIDE_BIT|MACH64_OFFSET_BIT|MACH64_UNFILLED_BIT)
+#define TAG(x) x##_twoside_offset_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_FALLBACK_BIT)
+#define TAG(x) x##_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_OFFSET_BIT|MACH64_FALLBACK_BIT)
+#define TAG(x) x##_offset_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_TWOSIDE_BIT|MACH64_FALLBACK_BIT)
+#define TAG(x) x##_twoside_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_TWOSIDE_BIT|MACH64_OFFSET_BIT|MACH64_FALLBACK_BIT)
+#define TAG(x) x##_twoside_offset_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_UNFILLED_BIT|MACH64_FALLBACK_BIT)
+#define TAG(x) x##_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_OFFSET_BIT|MACH64_UNFILLED_BIT|MACH64_FALLBACK_BIT)
+#define TAG(x) x##_offset_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_TWOSIDE_BIT|MACH64_UNFILLED_BIT|MACH64_FALLBACK_BIT)
+#define TAG(x) x##_twoside_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_TWOSIDE_BIT|MACH64_OFFSET_BIT|MACH64_UNFILLED_BIT| \
+	     MACH64_FALLBACK_BIT)
+#define TAG(x) x##_twoside_offset_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+
+static void init_rast_tab( void )
+{
+   init();                                  /* IND = 0 */
+   init_offset();                           /* OFFSET */
+   init_twoside();                          /* TWOSIDE */
+   init_twoside_offset();                   /* TWOSIDE | OFFSET */
+   init_unfilled();                         /* UNFILLED */
+   init_offset_unfilled();                  /* OFFSET | UNFILLED */
+   init_twoside_unfilled();                 /* TWOSIDE | UNFILLED */
+   init_twoside_offset_unfilled();          /* TWOSIDE | OFFSET | UNFILLED */
+   init_fallback();                         /* FALLBACK */
+   init_offset_fallback();                  /* OFFSET | FALLBACK */
+   init_twoside_fallback();                 /* TWOSIDE | FALLBACK */
+   init_twoside_offset_fallback();          /* TWOSIDE | OFFSET | FALLBACK */
+   init_unfilled_fallback();                /* UNFILLED | FALLBACK */
+   init_offset_unfilled_fallback();         /* OFFSET | UNFILLED | FALLBACK */
+   init_twoside_unfilled_fallback();        /* TWOSIDE | UNFILLED | FALLBACK */
+   init_twoside_offset_unfilled_fallback(); /* all four bits; presumably each call fills rast_tab[IND] */
+}
+
+
+/***********************************************************************
+ *                    Rasterization fallback helpers                   *
+ ***********************************************************************/
+
+
+/* This code is hit only when a mix of accelerated and unaccelerated
+ * primitives are being drawn, and only for the unaccelerated
+ * primitives.
+ */
+static void
+mach64_fallback_tri( mach64ContextPtr mmesa,
+		     mach64Vertex *v0,
+		     mach64Vertex *v1,
+		     mach64Vertex *v2 )
+{
+   GLcontext *ctx = mmesa->glCtx;
+   SWvertex sw0, sw1, sw2;   /* swrast-format copies of the hardware vertices */
+   mach64_translate_vertex( ctx, v0, &sw0 );
+   mach64_translate_vertex( ctx, v1, &sw1 );
+   mach64_translate_vertex( ctx, v2, &sw2 );
+   _swrast_Triangle( ctx, &sw0, &sw1, &sw2 );   /* rasterize in software */
+}
+
+
+static void
+mach64_fallback_line( mach64ContextPtr mmesa,
+		    mach64Vertex *v0,
+		    mach64Vertex *v1 )
+{
+   GLcontext *ctx = mmesa->glCtx;
+   SWvertex sw0, sw1;   /* swrast-format copies of the two endpoints */
+   mach64_translate_vertex( ctx, v0, &sw0 );
+   mach64_translate_vertex( ctx, v1, &sw1 );
+   _swrast_Line( ctx, &sw0, &sw1 );   /* rasterize in software */
+}
+
+
+static void
+mach64_fallback_point( mach64ContextPtr mmesa,
+		     mach64Vertex *v0 )
+{
+   GLcontext *ctx = mmesa->glCtx;
+   SWvertex sw;   /* swrast-format copy of the hardware vertex */
+   mach64_translate_vertex( ctx, v0, &sw );
+   _swrast_Point( ctx, &sw );   /* rasterize in software */
+}
+
+
+
+/**********************************************************************/
+/*               Render unclipped begin/end objects                   */
+/**********************************************************************/
+
+#define VERT(x) (mach64Vertex *)(mach64verts + ((x) * vertsize * sizeof(int)))
+#define RENDER_POINTS( start, count )		\
+   for ( ; start < count ; start++)		\
+      mach64_draw_point( mmesa, VERT(start) )
+#define RENDER_LINE( v0, v1 ) \
+   mach64_draw_line( mmesa, VERT(v0), VERT(v1) )
+#define RENDER_TRI( v0, v1, v2 )  \
+   mach64_draw_triangle( mmesa, VERT(v0), VERT(v1), VERT(v2) )
+#define RENDER_QUAD( v0, v1, v2, v3 ) \
+   mach64_draw_quad( mmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) )
+#define INIT(x) do {					\
+   if (0) fprintf(stderr, "%s\n", __FUNCTION__);	\
+   mach64RenderPrimitive( ctx, x );			\
+} while (0)
+#undef LOCAL_VARS
+#define LOCAL_VARS						\
+    mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);		\
+    const GLuint vertsize = mmesa->vertex_size;                 \
+    const char *mach64verts = (char *)mmesa->verts;		\
+    const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
+    (void) elt;
+#define RESET_STIPPLE
+#define RESET_OCCLUSION
+#define PRESERVE_VB_DEFS
+#define ELT(x) (x)
+#define TAG(x) mach64_##x##_verts
+#include "tnl/t_vb_rendertmp.h"
+#undef ELT
+#undef TAG
+#define TAG(x) mach64_##x##_elts
+#define ELT(x) elt[x]
+#include "tnl/t_vb_rendertmp.h"
+
+
+/**********************************************************************/
+/*                    Render clipped primitives                       */
+/**********************************************************************/
+
+static void mach64RenderClippedPoly( GLcontext *ctx, const GLuint *elts,
+				     GLuint n )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *vbuf = &tnl->vb;
+   GLuint *const saved_elts = vbuf->Elts;
+   /* Sample the current primitive before the polygon renderer runs. */
+   const GLuint saved_prim = MACH64_CONTEXT( ctx )->render_primitive;
+
+   /* Temporarily point the vertex buffer at the clipped element list and
+    * run the n elements through the GL_POLYGON element renderer as one
+    * complete (begin + end) primitive, then put the old list back.
+    */
+   vbuf->Elts = (GLuint *)elts;
+   tnl->Driver.Render.PrimTabElts[GL_POLYGON]( ctx, 0, n, PRIM_BEGIN|PRIM_END );
+   vbuf->Elts = saved_elts;
+
+   /* Re-announce the caller's primitive unless it already was GL_POLYGON
+    * (presumably the polygon renderer switched the render primitive).
+    */
+   if (saved_prim != GL_POLYGON)
+      tnl->Driver.Render.PrimitiveNotify( ctx, saved_prim );
+}
+
+static void mach64RenderClippedLine( GLcontext *ctx, GLuint ii, GLuint jj )
+{
+   /* Clipped lines are handed to whichever line function is installed. */
+   TNL_CONTEXT(ctx)->Driver.Render.Line( ctx, ii, jj );
+}
+
+#if MACH64_NATIVE_VTXFMT
+static void mach64FastRenderClippedPoly( GLcontext *ctx, const GLuint *elts,
+					 GLuint n )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT( ctx );
+   const GLuint vertsize = mmesa->vertex_size;
+   GLint a;
+   union {
+      GLfloat f;
+      CARD32 u;
+   } ooa; /* float value and the raw 32 bits that get written to the DMA buffer */
+   GLuint xy;
+   const GLuint xyoffset = 9;
+   GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
+   unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * n + (n-2); /* n vertex copies + (n-2) area dwords */
+   CARD32 *vb, *vbchk;
+   GLubyte *mach64verts = (GLubyte *)mmesa->verts;
+   mach64VertexPtr v0, v1, v2;
+   int i;
+   /* Fan: elts[0] stays in vertex slot 3; slots 1 and 2 start as elts[1] and elts[2]. */
+   v0 = (mach64VertexPtr)VERT(elts[1]);
+   v1 = (mach64VertexPtr)VERT(elts[2]);
+   v2 = (mach64VertexPtr)VERT(elts[0]);
+   /* Each packed X_Y dword holds x in the high and y in the low 16 bits (signed). */
+   xy = LE32_IN( &v0->ui[xyoffset] );
+   xx[0] = (GLshort)( xy >> 16 );
+   yy[0] = (GLshort)( xy & 0xffff );
+   
+   xy = LE32_IN( &v1->ui[xyoffset] );
+   xx[1] = (GLshort)( xy >> 16 );
+   yy[1] = (GLshort)( xy & 0xffff );
+   
+   xy = LE32_IN( &v2->ui[xyoffset] );
+   xx[2] = (GLshort)( xy >> 16 );
+   yy[2] = (GLshort)( xy & 0xffff );
+   /* Cross product of the two edges from v2, in fixed-point units; its sign gives the facing. */
+   a = (xx[0] - xx[2]) * (yy[1] - yy[2]) -
+       (yy[0] - yy[2]) * (xx[1] - xx[2]);
+
+   if ( (mmesa->backface_sign &&
+	((a < 0 && !signbit( mmesa->backface_sign )) || 
+	(a > 0 && signbit( mmesa->backface_sign )))) ) {
+      /* cull polygon */
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+	 fprintf(stderr,"Polygon culled\n");
+      return;
+   }
+   
+   ooa.f = 16.0 / a; /* 16 = 4 * 4 undoes the 2 fractional bits per axis; NOTE(review): a may be 0 */
+   
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) );
+   vbchk = vb + vbsiz;
+
+   COPY_VERTEX( vb, vertsize, v0, 1 );
+   COPY_VERTEX( vb, vertsize, v1, 2 );
+   COPY_VERTEX_OOA( vb, vertsize, v2, 3 );
+   LE32_OUT( vb++, ooa.u );
+   /* Each further element replaces slot 1 or slot 2 in turn and emits one more triangle. */
+   i = 3;
+   while (1) {
+      if (i >= n)
+	 break;
+      v0 = (mach64VertexPtr)VERT(elts[i]);
+      i++;
+
+      xy = LE32_IN( &v0->ui[xyoffset] );
+      xx[0] = (GLshort)( xy >> 16 );
+      yy[0] = (GLshort)( xy & 0xffff );
+	      
+      a = (xx[0] - xx[2]) * (yy[1] - yy[2]) -
+	  (yy[0] - yy[2]) * (xx[1] - xx[2]);
+      ooa.f = 16.0 / a;
+   
+      COPY_VERTEX_OOA( vb, vertsize, v0, 1 );
+      LE32_OUT( vb++, ooa.u );
+      
+      if (i >= n)
+	 break;
+      v1 = (mach64VertexPtr)VERT(elts[i]);
+      i++;
+
+      xy = LE32_IN( &v1->ui[xyoffset] );
+      xx[1] = (GLshort)( xy >> 16 );
+      yy[1] = (GLshort)( xy & 0xffff );
+	      
+      a = (xx[0] - xx[2]) * (yy[1] - yy[2]) -
+	  (yy[0] - yy[2]) * (xx[1] - xx[2]);
+      ooa.f = 16.0 / a;
+   
+      COPY_VERTEX_OOA( vb, vertsize, v1, 2 );
+      LE32_OUT( vb++, ooa.u );
+   }
+
+   assert( vb == vbchk ); /* the write cursor must land exactly on the end of the reservation */
+}
+#else
+static void mach64FastRenderClippedPoly( GLcontext *ctx, const GLuint *elts,
+					 GLuint n )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT( ctx );
+   /* VERT() expands to an expression that uses these two locals by name. */
+   const GLuint vertsize = mmesa->vertex_size;
+   GLubyte *mach64verts = (GLubyte *)mmesa->verts;
+   /* Every triangle of the fan shares the polygon's first vertex. */
+   mach64VertexPtr pivot = (mach64VertexPtr)VERT(elts[0]);
+   GLuint cur = 2;
+
+   /* One triangle per remaining edge: (elts[cur-1], elts[cur], elts[0]). */
+   while (cur < n) {
+      mach64_draw_triangle( mmesa, VERT(elts[cur-1]), VERT(elts[cur]), pivot );
+      cur++;
+   }
+}
+#endif /* MACH64_NATIVE_VTXFMT */
+
+/**********************************************************************/
+/*                    Choose render functions                         */
+/**********************************************************************/
+
+#define _MACH64_NEW_RENDER_STATE (_DD_NEW_POINT_SMOOTH |	\
+			          _DD_NEW_LINE_SMOOTH |		\
+			          _DD_NEW_LINE_STIPPLE |	\
+			          _DD_NEW_TRI_SMOOTH |		\
+			          _DD_NEW_TRI_STIPPLE |		\
+			          _NEW_POLYGONSTIPPLE |		\
+			          _DD_NEW_TRI_UNFILLED |	\
+			          _DD_NEW_TRI_LIGHT_TWOSIDE |	\
+			          _DD_NEW_TRI_OFFSET) /* state bits that make mach64RunPipeline re-run mach64ChooseRenderState */
+
+#define POINT_FALLBACK (DD_POINT_SMOOTH)
+#define LINE_FALLBACK (DD_LINE_SMOOTH|DD_LINE_STIPPLE)
+#define TRI_FALLBACK (DD_TRI_SMOOTH|DD_TRI_STIPPLE)
+#define ANY_FALLBACK_FLAGS (POINT_FALLBACK|LINE_FALLBACK|TRI_FALLBACK)
+#define ANY_RASTER_FLAGS (DD_TRI_LIGHT_TWOSIDE|DD_TRI_OFFSET|DD_TRI_UNFILLED)
+
+
+static void mach64ChooseRenderState(GLcontext *ctx)
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   const GLuint caps = ctx->_TriangleCaps;
+   GLuint index = 0;
+   TNLcontext *tnl;
+
+   /* Turn the triangle caps into a rast_tab index.  The per-primitive draw
+    * hooks are refreshed only when at least one relevant cap is set; each
+    * hook is the hardware function unless its fallback caps are present.
+    */
+   if (caps & (ANY_RASTER_FLAGS|ANY_FALLBACK_FLAGS)) {
+      if (caps & DD_TRI_LIGHT_TWOSIDE) index |= MACH64_TWOSIDE_BIT;
+      if (caps & DD_TRI_OFFSET)        index |= MACH64_OFFSET_BIT;
+      if (caps & DD_TRI_UNFILLED)      index |= MACH64_UNFILLED_BIT;
+      if (caps & ANY_FALLBACK_FLAGS)   index |= MACH64_FALLBACK_BIT;
+
+      mmesa->draw_point = mach64_draw_point;
+      if (caps & POINT_FALLBACK) mmesa->draw_point = mach64_fallback_point;
+      mmesa->draw_line = mach64_draw_line;
+      if (caps & LINE_FALLBACK)  mmesa->draw_line = mach64_fallback_line;
+      mmesa->draw_tri = mach64_draw_triangle;
+      if (caps & TRI_FALLBACK)   mmesa->draw_tri = mach64_fallback_tri;
+   }
+
+   /* Nothing to rewire when the selected variant is already installed. */
+   if (index == mmesa->RenderIndex)
+      return;
+
+   tnl = TNL_CONTEXT(ctx);
+   tnl->Driver.Render.Points = rast_tab[index].points;
+   tnl->Driver.Render.Line = rast_tab[index].line;
+   tnl->Driver.Render.Triangle = rast_tab[index].triangle;
+   tnl->Driver.Render.Quad = rast_tab[index].quad;
+
+   /* Only the plain variant (index 0) uses the driver's own render tables. */
+   if (index == 0) {
+      tnl->Driver.Render.PrimTabVerts = mach64_render_tab_verts;
+      tnl->Driver.Render.PrimTabElts = mach64_render_tab_elts;
+      tnl->Driver.Render.ClippedLine = rast_tab[index].line;
+      tnl->Driver.Render.ClippedPolygon = mach64FastRenderClippedPoly;
+   } else {
+      tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
+      tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
+      tnl->Driver.Render.ClippedLine = mach64RenderClippedLine;
+      tnl->Driver.Render.ClippedPolygon = mach64RenderClippedPoly;
+   }
+   mmesa->RenderIndex = index;
+}
+
+/**********************************************************************/
+/*                 Validate state at pipeline start                   */
+/**********************************************************************/
+
+static void mach64RunPipeline( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   /* Process pending driver state before choosing vertex/render paths. */
+   if (mmesa->new_state != 0)
+      mach64DDUpdateHWState( ctx );
+
+   /* While a fallback is active, pending GL state bits are left queued. */
+   if (mmesa->NewGLState != 0 && mmesa->Fallback == 0) {
+      if (mmesa->NewGLState & _MACH64_NEW_VERTEX_STATE)
+	 mach64ChooseVertexState( ctx );
+      if (mmesa->NewGLState & _MACH64_NEW_RENDER_STATE)
+	 mach64ChooseRenderState( ctx );
+      mmesa->NewGLState = 0;
+   }
+
+   _tnl_run_pipeline( ctx );
+}
+
+/**********************************************************************/
+/*                 High level hooks for t_vb_render.c                 */
+/**********************************************************************/
+
+/* This is called when Mesa switches between rendering triangle
+ * primitives (such as GL_POLYGON, GL_QUADS, GL_TRIANGLE_STRIP, etc),
+ * and lines, points and bitmaps.
+ */
+
+static void mach64RasterPrimitive( GLcontext *ctx, GLuint hwprim )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   /* The context state is flagged on every call, changed primitive or not. */
+   mmesa->dirty |= MACH64_UPLOAD_CONTEXT;
+   mmesa->new_state |= MACH64_NEW_CONTEXT;
+   if (mmesa->hw_primitive == hwprim)
+      return;
+   FLUSH_BATCH( mmesa );   /* runs before the new primitive is recorded */
+   mmesa->hw_primitive = hwprim;
+}
+
+static void mach64RenderPrimitive( GLcontext *ctx, GLenum prim )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   const GLboolean defer_hw_prim =
+      (prim >= GL_TRIANGLES) && ((ctx->_TriangleCaps & DD_TRI_UNFILLED) != 0);
+
+   mmesa->render_primitive = prim;
+   /* With unfilled polygons enabled, triangle-class primitives do not set
+    * the hardware primitive here (presumably RASTERIZE() does it later). */
+   if (!defer_hw_prim)
+      mach64RasterPrimitive( ctx, hw_prim[prim] );
+}
+
+
+static void mach64RenderStart( GLcontext *ctx )
+{
+   /* Installed as tnl->Driver.Render.Start.  Check for projective
+    * texturing and make sure all texcoord pointers point to something
+    * (possibly something Mesa itself should fix). */
+   mach64CheckTexSizes( ctx );
+}
+
+static void mach64RenderFinish( GLcontext *ctx )
+{
+   if (MACH64_CONTEXT(ctx)->RenderIndex & MACH64_FALLBACK_BIT) /* a fallback render variant is installed */
+      _swrast_flush( ctx ); /* installed as tnl->Driver.Render.Finish */
+}
+
+
+/**********************************************************************/
+/*           Transition to/from hardware rasterization.               */
+/**********************************************************************/
+
+static const char * const fallbackStrings[] = {
+   "Texture mode",
+   "glDrawBuffer(GL_FRONT_AND_BACK)",
+   "glReadBuffer",
+   "glEnable(GL_STENCIL) without hw stencil buffer",
+   "glRenderMode(selection or feedback)",
+   "glLogicOp (mode != GL_COPY)",
+   "GL_SEPARATE_SPECULAR_COLOR",
+   "glBlendEquation (mode != ADD)",
+   "glBlendFunc",
+   "Rasterization disable",
+};
+
+
+static const char *getFallbackString(GLuint bit)
+{
+   /* Index = position of the highest set bit; unknown bits get a placeholder. */
+   const GLuint count = sizeof(fallbackStrings) / sizeof(fallbackStrings[0]);
+   GLuint i;
+   for (i = 0; bit > 1; bit >>= 1)
+      i++;
+   return (i < count) ? fallbackStrings[i] : "(unknown fallback bit)";
+}
+
+void mach64Fallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   const GLuint before = mmesa->Fallback;
+
+   if (mode) {
+      /* Raise the bit; the switch-over runs only for the first active bit. */
+      mmesa->Fallback = before | bit;
+      if (before != 0)
+	 return;
+      FLUSH_BATCH( mmesa );
+      _swsetup_Wakeup( ctx );
+      mmesa->RenderIndex = ~0;
+      if (MACH64_DEBUG & DEBUG_VERBOSE_FALLBACK)
+	 fprintf(stderr, "Mach64 begin rasterization fallback: 0x%x %s\n",
+		 bit, getFallbackString(bit));
+      return;
+   }
+
+   /* Clear the bit; hooks are reinstalled only if it was the sole bit set. */
+   mmesa->Fallback = before & ~bit;
+   if (before != bit)
+      return;
+   _swrast_flush( ctx );
+   tnl->Driver.Render.Start = mach64RenderStart;
+   tnl->Driver.Render.PrimitiveNotify = mach64RenderPrimitive;
+   tnl->Driver.Render.Finish = mach64RenderFinish;
+   tnl->Driver.Render.BuildVertices = mach64BuildVertices;
+   mmesa->NewGLState |= (_MACH64_NEW_RENDER_STATE|
+			 _MACH64_NEW_VERTEX_STATE);
+   if (MACH64_DEBUG & DEBUG_VERBOSE_FALLBACK)
+      fprintf(stderr, "Mach64 end rasterization fallback: 0x%x %s\n",
+	      bit, getFallbackString(bit));
+}
+
+/**********************************************************************/
+/*                            Initialization.                         */
+/**********************************************************************/
+
+void mach64InitTriFuncs( GLcontext *ctx )
+{
+   static int tab_ready = 0;   /* rast_tab is file-static: build it only once */
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   if (!tab_ready) {
+      init_rast_tab();
+      tab_ready = 1;
+   }
+   /* Install this driver's pipeline and render hooks into the TNL context. */
+   tnl->Driver.RunPipeline = mach64RunPipeline;
+   tnl->Driver.Render.BuildVertices = mach64BuildVertices;
+   tnl->Driver.Render.Start = mach64RenderStart;
+   tnl->Driver.Render.PrimitiveNotify = mach64RenderPrimitive;
+   tnl->Driver.Render.Finish = mach64RenderFinish;
+   tnl->Driver.Render.ResetLineStipple = _swrast_ResetLineStipple;
+}
diff --git a/src/mach64_tris.h b/src/mach64_tris.h
new file mode 100644
index 0000000..2087032
--- /dev/null
+++ b/src/mach64_tris.h
@@ -0,0 +1,43 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#ifndef __MACH64_TRIS_H__
+#define __MACH64_TRIS_H__
+
+#include "mtypes.h"
+
+extern void mach64InitTriFuncs( GLcontext *ctx );   /* installs the TNL render hooks */
+
+/* FALLBACK() forwards to mach64Fallback() using the context's glCtx member. */
+extern void mach64Fallback( GLcontext *ctx, GLuint bit, GLboolean mode );
+#define FALLBACK( mmesa, bit, mode ) mach64Fallback( (mmesa)->glCtx, bit, mode )
+
+
+#endif /* __MACH64_TRIS_H__ */
diff --git a/src/mach64_vb.c b/src/mach64_vb.c
new file mode 100644
index 0000000..83a5f73
--- /dev/null
+++ b/src/mach64_vb.c
@@ -0,0 +1,642 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "glheader.h"
+#include "mtypes.h"
+#include "imports.h"
+#include "macros.h"
+#include "colormac.h"
+
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/t_context.h"
+
+#include "mach64_context.h"
+#include "mach64_vb.h"
+#include "mach64_ioctl.h"
+#include "mach64_tris.h"
+#include "mach64_state.h"
+
+
+#define MACH64_TEX1_BIT       0x1   /* texture unit 1 coordinates */
+#define MACH64_TEX0_BIT       0x2   /* texture unit 0 coordinates */
+#define MACH64_RGBA_BIT       0x4   /* primary color */
+#define MACH64_SPEC_BIT       0x8   /* specular (secondary) color */
+#define MACH64_FOG_BIT        0x10  /* fog coordinate */
+#define MACH64_XYZW_BIT       0x20  /* position */
+#define MACH64_PTEX_BIT       0x40  /* projective texture coordinates (q) */
+#define MACH64_MAX_SETUP      0x80  /* size of setup_tab[]: one slot per OR-ed combination of the bits above */
+
+static struct {   /* dispatch table indexed by an OR of MACH64_*_BIT flags (see mmesa->SetupIndex) */
+   void                (*emit)( GLcontext *, GLuint, GLuint, void *, GLuint ); /* called as (ctx, start, count, dest, stride) */
+   tnl_interp_func		interp;      /* installed as tnl->Driver.Render.Interp */
+   tnl_copy_pv_func	        copy_pv;     /* installed as tnl->Driver.Render.CopyPV */
+   GLboolean           (*check_tex_sizes)( GLcontext *ctx ); /* a false return makes mach64CheckTexSizes add MACH64_PTEX_BIT */
+   GLuint               vertex_size;   /* copied to mmesa->vertex_size; multiplied by sizeof(int) for the byte stride */
+   GLuint               vertex_format; /* copied to mmesa->vertex_format */
+} setup_tab[MACH64_MAX_SETUP];
+
+#define TINY_VERTEX_FORMAT      1   /* format ids; presumably stored in setup_tab[].vertex_format by the templates -- TODO confirm */
+#define NOTEX_VERTEX_FORMAT     2
+#define TEX0_VERTEX_FORMAT      3
+#define TEX1_VERTEX_FORMAT      4
+#define PROJ_TEX1_VERTEX_FORMAT 0   /* this and the ids below are 0; the matching HAVE_*_VERTICES switches are 0 too */
+#define TEX2_VERTEX_FORMAT      0
+#define TEX3_VERTEX_FORMAT      0
+#define PROJ_TEX3_VERTEX_FORMAT 0
+/* DO_*: per-attribute switches for the vertex templates; IND is #defined before each template #include. */
+#define DO_XYZW (IND & MACH64_XYZW_BIT)
+#define DO_RGBA (IND & MACH64_RGBA_BIT)
+#define DO_SPEC (IND & MACH64_SPEC_BIT)
+#define DO_FOG  (IND & MACH64_FOG_BIT)
+#define DO_TEX0 (IND & MACH64_TEX0_BIT)
+#define DO_TEX1 (IND & MACH64_TEX1_BIT)
+#define DO_TEX2 0   /* no setup bit exists for a third texture unit */
+#define DO_TEX3 0   /* no setup bit exists for a fourth texture unit */
+#define DO_PTEX (IND & MACH64_PTEX_BIT)
+
+#define VERTEX mach64Vertex   /* vertex type name used by the included templates */
+#define LOCALVARS mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);   /* expanded at the top of template functions; needs `ctx` in scope */
+#define GET_VIEWPORT_MAT() mmesa->hw_viewport
+#define GET_TEXSOURCE(n)  mmesa->tmu_source[n]
+#define GET_VERTEX_FORMAT() mmesa->vertex_format
+#define GET_VERTEX_STORE() mmesa->verts
+#define GET_VERTEX_SIZE() (mmesa->vertex_size * sizeof(GLuint))   /* in bytes; parenthesized so it is safe inside larger expressions */
+
+#define HAVE_HW_VIEWPORT    0   /* viewport transform is not done by hardware (template applies it) */
+#define HAVE_HW_DIVIDE      0   /* perspective divide is not done by hardware */
+#define HAVE_RGBA_COLOR     0   /* per the template docs: 0 means color is taken in bgra order */
+#define HAVE_TINY_VERTICES  1   /* v.tv vertex format available */
+#define HAVE_NOTEX_VERTICES 1   /* v.v vertex format with no texture coordinates available */
+#define HAVE_TEX0_VERTICES  1
+#define HAVE_TEX1_VERTICES  1
+#define HAVE_TEX2_VERTICES  0
+#define HAVE_TEX3_VERTICES  0
+#define HAVE_PTEX_VERTICES  0   /* v.pv (projective) vertex format not available */
+
+#define UNVIEWPORT_VARS						\
+   const GLfloat dx = - (GLfloat)mmesa->drawX - SUBPIXEL_X;	\
+   const GLfloat dy = (mmesa->driDrawable->h +			\
+		       (GLfloat)mmesa->drawY  + SUBPIXEL_Y);	\
+   const GLfloat sz = 1.0 / mmesa->depth_scale
+/* UNVIEWPORT_*: offset X by dx, flip Y around dy, scale Z by sz (all declared by UNVIEWPORT_VARS). */
+#if MACH64_NATIVE_VTXFMT
+/* NOTE(review): the /4.0 and >>15 suggest fixed-point native coordinates -- confirm against the native template. */
+#define UNVIEWPORT_X(x)    (((GLfloat)(x) / 4.0)  +  dx)
+#define UNVIEWPORT_Y(y)    (- ((GLfloat)(y) / 4.0)  +  dy)
+#define UNVIEWPORT_Z(z)    ((GLfloat)((z) >> 15)  *  sz)
+
+#else
+/* Arguments and results are parenthesized; the historical trailing ';' is kept for existing users. */
+#define UNVIEWPORT_X(x)    ((x)  +  dx);
+#define UNVIEWPORT_Y(y)    (- (y)  +  dy);
+#define UNVIEWPORT_Z(z)    ((z)  *  sz);
+
+#endif
+
+#define PTEX_FALLBACK() FALLBACK(MACH64_CONTEXT(ctx), MACH64_FALLBACK_TEXTURE, 1)
+/* Function names handed to the included templates; presumably their float-color import helpers -- TODO confirm. */
+#define IMPORT_FLOAT_COLORS mach64_import_float_colors
+#define IMPORT_FLOAT_SPEC_COLORS mach64_import_float_spec_colors
+/* Interp / copy-pv calls made by the templates go through the setup_tab[] entry for the current SetupIndex. */
+#define INTERP_VERTEX setup_tab[mmesa->SetupIndex].interp
+#define COPY_PV_VERTEX setup_tab[mmesa->SetupIndex].copy_pv
+
+/***********************************************************************
+ *         Generate  pv-copying and translation functions              *
+ ***********************************************************************/
+
+#if MACH64_NATIVE_VTXFMT
+
+#define TAG(x) mach64_##x
+#include "mach64_native_vb.c"
+
+#else
+
+#define TAG(x) mach64_##x
+#include "tnl_dd/t_dd_vb.c"
+
+#endif
+
+/***********************************************************************
+ *             Generate vertex emit and interp functions               *
+ ***********************************************************************/
+
+
+#if MACH64_NATIVE_VTXFMT
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT)
+#define TAG(x) x##_wg
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT)
+#define TAG(x) x##_wgs
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_wgt0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_wgt0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_PTEX_BIT)
+#define TAG(x) x##_wgpt0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_wgst0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\
+             MACH64_TEX1_BIT)
+#define TAG(x) x##_wgst0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\
+             MACH64_PTEX_BIT)
+#define TAG(x) x##_wgspt0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT)
+#define TAG(x) x##_wgf
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT)
+#define TAG(x) x##_wgfs
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_wgft0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|\
+             MACH64_TEX1_BIT)
+#define TAG(x) x##_wgft0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|\
+             MACH64_PTEX_BIT)
+#define TAG(x) x##_wgfpt0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\
+             MACH64_TEX0_BIT)
+#define TAG(x) x##_wgfst0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\
+             MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_wgfst0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\
+             MACH64_TEX0_BIT|MACH64_PTEX_BIT)
+#define TAG(x) x##_wgfspt0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_TEX0_BIT)
+#define TAG(x) x##_t0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_t0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_FOG_BIT)
+#define TAG(x) x##_f
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_FOG_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_ft0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_FOG_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_ft0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT)
+#define TAG(x) x##_g
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT)
+#define TAG(x) x##_gs
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_gt0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_gt0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_gst0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_gst0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT)
+#define TAG(x) x##_gf
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT)
+#define TAG(x) x##_gfs
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_gft0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_gft0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_gfst0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\
+             MACH64_TEX1_BIT)
+#define TAG(x) x##_gfst0t1
+#include "mach64_native_vbtmp.h"
+
+#else
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT)
+#define TAG(x) x##_wg
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT)
+#define TAG(x) x##_wgs
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_wgt0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_wgt0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_PTEX_BIT)
+#define TAG(x) x##_wgpt0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_wgst0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\
+             MACH64_TEX1_BIT)
+#define TAG(x) x##_wgst0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\
+             MACH64_PTEX_BIT)
+#define TAG(x) x##_wgspt0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT)
+#define TAG(x) x##_wgf
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT)
+#define TAG(x) x##_wgfs
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_wgft0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|\
+             MACH64_TEX1_BIT)
+#define TAG(x) x##_wgft0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|\
+             MACH64_PTEX_BIT)
+#define TAG(x) x##_wgfpt0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\
+             MACH64_TEX0_BIT)
+#define TAG(x) x##_wgfst0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\
+             MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_wgfst0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\
+             MACH64_TEX0_BIT|MACH64_PTEX_BIT)
+#define TAG(x) x##_wgfspt0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_TEX0_BIT)
+#define TAG(x) x##_t0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_t0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_FOG_BIT)
+#define TAG(x) x##_f
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_FOG_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_ft0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_FOG_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_ft0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT)
+#define TAG(x) x##_g
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT)
+#define TAG(x) x##_gs
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_gt0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_gt0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_gst0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_gst0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT)
+#define TAG(x) x##_gf
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT)
+#define TAG(x) x##_gfs
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_gft0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_gft0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_gfst0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\
+             MACH64_TEX1_BIT)
+#define TAG(x) x##_gfst0t1
+#include "mach64_vbtmp.h"
+
+#endif
+
+static void init_setup_tab( void )
+{  /* Calls the init_<tag>() generated by each template instantiation above; each presumably fills its own setup_tab[] slot. */
+   init_wg();
+   init_wgs();
+   init_wgt0();
+   init_wgt0t1();
+   init_wgpt0();
+   init_wgst0();
+   init_wgst0t1();
+   init_wgspt0();
+   init_wgf();
+   init_wgfs();
+   init_wgft0();
+   init_wgft0t1();
+   init_wgfpt0();
+   init_wgfst0();
+   init_wgfst0t1();
+   init_wgfspt0();
+   init_t0();   /* from here on: tags without XYZW, used for partial vertex updates */
+   init_t0t1();
+   init_f();
+   init_ft0();
+   init_ft0t1();
+   init_g();
+   init_gs();
+   init_gt0();
+   init_gt0t1();
+   init_gst0();
+   init_gst0t1();
+   init_gf();
+   init_gfs();
+   init_gft0();
+   init_gft0t1();
+   init_gfst0();
+   init_gfst0t1();
+}
+
+
+
+void mach64PrintSetupFlags( char *msg, GLuint flags )
+{  /* Debug helper: writes msg, flags in decimal and the names of its set bits to stderr. */
+   const char *xyzw = (flags & MACH64_XYZW_BIT) ? " xyzw," : "";
+   const char *rgba = (flags & MACH64_RGBA_BIT) ? " rgba," : "";
+   const char *spec = (flags & MACH64_SPEC_BIT) ? " spec," : "";
+   const char *fog  = (flags & MACH64_FOG_BIT)  ? " fog," : "";
+   const char *tex0 = (flags & MACH64_TEX0_BIT) ? " tex-0," : "";
+   const char *tex1 = (flags & MACH64_TEX1_BIT) ? " tex-1," : "";
+   const char *ptex = (flags & MACH64_PTEX_BIT) ? " ptex," : "";
+
+   fprintf( stderr, "%s: %d %s%s%s%s%s%s%s\n",
+            msg, (int)flags, xyzw, rgba, spec, fog, tex0, tex1, ptex );
+}
+
+
+
+
+void mach64CheckTexSizes( GLcontext *ctx )
+{
+   /* When the current entry's check_tex_sizes() returns false, adds
+    * MACH64_PTEX_BIT to SetupIndex and marks every input as changed. */
+   mach64ContextPtr mmesa = MACH64_CONTEXT( ctx );
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   if (setup_tab[mmesa->SetupIndex].check_tex_sizes(ctx))
+      return;
+
+   mmesa->SetupNewInputs = ~0;   /* invalidate stored verts */
+   mmesa->SetupIndex |= MACH64_PTEX_BIT;
+
+   if (mmesa->Fallback ||
+       (ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED)))
+      return;   /* hooks are left untouched in these states */
+   tnl->Driver.Render.Interp = setup_tab[mmesa->SetupIndex].interp;
+   tnl->Driver.Render.CopyPV = setup_tab[mmesa->SetupIndex].copy_pv;
+}
+
+void mach64BuildVertices( GLcontext *ctx,
+			GLuint start,
+			GLuint count,
+			GLuint newinputs )
+{
+   /* Re-emits vertices start..count-1 into mmesa->verts for the attributes
+    * flagged in newinputs (plus any pending SetupNewInputs).  A position
+    * change rebuilds the whole vertex; otherwise only the changed attributes
+    * that are part of SetupIndex are emitted.
+    */
+   mach64ContextPtr mmesa = MACH64_CONTEXT( ctx );
+   GLuint stride = mmesa->vertex_size * sizeof(int);
+   GLubyte *v = ((GLubyte *)mmesa->verts + (start * stride));
+   GLuint ind = 0;
+
+   newinputs |= mmesa->SetupNewInputs;
+   mmesa->SetupNewInputs = 0;
+   if (!newinputs)
+      return;
+
+   if (newinputs & VERT_BIT_POS) {
+      setup_tab[mmesa->SetupIndex].emit( ctx, start, count, v, stride );
+      return;
+   }
+
+   if (newinputs & VERT_BIT_COLOR0) ind |= MACH64_RGBA_BIT;
+   if (newinputs & VERT_BIT_COLOR1) ind |= MACH64_SPEC_BIT;
+   if (newinputs & VERT_BIT_TEX0)   ind |= MACH64_TEX0_BIT;
+   if (newinputs & VERT_BIT_TEX1)   ind |= MACH64_TEX1_BIT;
+   if (newinputs & VERT_BIT_FOG)    ind |= MACH64_FOG_BIT;
+   if (mmesa->SetupIndex & MACH64_PTEX_BIT)
+      ind = ~0;   /* projective texturing: always use the full SetupIndex */
+   ind &= mmesa->SetupIndex;
+   if (!ind)
+      return;
+
+   /* Not every attribute subset has a generated emit function (the init list
+    * has no spec-only entry, for instance), and setup_tab[] is static, so
+    * such slots are presumably still NULL.  Emit the complete vertex instead
+    * of calling through a null pointer.
+    */
+   if (!setup_tab[ind].emit)
+      ind = mmesa->SetupIndex;
+   setup_tab[ind].emit( ctx, start, count, v, stride );
+}
+
+void mach64ChooseVertexState( GLcontext *ctx )
+{
+   /* Derives SetupIndex from GL state (specular, fog, texturing), installs
+    * the matching interp/copy_pv hooks and updates the vertex format/size. */
+   mach64ContextPtr mmesa = MACH64_CONTEXT( ctx );
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLuint index = MACH64_XYZW_BIT|MACH64_RGBA_BIT;   /* always part of the index */
+
+   if (ctx->Fog.Enabled)
+      index |= MACH64_FOG_BIT;
+   if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR)
+      index |= MACH64_SPEC_BIT;
+   if (ctx->Texture._EnabledUnits) {
+      index |= MACH64_TEX0_BIT;
+      /* second coordinate set only when both units are really enabled */
+      if (ctx->Texture.Unit[0]._ReallyEnabled &&
+          ctx->Texture.Unit[1]._ReallyEnabled)
+         index |= MACH64_TEX1_BIT;
+   }
+   mmesa->SetupIndex = index;
+
+   if (!(ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED))) {
+      tnl->Driver.Render.Interp = setup_tab[index].interp;
+      tnl->Driver.Render.CopyPV = setup_tab[index].copy_pv;
+   } else {
+      tnl->Driver.Render.Interp = mach64_interp_extras;
+      tnl->Driver.Render.CopyPV = mach64_copy_pv_extras;
+   }
+
+#if 0
+   if (MACH64_DEBUG & DEBUG_VERBOSE_MSG) {
+      mach64PrintSetupFlags( __FUNCTION__, index );
+  }
+#endif
+
+   if (setup_tab[index].vertex_format == mmesa->vertex_format)
+      return;
+
+   FLUSH_BATCH(mmesa);   /* presumably queued vertices still use the old layout -- TODO confirm */
+   mmesa->vertex_format = setup_tab[index].vertex_format;
+   mmesa->vertex_size = setup_tab[index].vertex_size;
+}
+
+
+#if 0   /* compiled out; the matching prototype in mach64_vb.h is under #if 0 as well */
+void mach64_emit_contiguous_verts( GLcontext *ctx,
+				 GLuint start,
+				 GLuint count )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLuint vertex_size = mmesa->vertex_size * 4;   /* bytes per vertex, taking 4 bytes per vertex_size unit */
+   GLuint *dest = mach64AllocDmaLow( mmesa, (count-start) * vertex_size);   /* room for count-start vertices */
+   setup_tab[mmesa->SetupIndex].emit( ctx, start, count, dest, vertex_size );
+}
+#endif
+
+
+void mach64InitVB( GLcontext *ctx )
+{
+   /* Requests vb.Size * 4 * 16 bytes through ALIGN_MALLOC(..., 32) for this
+    * context's vertex store and runs init_setup_tab() on the first call only. */
+   static int tab_ready = 0;
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   const GLuint nverts = TNL_CONTEXT(ctx)->vb.Size;
+
+   mmesa->verts = (GLubyte *)ALIGN_MALLOC(nverts * 4 * 16, 32);
+
+   if (tab_ready)
+      return;
+   init_setup_tab();
+   tab_ready = 1;
+}
+
+
+void mach64FreeVB( GLcontext *ctx )
+{
+   /* Frees the vertex store set up by mach64InitVB; a repeated call is a no-op. */
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   if (mmesa->verts == 0) return;
+   ALIGN_FREE(mmesa->verts);
+   mmesa->verts = 0;
+}
diff --git a/src/mach64_vb.h b/src/mach64_vb.h
new file mode 100644
index 0000000..bcc4759
--- /dev/null
+++ b/src/mach64_vb.h
@@ -0,0 +1,77 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#ifndef __MACH64_VB_H__
+#define __MACH64_VB_H__
+
+#include "mtypes.h"
+#include "swrast/swrast.h"
+#include "mach64_context.h"
+
+/* premultiply texture coordinates by homogeneous coordinate */
+#define MACH64_PREMULT_TEXCOORDS
+/* State flags treated as "vertex state"; mach64Fallback() ORs this mask into NewGLState when a fallback ends. */
+#define _MACH64_NEW_VERTEX_STATE (_DD_NEW_SEPARATE_SPECULAR |          \
+                               _DD_NEW_TRI_LIGHT_TWOSIDE |             \
+                               _DD_NEW_TRI_UNFILLED |                  \
+                               _NEW_TEXTURE |                          \
+                               _NEW_FOG)
+
+/* CheckTexSizes may add the projective-texture bit to SetupIndex; ChooseVertexState recomputes SetupIndex from GL state. */
+extern void mach64CheckTexSizes( GLcontext *ctx );
+extern void mach64ChooseVertexState( GLcontext *ctx );
+/* Emits vertices start..count-1 for the attributes flagged in newinputs. */
+extern void mach64BuildVertices( GLcontext *ctx, GLuint start, GLuint count,
+				   GLuint newinputs );
+/* Debug helper: prints msg and the names of the bits set in flags to stderr. */
+extern void mach64PrintSetupFlags(char *msg, GLuint flags );
+/* Allocate / release the per-context vertex store (mmesa->verts). */
+extern void mach64InitVB( GLcontext *ctx );
+extern void mach64FreeVB( GLcontext *ctx );
+
+#if 0
+extern void mach64_emit_contiguous_verts( GLcontext *ctx,
+					    GLuint start,
+					    GLuint count );
+
+extern void mach64_emit_indexed_verts( GLcontext *ctx,
+					 GLuint start,
+					 GLuint count );
+#endif
+/* NOTE(review): the two functions below are presumably generated by the vertex template with TAG(x) = mach64_##x -- confirm. */
+extern void mach64_translate_vertex( GLcontext *ctx,
+				       const mach64Vertex *src,
+				       SWvertex *dst );
+
+extern void mach64_print_vertex( GLcontext *ctx, const mach64Vertex *v );
+
+
+#endif /* __MACH64_VB_H__ */
diff --git a/src/mach64_vbtmp.h b/src/mach64_vbtmp.h
new file mode 100644
index 0000000..c1207ca
--- /dev/null
+++ b/src/mach64_vbtmp.h
@@ -0,0 +1,770 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keithw@valinux.com>
+ *
+ * Modified for mach64 by:
+ *    Leif Delgass <ldelgass@retinalburn.net>
+ *    José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+
+/* Unlike the other templates here, this assumes quite a bit about the
+ * underlying hardware.  Specifically it assumes a d3d-like vertex
+ * format, with a layout more or less constrained to look like the
+ * following:
+ *
+ * union {
+ *    struct {
+ *        float x, y, z, w;
+ *        struct { char r, g, b, a; } color;
+ *        struct { char r, g, b, fog; } spec;
+ *        float u0, v0;
+ *        float u1, v1;
+ *        float u2, v2;
+ *        float u3, v3;
+ *    } v;
+ *    struct {
+ *        float x, y, z, w;
+ *        struct { char r, g, b, a; } color;
+ *        struct { char r, g, b, fog; } spec;
+ *        float u0, v0, q0;
+ *        float u1, v1, q1;
+ *        float u2, v2, q2;
+ *        float u3, v3, q3;
+ *    } pv;
+ *    struct {
+ *        float x, y, z;
+ *        struct { char r, g, b, a; } color;
+ *    } tv;
+ *    float f[16];
+ *    unsigned int ui[16];
+ *    unsigned char ub4[4][16];
+ * }
+ *
+
+ * DO_XYZW:  Emit xyz and maybe w coordinates.
+ * DO_RGBA:  Emit color.
+ * DO_SPEC:  Emit specular color.
+ * DO_FOG:   Emit fog coordinate in specular alpha.
+ * DO_TEX0:  Emit tex0 u,v coordinates.
+ * DO_TEX1:  Emit tex1 u,v coordinates.
+ * DO_TEX2:  Emit tex2 u,v coordinates.
+ * DO_TEX3:  Emit tex3 u,v coordinates.
+ * DO_PTEX:  Emit tex0,1,2,3 q coordinates where possible.
+ *
+ * HAVE_RGBA_COLOR: Hardware takes color in rgba order (else bgra).
+ *
+ * HAVE_HW_VIEWPORT:  Hardware performs viewport transform.
+ * HAVE_HW_DIVIDE:  Hardware performs perspective divide.
+ *
+ * HAVE_TINY_VERTICES:  Hardware understands v.tv format.
+ * HAVE_PTEX_VERTICES:  Hardware understands v.pv format.
+ * HAVE_NOTEX_VERTICES:  Hardware understands v.v format with texcount 0.
+ *
+ * Additionally, this template assumes it is emitting *transformed*
+ * vertices; the modifications to emit untransformed vertices (ie. to
+ * t&l hardware) are probably too great to coexist with the code
+ * already in this file.
+ *
+ * NOTE: The PTEX vertex format always includes TEX0 and TEX1, even if
+ * only TEX0 is enabled, in order to maintain a vertex size which is
+ * an exact number of quadwords.
+ */
+
+#if (HAVE_HW_VIEWPORT)   /* hardware applies the viewport: store coordinates unchanged */
+#define VIEWPORT_X(dst,x) ((dst) = (x))
+#define VIEWPORT_Y(dst,y) ((dst) = (y))
+#define VIEWPORT_Z(dst,z) ((dst) = (z))
+#else                    /* scale and offset with `s`, which the emit function loads from GET_VIEWPORT_MAT() */
+#define VIEWPORT_X(dst,x) ((dst) = s[0]  * (x) + s[12])
+#define VIEWPORT_Y(dst,y) ((dst) = s[5]  * (y) + s[13])
+#define VIEWPORT_Z(dst,z) ((dst) = s[10] * (z) + s[14])
+#endif
+/* Hardware perspective divide is only supported together with projective-texture vertices. */
+#if (HAVE_HW_DIVIDE && !HAVE_PTEX_VERTICES)
+#error "can't cope with this combination"
+#endif
+/* LOCALVARS is expanded at the top of each generated function; empty unless the driver defines it. */
+#ifndef LOCALVARS
+#define LOCALVARS
+#endif
+/* CHECK_HW_DIVIDE defaults to 1; it is ANDed with the HAVE_HW_* switches when choosing clip vs. NDC coordinates. */
+#ifndef CHECK_HW_DIVIDE
+#define CHECK_HW_DIVIDE 1
+#endif
+
+#if (HAVE_HW_DIVIDE || DO_SPEC || DO_TEX0 || DO_FOG || !HAVE_TINY_VERTICES)
+
+static void TAG(emit)( GLcontext *ctx,
+		       GLuint start, GLuint end,
+		       void *dest,
+		       GLuint stride )
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLfloat (*tc0)[4], (*tc1)[4], (*fog)[4];
+   GLfloat (*tc2)[4], (*tc3)[4];
+   GLfloat (*spec)[4];
+   GLfloat (*col)[4];
+   GLuint col_stride;
+   GLuint tc0_stride, tc1_stride, spec_stride, fog_stride;
+   GLuint tc2_stride, tc3_stride;
+   GLuint tc0_size, tc1_size;
+   GLuint tc2_size, tc3_size;
+   GLfloat (*coord)[4];
+   GLuint coord_stride;
+   VERTEX *v = (VERTEX *)dest;
+   const GLfloat *s = GET_VIEWPORT_MAT();
+   const GLubyte *mask = VB->ClipMask;
+   int i;
+
+/*     fprintf(stderr, "%s(big) importable %d %d..%d\n",  */
+/*  	   __FUNCTION__, VB->importable_data, start, end); */
+
+   if (HAVE_HW_VIEWPORT && HAVE_HW_DIVIDE && CHECK_HW_DIVIDE) {
+      (void) s;
+      coord = VB->ClipPtr->data;
+      coord_stride = VB->ClipPtr->stride;
+   }
+   else {
+      coord = VB->NdcPtr->data;
+      coord_stride = VB->NdcPtr->stride;
+   }
+
+   if (DO_TEX3) {
+      const GLuint t3 = GET_TEXSOURCE(3);
+      tc3 = VB->TexCoordPtr[t3]->data;
+      tc3_stride = VB->TexCoordPtr[t3]->stride;
+      if (DO_PTEX)
+	 tc3_size = VB->TexCoordPtr[t3]->size;
+   }
+
+   if (DO_TEX2) {
+      const GLuint t2 = GET_TEXSOURCE(2);
+      tc2 = VB->TexCoordPtr[t2]->data;
+      tc2_stride = VB->TexCoordPtr[t2]->stride;
+      if (DO_PTEX)
+	 tc2_size = VB->TexCoordPtr[t2]->size;
+   }
+
+   if (DO_TEX1) {
+      const GLuint t1 = GET_TEXSOURCE(1);
+      tc1 = VB->TexCoordPtr[t1]->data;
+      tc1_stride = VB->TexCoordPtr[t1]->stride;
+      if (DO_PTEX)
+	 tc1_size = VB->TexCoordPtr[t1]->size;
+   }
+
+   if (DO_TEX0) {
+      const GLuint t0 = GET_TEXSOURCE(0);
+      tc0_stride = VB->TexCoordPtr[t0]->stride;
+      tc0 = VB->TexCoordPtr[t0]->data;
+      if (DO_PTEX) 
+	 tc0_size = VB->TexCoordPtr[t0]->size;
+   }
+
+   if (DO_RGBA) {
+      col = VB->ColorPtr[0]->data;
+      col_stride = VB->ColorPtr[0]->stride;
+   }
+
+   if (DO_SPEC) {
+      spec = VB->SecondaryColorPtr[0]->data;
+      spec_stride = VB->SecondaryColorPtr[0]->stride;
+   } else {
+      spec = (GLfloat (*)[4])ctx->Current.Attrib[VERT_ATTRIB_COLOR1];
+      spec_stride = 0;
+   }
+
+   if (DO_FOG) {
+      if (VB->FogCoordPtr) {
+	 fog = VB->FogCoordPtr->data;
+	 fog_stride = VB->FogCoordPtr->stride;
+      } else {
+	 static GLfloat tmp[4] = {0, 0, 0, 0};
+	 fog = &tmp;
+	 fog_stride = 0;
+      }
+   }
+
+   /* May have nonstandard strides:
+    */
+   if (start) {
+      coord =  (GLfloat (*)[4])((GLubyte *)coord + start * coord_stride);
+      if (DO_TEX0)
+	 tc0 =  (GLfloat (*)[4])((GLubyte *)tc0 + start * tc0_stride);
+      if (DO_TEX1) 
+	 tc1 =  (GLfloat (*)[4])((GLubyte *)tc1 + start * tc1_stride);
+      if (DO_TEX2) 
+	 tc2 =  (GLfloat (*)[4])((GLubyte *)tc2 + start * tc2_stride);
+      if (DO_TEX3) 
+	 tc3 =  (GLfloat (*)[4])((GLubyte *)tc3 + start * tc3_stride);
+      if (DO_RGBA) 
+	 STRIDE_4F(col, start * col_stride);
+      if (DO_SPEC)
+	 STRIDE_4F(spec, start * spec_stride);
+      if (DO_FOG)
+	 STRIDE_4F(fog, start * fog_stride);
+      //	 fog =  (GLfloat (*)[4])((GLubyte *)fog + start * fog_stride);
+      /*  STRIDE_F(fog, start * fog_stride); */
+   }
+   
+   for (i=start; i < end; i++, v = (VERTEX *)((GLubyte *)v + stride)) {
+      if (DO_XYZW) {
+	 if (HAVE_HW_VIEWPORT || mask[i] == 0) {
+	    /* unclipped */
+	    VIEWPORT_X(v->v.x, coord[0][0]);
+	    VIEWPORT_Y(v->v.y, coord[0][1]);
+	    VIEWPORT_Z(v->v.z, coord[0][2]);
+	    v->v.w = coord[0][3];
+	 } else {
+	    /* clipped */
+	    v->v.w = 1.0;
+	 }
+	 if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) {
+	    fprintf(stderr, "%s: vert (importable) %d: %.2f %.2f %.2f %f\n", 
+		    __FUNCTION__, i, v->v.x, v->v.y, v->v.z, v->v.w);
+	 }
+	 coord =  (GLfloat (*)[4])((GLubyte *)coord +  coord_stride);
+      }
+      if (DO_RGBA) {
+	 if (HAVE_RGBA_COLOR) {
+	    *(GLuint *)&v->v.color = *(GLuint *)&col[0];
+	    STRIDE_4F(col, col_stride);
+	 } else {
+	    v->v.color.blue  = col[0][2];
+	    v->v.color.green = col[0][1];
+	    v->v.color.red   = col[0][0];
+	    v->v.color.alpha = col[0][3];
+	    STRIDE_4F(col, col_stride);
+	 }
+      }
+      if (DO_SPEC) {
+	 v->v.specular.red = spec[0][0];
+	 v->v.specular.green = spec[0][1];
+	 v->v.specular.blue = spec[0][2];
+	 STRIDE_4F(spec, spec_stride);
+      }
+      if (DO_FOG) {
+	 v->v.specular.alpha = fog[0][0] * 255.0;
+	 /*  STRIDE_F(fog, fog_stride); */
+	 fog =  (GLfloat (*)[4])((GLubyte *)fog + fog_stride);
+      }
+      if (DO_TEX0) {
+	 v->v.u0 = tc0[0][0];
+	 v->v.v0 = tc0[0][1];
+	 if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) {
+	    fprintf(stderr, "%s: vert (importable) %d: u0: %.2f, v0: %.2f, w: %f\n", 
+		    __FUNCTION__, i, v->v.u0, v->v.v0, v->v.w);
+	 }
+#ifdef MACH64_PREMULT_TEXCOORDS
+	 v->v.u0 *= v->v.w;
+	 v->v.v0 *= v->v.w;
+#endif
+	 if (DO_PTEX) {
+	    if (HAVE_PTEX_VERTICES) {
+	       if (tc0_size == 4) 
+		  v->pv.q0 = tc0[0][3];
+	       else
+		  v->pv.q0 = 1.0;
+	    } 
+	    else if (tc0_size == 4) {
+#ifdef MACH64_PREMULT_TEXCOORDS
+	       v->v.w *= tc0[0][3];
+#else
+	       float rhw = 1.0 / tc0[0][3];
+	       v->v.w *= tc0[0][3];
+	       v->v.u0 *= rhw;
+	       v->v.v0 *= rhw;
+#endif
+	    } 
+	 } 
+	 tc0 =  (GLfloat (*)[4])((GLubyte *)tc0 +  tc0_stride);
+      }
+      if (DO_TEX1) {
+	 if (DO_PTEX) {
+	    v->pv.u1 = tc1[0][0];
+	    v->pv.v1 = tc1[0][1];
+	    if (tc1_size == 4) 
+	       v->pv.q1 = tc1[0][3];
+	    else
+	       v->pv.q1 = 1.0;
+	 } 
+	 else {
+	    v->v.u1 = tc1[0][0];
+	    v->v.v1 = tc1[0][1];
+	 }
+#ifdef MACH64_PREMULT_TEXCOORDS
+	 v->v.u1 *= v->v.w;
+	 v->v.v1 *= v->v.w;
+#endif
+	 tc1 =  (GLfloat (*)[4])((GLubyte *)tc1 +  tc1_stride);
+      } 
+      else if (DO_PTEX) {
+	 *(GLuint *)&v->pv.q1 = 0;	/* avoid culling on radeon */
+      }
+      if (DO_TEX2) {
+	 if (DO_PTEX) {
+	    v->pv.u2 = tc2[0][0];
+	    v->pv.v2 = tc2[0][1];
+	    if (tc2_size == 4) 
+	       v->pv.q2 = tc2[0][3];
+	    else
+	       v->pv.q2 = 1.0;
+	 } 
+	 else {
+	    v->v.u2 = tc2[0][0];
+	    v->v.v2 = tc2[0][1];
+	 }
+	 tc2 =  (GLfloat (*)[4])((GLubyte *)tc2 +  tc2_stride);
+      } 
+      if (DO_TEX3) {
+	 if (DO_PTEX) {
+	    v->pv.u3 = tc3[0][0];
+	    v->pv.v3 = tc3[0][1];
+	    if (tc3_size == 4) 
+	       v->pv.q3 = tc3[0][3];
+	    else
+	       v->pv.q3 = 1.0;
+	    } 
+	 else {
+	    v->v.u3 = tc3[0][0];
+	    v->v.v3 = tc3[0][1];
+	 }
+	 tc3 =  (GLfloat (*)[4])((GLubyte *)tc3 +  tc3_stride);
+      } 
+   }
+}
+
+#else
+#if DO_XYZW
+
+#if HAVE_HW_DIVIDE
+#error "cannot use tiny vertices with hw perspective divide"
+#endif
+
+static void TAG(emit)( GLcontext *ctx, GLuint start, GLuint end,
+		       void *dest, GLuint stride )
+{  /* xyz + packed-color emit: writes 4 dwords to dest for every vertex index in [start, end) */
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLfloat (*col)[4];
+   GLuint col_stride;
+   GLfloat (*coord)[4] = VB->NdcPtr->data;
+   GLuint coord_stride = VB->NdcPtr->stride;
+   GLfloat *v = (GLfloat *)dest;  /* output cursor, stepped 4 floats per vertex */
+   const GLubyte *mask = VB->ClipMask;  /* indexed by the absolute vertex number i */
+   const GLfloat *s = GET_VIEWPORT_MAT();
+   int i;
+
+   (void) s;  /* s is never named directly below; presumably only the VIEWPORT_* macros may read it -- TODO confirm */
+
+   ASSERT(stride == 4);  /* the loop never uses stride: v always advances by 4 floats */
+
+   col = VB->ColorPtr[0]->data;
+   col_stride = VB->ColorPtr[0]->stride;
+
+   /* Pack what's left into a 4-dword vertex.  Color is in a different
+    * place, and there is no 'w' coordinate.
+    */
+   if (start) {  /* skip the first `start` elements of both input arrays */
+      coord =  (GLfloat (*)[4])((GLubyte *)coord + start * coord_stride);
+      STRIDE_4F(col, start * col_stride);
+   }
+   
+   for (i=start; i < end; i++, v+=4) {
+      if (HAVE_HW_VIEWPORT || mask[i] == 0) {  /* position is written only when the clip mask byte is 0 (or HAVE_HW_VIEWPORT is set); otherwise v[0..2] keep their old contents */
+	 VIEWPORT_X(v[0], coord[0][0]);
+	 VIEWPORT_Y(v[1], coord[0][1]);
+	 VIEWPORT_Z(v[2], coord[0][2]);
+      }
+      coord =  (GLfloat (*)[4])((GLubyte *)coord +  coord_stride);  /* coord_stride is applied as a byte offset */
+      if (DO_RGBA) {
+	 if (HAVE_RGBA_COLOR) {
+	    *(GLuint *)&v[3] = *(GLuint *)col;  /* copy the first 32 bits of the color entry unchanged into dword 3 */
+	 }
+	 else {
+	    GLubyte *b = (GLubyte *)&v[3];  /* bytes 0..3 of dword 3 are filled from color components 2, 1, 0, 3 */
+	    UNCLAMPED_FLOAT_TO_UBYTE(b[0], col[0][2]);
+	    UNCLAMPED_FLOAT_TO_UBYTE(b[1], col[0][1]);
+	    UNCLAMPED_FLOAT_TO_UBYTE(b[2], col[0][0]);
+	    UNCLAMPED_FLOAT_TO_UBYTE(b[3], col[0][3]);
+	 }
+	 STRIDE_4F( col, col_stride );
+      }
+      if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) {  /* debug trace: x, y, z and the raw color dword in hex */
+	 fprintf(stderr, "vert (importable) %d: %.2f %.2f %.2f %x\n",
+		 i, v[0], v[1], v[2], *(int *)&v[3]);
+      }
+   }
+}
+#else
+static void TAG(emit)( GLcontext *ctx, GLuint start, GLuint end,
+		       void *dest, GLuint stride )
+{  /* color-only emit: rewrites just the packed color dword of each vertex in [start, end) */
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLfloat (*col)[4];
+   GLuint col_stride;
+   GLfloat *v = (GLfloat *)dest;  /* moved to the color slot of the first output vertex below */
+   int i;
+
+   col = VB->ColorPtr[0]->data;
+   col_stride = VB->ColorPtr[0]->stride;
+
+   if (start)  /* skip the first `start` color entries */
+      STRIDE_4F(col, col_stride * start);
+
+   /* Need to figure out where color is:
+    */
+   if (GET_VERTEX_FORMAT() == TINY_VERTEX_FORMAT)
+      v += 3;  /* tiny vertices: color is float slot 3 */
+   else
+      v += 4;  /* every other format: color is float slot 4 */
+
+   for (i=start; i < end; i++, STRIDE_F(v, stride)) {  /* presumably STRIDE_F steps v by `stride` bytes to the next vertex -- TODO confirm macro */
+      if (HAVE_RGBA_COLOR) {
+	 *(GLuint *)v = *(GLuint *)col[0];  /* copy the first 32 bits of the color entry unchanged */
+      }
+      else {
+	 GLubyte *b = (GLubyte *)v;  /* bytes 0..3 are filled from color components 2, 1, 0, 3 */
+	 UNCLAMPED_FLOAT_TO_UBYTE(b[0], col[0][2]);
+	 UNCLAMPED_FLOAT_TO_UBYTE(b[1], col[0][1]);
+	 UNCLAMPED_FLOAT_TO_UBYTE(b[2], col[0][0]);
+	 UNCLAMPED_FLOAT_TO_UBYTE(b[3], col[0][3]);
+      }
+      STRIDE_4F( col, col_stride );
+   }
+}
+#endif /* emit */
+#endif /* emit */
+
+#if (DO_XYZW) && (DO_RGBA)
+
+
+#if (HAVE_PTEX_VERTICES)
+static GLboolean TAG(check_tex_sizes)( GLcontext *ctx )
+{  /* HAVE_PTEX_VERTICES variant: GL_FALSE when a non-PTEX setup meets a size-4 texcoord array, else GL_TRUE */
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+
+   /* Force 'missing' texcoords to something valid.
+    */
+   if (DO_TEX3 && VB->TexCoordPtr[2] == 0)  /* a null lower-unit pointer is aliased to the next higher unit's array */
+      VB->TexCoordPtr[2] = VB->TexCoordPtr[3];
+
+   if (DO_TEX2 && VB->TexCoordPtr[1] == 0)
+      VB->TexCoordPtr[1] = VB->TexCoordPtr[2];
+
+   if (DO_TEX1 && VB->TexCoordPtr[0] == 0)
+      VB->TexCoordPtr[0] = VB->TexCoordPtr[1];
+
+   if (DO_PTEX)  /* a projective setup accepts any texcoord size */
+      return GL_TRUE;
+   
+   if ((DO_TEX3 && VB->TexCoordPtr[GET_TEXSOURCE(3)]->size == 4) ||  /* note: sizes are looked up via GET_TEXSOURCE(n), the fix-up above uses fixed indices */
+       (DO_TEX2 && VB->TexCoordPtr[GET_TEXSOURCE(2)]->size == 4) ||
+       (DO_TEX1 && VB->TexCoordPtr[GET_TEXSOURCE(1)]->size == 4) ||
+       (DO_TEX0 && VB->TexCoordPtr[GET_TEXSOURCE(0)]->size == 4))
+      return GL_FALSE;  /* some enabled unit supplies 4 components but this setup is not a PTEX one */
+
+   return GL_TRUE;
+}
+#else
+static GLboolean TAG(check_tex_sizes)( GLcontext *ctx )
+{  /* variant without ptex vertices: GL_FALSE (possibly after PTEX_FALLBACK()) when a non-PTEX setup meets a size-4 texcoord array */
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+
+   /* Force 'missing' texcoords to something valid.
+    */
+   if (DO_TEX3 && VB->TexCoordPtr[2] == 0)  /* a null lower-unit pointer is aliased to the next higher unit's array */
+      VB->TexCoordPtr[2] = VB->TexCoordPtr[3];
+
+   if (DO_TEX2 && VB->TexCoordPtr[1] == 0)
+      VB->TexCoordPtr[1] = VB->TexCoordPtr[2];
+
+   if (DO_TEX1 && VB->TexCoordPtr[0] == 0)
+      VB->TexCoordPtr[0] = VB->TexCoordPtr[1];
+
+   if (DO_PTEX)  /* a projective setup accepts any texcoord size */
+      return GL_TRUE;
+
+   /* No hardware support for projective texture.  Can fake it for
+    * TEX0 only.
+    */
+   if ((DO_TEX3 && VB->TexCoordPtr[GET_TEXSOURCE(3)]->size == 4) ||
+       (DO_TEX2 && VB->TexCoordPtr[GET_TEXSOURCE(2)]->size == 4) ||
+       (DO_TEX1 && VB->TexCoordPtr[GET_TEXSOURCE(1)]->size == 4)) {
+      PTEX_FALLBACK();  /* size-4 coords on units 1..3 cannot be faked */
+      return GL_FALSE;
+   }
+
+   if (DO_TEX0 && VB->TexCoordPtr[GET_TEXSOURCE(0)]->size == 4) {
+      if (DO_TEX1 || DO_TEX2 || DO_TEX3) {  /* size-4 coords on unit 0 with any other unit enabled also trigger the fallback */
+	 PTEX_FALLBACK();
+      }
+      return GL_FALSE;  /* returned whether or not the fallback above was triggered */
+   }
+
+   return GL_TRUE;
+}
+#endif /* ptex */
+
+
+static void TAG(interp)( GLcontext *ctx,
+			 GLfloat t,
+			 GLuint edst, GLuint eout, GLuint ein,
+			 GLboolean force_boundary )
+{  /* builds vertex edst from vertices eout and ein with factor t; force_boundary is not referenced in this body */
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLubyte *ddverts = GET_VERTEX_STORE();
+   GLuint size = GET_VERTEX_SIZE();  /* used as the byte distance between consecutive stored vertices */
+   const GLfloat *dstclip = VB->ClipPtr->data[edst];  /* position of edst is recomputed from this, not interpolated here */
+   GLfloat w;
+   const GLfloat *s = GET_VIEWPORT_MAT();
+
+   VERTEX *dst = (VERTEX *)(ddverts + (edst * size));
+   VERTEX *in  = (VERTEX *)(ddverts + (ein * size));
+   VERTEX *out = (VERTEX *)(ddverts + (eout * size));
+
+   (void)s;  /* s is never named directly below; presumably only the VIEWPORT_* macros may read it -- TODO confirm */
+
+   if (HAVE_HW_DIVIDE && CHECK_HW_DIVIDE) {  /* hardware divides: pass clip x/y/z through the viewport macros and keep clip w */
+      VIEWPORT_X( dst->v.x, dstclip[0] );
+      VIEWPORT_Y( dst->v.y, dstclip[1] );
+      VIEWPORT_Z( dst->v.z, dstclip[2] );
+      w = dstclip[3];
+   }
+   else {
+      w = (dstclip[3] == 0.0F) ? 1.0 : (1.0 / dstclip[3]);  /* reciprocal of clip w; 1.0 substituted when clip w is exactly 0 */
+      VIEWPORT_X( dst->v.x, dstclip[0] * w );
+      VIEWPORT_Y( dst->v.y, dstclip[1] * w );
+      VIEWPORT_Z( dst->v.z, dstclip[2] * w );
+   }
+
+   if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) {
+      fprintf( stderr, "%s: dst vert: %.2f %.2f %.2f %f\n",
+	       __FUNCTION__,
+	       dst->v.x,
+	       dst->v.y,
+	       dst->v.z,
+	       w );
+   }
+
+   if ((HAVE_HW_DIVIDE && CHECK_HW_DIVIDE) || 
+       DO_FOG || DO_SPEC || DO_TEX0 || DO_TEX1 ||
+       DO_TEX2 || DO_TEX3 || !HAVE_TINY_VERTICES) {  /* any of these selects the layout with a w slot; the else branch handles the 4-dword vertex */
+
+      dst->v.w = w;
+
+      INTERP_UB( t, dst->ub4[4][0], out->ub4[4][0], in->ub4[4][0] );  /* dword 4, byte by byte; presumably the packed color -- TODO confirm against VERTEX */
+      INTERP_UB( t, dst->ub4[4][1], out->ub4[4][1], in->ub4[4][1] );
+      INTERP_UB( t, dst->ub4[4][2], out->ub4[4][2], in->ub4[4][2] );
+      INTERP_UB( t, dst->ub4[4][3], out->ub4[4][3], in->ub4[4][3] );
+
+      if (DO_SPEC) {  /* bytes 0..2 of dword 5 */
+	 INTERP_UB( t, dst->ub4[5][0], out->ub4[5][0], in->ub4[5][0] );
+	 INTERP_UB( t, dst->ub4[5][1], out->ub4[5][1], in->ub4[5][1] );
+	 INTERP_UB( t, dst->ub4[5][2], out->ub4[5][2], in->ub4[5][2] );
+      }
+      if (DO_FOG) {  /* byte 3 of dword 5 */
+	 INTERP_UB( t, dst->ub4[5][3], out->ub4[5][3], in->ub4[5][3] );
+      }
+      if (DO_TEX0) {
+	 if (DO_PTEX) {
+	    if (HAVE_PTEX_VERTICES) {
+	       INTERP_F( t, dst->pv.u0, out->pv.u0, in->pv.u0 );
+	       INTERP_F( t, dst->pv.v0, out->pv.v0, in->pv.v0 );
+	       INTERP_F( t, dst->pv.q0, out->pv.q0, in->pv.q0 );
+	    } else {  /* no q slot in the vertex: recover q per endpoint, interpolate u*q, v*q and q, then divide back out */
+	       GLfloat wout = VB->NdcPtr->data[eout][3];
+	       GLfloat win = VB->NdcPtr->data[ein][3];
+	       GLfloat qout = out->pv.w / wout;  /* ratio of the stored vertex w to the NDC w of the same vertex */
+	       GLfloat qin = in->pv.w / win;
+	       GLfloat qdst, rqdst;
+
+	       ASSERT( !HAVE_HW_DIVIDE );
+
+	       INTERP_F( t, dst->v.u0, out->v.u0 * qout, in->v.u0 * qin );
+	       INTERP_F( t, dst->v.v0, out->v.v0 * qout, in->v.v0 * qin );
+	       INTERP_F( t, qdst, qout, qin );
+
+	       rqdst = 1.0 / qdst;  /* NOTE(review): no guard against qdst == 0 */
+	       dst->v.u0 *= rqdst;
+	       dst->v.v0 *= rqdst;
+	       dst->v.w *= rqdst;
+	    }
+	 }
+	 else {
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    GLfloat qout = 1 / out->v.w;  /* undo the per-vertex multiply by w before interpolating */
+	    GLfloat qin = 1 / in->v.w;
+	    
+	    INTERP_F( t, dst->v.u0, out->v.u0 * qout, in->v.u0 * qin);
+	    INTERP_F( t, dst->v.v0, out->v.v0 * qout, in->v.v0 * qin);
+
+	    dst->v.u0 *= w;  /* re-apply the multiply using the new vertex's w */
+	    dst->v.v0 *= w;
+#else
+	    INTERP_F( t, dst->v.u0, out->v.u0, in->v.u0 );
+	    INTERP_F( t, dst->v.v0, out->v.v0, in->v.v0 );
+#endif
+	 }
+      }
+      if (DO_TEX1) {
+	 if (DO_PTEX) {
+	    INTERP_F( t, dst->pv.u1, out->pv.u1, in->pv.u1 );
+	    INTERP_F( t, dst->pv.v1, out->pv.v1, in->pv.v1 );
+	    INTERP_F( t, dst->pv.q1, out->pv.q1, in->pv.q1 );
+	 } else {
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    GLfloat qout = 1 / out->v.w;  /* same un-multiply / re-multiply scheme as unit 0 */
+	    GLfloat qin = 1 / in->v.w;
+	    
+	    INTERP_F( t, dst->v.u1, out->v.u1 * qout, in->v.u1 * qin );
+	    INTERP_F( t, dst->v.v1, out->v.v1 * qout, in->v.v1 * qin );
+
+	    dst->v.u1 *= w;
+	    dst->v.v1 *= w;
+#else
+	    INTERP_F( t, dst->v.u1, out->v.u1, in->v.u1 );
+	    INTERP_F( t, dst->v.v1, out->v.v1, in->v.v1 );
+#endif
+	 }
+      }
+      else if (DO_PTEX) {  /* NOTE(review): the emit path in this file zeroes pv.q1 in its matching branch, while this overwrites pv.q0, which the HAVE_PTEX_VERTICES unit-0 path above has just interpolated -- confirm the intended field */
+	 dst->pv.q0 = 0.0;	/* must be a valid float on radeon */
+      }
+      if (DO_TEX2) {
+	 if (DO_PTEX) {
+	    INTERP_F( t, dst->pv.u2, out->pv.u2, in->pv.u2 );
+	    INTERP_F( t, dst->pv.v2, out->pv.v2, in->pv.v2 );
+	    INTERP_F( t, dst->pv.q2, out->pv.q2, in->pv.q2 );
+	 } else {
+	    INTERP_F( t, dst->v.u2, out->v.u2, in->v.u2 );
+	    INTERP_F( t, dst->v.v2, out->v.v2, in->v.v2 );
+	 }
+      }
+      if (DO_TEX3) {
+	 if (DO_PTEX) {
+	    INTERP_F( t, dst->pv.u3, out->pv.u3, in->pv.u3 );
+	    INTERP_F( t, dst->pv.v3, out->pv.v3, in->pv.v3 );
+	    INTERP_F( t, dst->pv.q3, out->pv.q3, in->pv.q3 );
+	 } else {
+	    INTERP_F( t, dst->v.u3, out->v.u3, in->v.u3 );
+	    INTERP_F( t, dst->v.v3, out->v.v3, in->v.v3 );
+	 }
+      }
+   } else {
+      /* 4-dword vertex.  Color is in v[3] and there is no oow coordinate.
+       */
+      INTERP_UB( t, dst->ub4[3][0], out->ub4[3][0], in->ub4[3][0] );
+      INTERP_UB( t, dst->ub4[3][1], out->ub4[3][1], in->ub4[3][1] );
+      INTERP_UB( t, dst->ub4[3][2], out->ub4[3][2], in->ub4[3][2] );
+      INTERP_UB( t, dst->ub4[3][3], out->ub4[3][3], in->ub4[3][3] );
+   }
+}
+
+#endif /* rgba && xyzw */
+
+
+static void TAG(init)( void )
+{  /* fills setup_tab[IND] for this template instance: emit/interp hooks, copy_pv routine, vertex format and size */
+   setup_tab[IND].emit = TAG(emit);
+
+#if (DO_XYZW && DO_RGBA)
+   setup_tab[IND].check_tex_sizes = TAG(check_tex_sizes);  /* these two functions are only compiled under the same condition */
+   setup_tab[IND].interp = TAG(interp);
+#endif
+
+   if (DO_SPEC)
+      setup_tab[IND].copy_pv = copy_pv_rgba4_spec5;
+   else if (HAVE_HW_DIVIDE || DO_SPEC || DO_FOG || DO_TEX0 || DO_TEX1 ||  /* DO_SPEC is always false here: the branch above already took that case */
+	    DO_TEX2 || DO_TEX3 || !HAVE_TINY_VERTICES)
+      setup_tab[IND].copy_pv = copy_pv_rgba4;
+   else
+      setup_tab[IND].copy_pv = copy_pv_rgba3;  /* remaining case; its condition matches the tiny-format branch at the end of this function */
+
+   if (DO_TEX3) {  /* the highest enabled texture unit picks the format; sizes are presumably in dwords -- TODO confirm */
+      if (DO_PTEX) {
+	 ASSERT(HAVE_PTEX_VERTICES);
+	 setup_tab[IND].vertex_format = PROJ_TEX3_VERTEX_FORMAT;
+	 setup_tab[IND].vertex_size = 18;
+      }
+      else {
+	 setup_tab[IND].vertex_format = TEX3_VERTEX_FORMAT;
+	 setup_tab[IND].vertex_size = 14;
+      }
+   }
+   else if (DO_TEX2) {
+      if (DO_PTEX) {
+	 ASSERT(HAVE_PTEX_VERTICES);
+	 setup_tab[IND].vertex_format = PROJ_TEX3_VERTEX_FORMAT;  /* same format and size as the DO_TEX3 projective case */
+	 setup_tab[IND].vertex_size = 18;
+      }
+      else {
+	 setup_tab[IND].vertex_format = TEX2_VERTEX_FORMAT;
+	 setup_tab[IND].vertex_size = 12;
+      }
+   }
+   else if (DO_TEX1) {
+      if (DO_PTEX) {
+	 ASSERT(HAVE_PTEX_VERTICES);
+	 setup_tab[IND].vertex_format = PROJ_TEX1_VERTEX_FORMAT;
+	 setup_tab[IND].vertex_size = 12;
+      }
+      else {
+	 setup_tab[IND].vertex_format = TEX1_VERTEX_FORMAT;
+	 setup_tab[IND].vertex_size = 10;
+      }
+   }
+   else if (DO_TEX0) {
+      if (DO_PTEX && HAVE_PTEX_VERTICES) {  /* unlike units 1..3, DO_PTEX without ptex vertices is accepted here and uses the plain TEX0 format */
+	 setup_tab[IND].vertex_format = PROJ_TEX1_VERTEX_FORMAT;
+	 setup_tab[IND].vertex_size = 12;
+      } else {
+	 setup_tab[IND].vertex_format = TEX0_VERTEX_FORMAT;
+	 setup_tab[IND].vertex_size = 8;
+      }
+   }
+   else if (!HAVE_HW_DIVIDE && !DO_SPEC && !DO_FOG && HAVE_TINY_VERTICES) {  /* untextured with no w/spec/fog needs: smallest format */
+      setup_tab[IND].vertex_format = TINY_VERTEX_FORMAT;
+      setup_tab[IND].vertex_size = 4;
+   } else if (HAVE_NOTEX_VERTICES) {
+      setup_tab[IND].vertex_format = NOTEX_VERTEX_FORMAT;
+      setup_tab[IND].vertex_size = 6;
+   } else {  /* no NOTEX format available: use the TEX0 format */
+      setup_tab[IND].vertex_format = TEX0_VERTEX_FORMAT;
+      setup_tab[IND].vertex_size = 8;
+   }
+
+}
+
+
+#undef IND
+#undef TAG
diff --git a/src/server/mach64_dri.h b/src/server/mach64_dri.h
new file mode 100644
index 0000000..139668e
--- /dev/null
+++ b/src/server/mach64_dri.h
@@ -0,0 +1,126 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Leif Delgass <ldelgass@retinalburn.net>
+ */
+
+#ifndef __MACH64_DRI_H__
+#define __MACH64_DRI_H__ 1
+
+#include "xf86drm.h"
+
+typedef struct {  /* NOTE(review): the "ServerInfo" name suggests X-server-side DRI bookkeeping -- confirm against the users of this header */
+   drm_handle_t fbHandle;
+
+   drm_handle_t regsHandle;
+   drmSize regsSize;
+
+   int IsPCI;  /* presumably nonzero for a PCI (non-AGP) card -- TODO confirm */
+
+   drm_handle_t agpHandle;            /* Handle from drmAgpAlloc */
+   unsigned long agpOffset;
+   drmSize agpSize;
+   int agpMode;
+
+   /* DMA descriptor ring */
+   unsigned long     ringStart;        /* Offset into AGP space */
+   drm_handle_t         ringHandle;       /* Handle from drmAddMap */
+   drmSize           ringMapSize;      /* Size of map */
+   int               ringSize;         /* Size of ring (in kB) */
+   drmAddress        ringMap;          /* Map */
+
+   /* vertex buffer data */
+   unsigned long     bufferStart;      /* Offset into AGP space */
+   drm_handle_t         bufferHandle;     /* Handle from drmAddMap */
+   drmSize           bufferMapSize;    /* Size of map */
+   int               bufferSize;       /* Size of buffers (in MB) */
+   drmAddress        bufferMap;        /* Map */
+
+   drmBufMapPtr      drmBuffers;       /* Buffer map */
+   int               numBuffers;       /* Number of buffers */
+
+   /* AGP Texture data */
+   unsigned long     agpTexStart;      /* Offset into AGP space */
+   drm_handle_t         agpTexHandle;     /* Handle from drmAddMap */
+   drmSize           agpTexMapSize;    /* Size of map */
+   int               agpTexSize;       /* Size of AGP tex space (in MB) */
+   drmAddress        agpTexMap;        /* Map */
+   int               log2AGPTexGran;  /* presumably log2 of the AGP texture granularity -- TODO confirm */
+
+   int fbX;  /* fbX..depthY: presumably the x/y positions of the front, back and depth areas -- TODO confirm */
+   int fbY;
+   int backX;
+   int backY;
+   int depthX;
+   int depthY;
+
+   int frontOffset;  /* the offset/pitch/texture fields below reappear by name in ATIDRIRec, declared unsigned int there */
+   int frontPitch;
+   int backOffset;
+   int backPitch;
+   int depthOffset;
+   int depthPitch;
+
+   int textureOffset;
+   int textureSize;
+   int logTextureGranularity;
+} ATIDRIServerInfoRec, *ATIDRIServerInfoPtr;
+
+typedef struct {  /* NOTE(review): repeats many ATIDRIServerInfoRec field names; presumably the record handed to DRI clients -- confirm */
+   int chipset;
+   int width;
+   int height;
+   int mem;
+   int cpp;  /* presumably bytes per pixel -- TODO confirm */
+
+   int IsPCI;  /* presumably nonzero for a PCI (non-AGP) card -- TODO confirm */
+   int AGPMode;
+
+   unsigned int frontOffset;  /* offsets and pitches are unsigned here but plain int in ATIDRIServerInfoRec */
+   unsigned int frontPitch;
+
+   unsigned int backOffset;
+   unsigned int backPitch;
+
+   unsigned int depthOffset;
+   unsigned int depthPitch;
+
+   unsigned int textureOffset;
+   unsigned int textureSize;
+   int logTextureGranularity;
+
+   drm_handle_t regs;  /* presumably the counterpart of regsHandle in ATIDRIServerInfoRec -- TODO confirm */
+   drmSize regsSize;
+
+   drm_handle_t agp;  /* presumably the counterpart of agpHandle in ATIDRIServerInfoRec -- TODO confirm */
+   drmSize agpSize;
+   unsigned int agpTextureOffset;
+   unsigned int agpTextureSize;
+   int logAgpTextureGranularity;
+} ATIDRIRec, *ATIDRIPtr;
+
+#endif /* __MACH64_DRI_H__ */
author	Luc Verhaegen <libv@skynet.be>	2010-03-09 07:31:48 +0100
committer	Luc Verhaegen <libv@skynet.be>	2010-03-09 07:31:48 +0100
commit	6f692a811778c3481b404e9c9a2d68504c39af08 (patch)
tree	a1ee6fe7581f8127dcadbf7d4189491718dc3cb6 /src
parent	5dee9b7b19c1aa3a13618b08bc24f00677b5364b (diff)