merged tcl-0-0-branchtcl-0-0-20020612-merge

author: keithw <keithw> 2002-06-12 15:50:23 +0000
committer: keithw <keithw> 2002-06-12 15:50:23 +0000
commit: 9a7c4c799a7ddfe709e590e1eb9ad03102bbb838 (patch)
tree: 4a5310fd7e1d615f2761c5912787195e095b0fa7
parent: 80b74af98bae5fa1fd8d26b3b82d474a7bc4a9ac (diff)
85 files changed, 13465 insertions, 2987 deletions
diff --git a/xc/extras/Mesa/include/GL/xmesa_x.h b/xc/extras/Mesa/include/GL/xmesa_x.h
index 4ac6c8dc9..def044719 100644
--- a/xc/extras/Mesa/include/GL/xmesa_x.h
+++ b/xc/extras/Mesa/include/GL/xmesa_x.h
@@ -30,7 +30,6 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  * Authors:
  *   Kevin E. Martin <kevin@precisioninsight.com>
  *
- * $Header: /home/ajax/dri-backup/xc/xc/extras/Mesa/include/GL/Attic/xmesa_x.h,v 1.6 2002/02/21 11:42:58 alanh Exp $
  */
 
 #ifndef _XMESA_X_H_
diff --git a/xc/extras/Mesa/include/GL/xmesa_xf86.h b/xc/extras/Mesa/include/GL/xmesa_xf86.h
index d2dd74687..983f234dd 100644
--- a/xc/extras/Mesa/include/GL/xmesa_xf86.h
+++ b/xc/extras/Mesa/include/GL/xmesa_xf86.h
@@ -30,7 +30,6 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  * Authors:
  *   Kevin E. Martin <kevin@precisioninsight.com>
  *
- * $Header: /home/ajax/dri-backup/xc/xc/extras/Mesa/include/GL/Attic/xmesa_xf86.h,v 1.7 2002/02/21 11:42:58 alanh Exp $
  */
 
 #ifndef _XMESA_XF86_H_
diff --git a/xc/extras/Mesa/src/OSmesa/osmesa.c b/xc/extras/Mesa/src/OSmesa/osmesa.c
index e15b67ab2..458bb850c 100644
--- a/xc/extras/Mesa/src/OSmesa/osmesa.c
+++ b/xc/extras/Mesa/src/OSmesa/osmesa.c
@@ -671,15 +671,9 @@ do {									\
 
 
 
-static GLboolean set_draw_buffer( GLcontext *ctx, GLenum mode )
+static void set_draw_buffer( GLcontext *ctx, GLenum mode )
 {
    (void) ctx;
-   if (mode==GL_FRONT_LEFT) {
-      return GL_TRUE;
-   }
-   else {
-      return GL_FALSE;
-   }
 }
 
 
diff --git a/xc/extras/Mesa/src/X/xm_dd.c b/xc/extras/Mesa/src/X/xm_dd.c
index ffc22884e..7f26bf757 100644
--- a/xc/extras/Mesa/src/X/xm_dd.c
+++ b/xc/extras/Mesa/src/X/xm_dd.c
@@ -230,6 +230,7 @@ index_mask( GLcontext *ctx, GLuint mask )
       else {
          m = (unsigned long) mask;
       }
+      XMesaSetPlaneMask( xmesa->display, xmesa->xm_buffer->gc, m );
       XMesaSetPlaneMask( xmesa->display, xmesa->xm_buffer->cleargc, m );
    }
 }
@@ -255,6 +256,7 @@ color_mask(GLcontext *ctx,
          if (gmask)   m |= GET_GREENMASK(xmesa->xm_visual);
          if (bmask)   m |= GET_BLUEMASK(xmesa->xm_visual);
       }
+      XMesaSetPlaneMask( xmesa->display, xmesa->xm_buffer->gc, m );
       XMesaSetPlaneMask( xmesa->display, xmesa->xm_buffer->cleargc, m );
    }
 }
diff --git a/xc/extras/Mesa/src/api_arrayelt.c b/xc/extras/Mesa/src/api_arrayelt.c
index 33c3f9dd9..0f7f385f3 100644
--- a/xc/extras/Mesa/src/api_arrayelt.c
+++ b/xc/extras/Mesa/src/api_arrayelt.c
@@ -282,7 +282,7 @@ static void _ae_update_state( GLcontext *ctx )
 	 ta->array = &ctx->Array.TexCoord[i];
 	 ta->func = multitexfuncs[ta->array->Size-1][TYPE_IDX(ta->array->Type)];
 	 ta++;
-      }
+      } 
 
    ta->func = 0;
 
@@ -346,7 +346,7 @@ void _ae_loopback_array_elt( GLint elt )
       _ae_update_state( ctx );
 
    for (ta = actx->texarrays ; ta->func ; ta++) {
-      ta->func( ta->unit, (char *)ta->array->Ptr + elt * ta->array->StrideB );
+      ta->func( ta->unit + GL_TEXTURE0_ARB, (char *)ta->array->Ptr + elt * ta->array->StrideB );
    }
 
    /* Must be last
diff --git a/xc/extras/Mesa/src/attrib.c b/xc/extras/Mesa/src/attrib.c
index a6323ad04..9883ecfb5 100644
--- a/xc/extras/Mesa/src/attrib.c
+++ b/xc/extras/Mesa/src/attrib.c
@@ -1038,7 +1038,7 @@ _mesa_PopAttrib(void)
                      _mesa_set_enable(ctx, GL_CLIP_PLANE0 + i, GL_FALSE );
                   }
                   if (ctx->Driver.ClipPlane)
-                     ctx->Driver.ClipPlane( ctx, i, eyePlane );
+                     ctx->Driver.ClipPlane( ctx, GL_CLIP_PLANE0 + i, eyePlane );
                }
 
                /* normalize/rescale */
diff --git a/xc/extras/Mesa/src/buffers.c b/xc/extras/Mesa/src/buffers.c
index 7481f5ad8..c5a459d91 100644
--- a/xc/extras/Mesa/src/buffers.c
+++ b/xc/extras/Mesa/src/buffers.c
@@ -257,23 +257,11 @@ _mesa_DrawBuffer( GLenum mode )
    }
 
    /*
-    * If we get here there can't have been an error.
-    * Now see if device driver can implement the drawing to the target
-    * buffer(s).  The driver may not be able to do GL_FRONT_AND_BACK mode
-    * for example.  We'll take care of that in the core code by looping
-    * over the individual buffers.
+    * If we get here there can't have been an error.  Now tell the
+    * device driver about it.  
     */
    ASSERT(ctx->Driver.SetDrawBuffer);
-   if ( (*ctx->Driver.SetDrawBuffer)(ctx, ctx->Color.DriverDrawBuffer) ) {
-      /* All OK, the driver will do all buffer writes */
-      ctx->Color.MultiDrawBuffer = GL_FALSE;
-   }
-   else {
-      /* We'll have to loop over the multiple draw buffer targets */
-      ctx->Color.MultiDrawBuffer = GL_TRUE;
-      /* Set drawing buffer to front for now */
-      (void) (*ctx->Driver.SetDrawBuffer)(ctx, GL_FRONT_LEFT);
-   }
+   (*ctx->Driver.SetDrawBuffer)(ctx, ctx->Color.DriverDrawBuffer);
 
    ctx->Color.DrawBuffer = mode;
    ctx->NewState |= _NEW_COLOR;
diff --git a/xc/extras/Mesa/src/context.c b/xc/extras/Mesa/src/context.c
index 689fd5b4e..1ca549958 100644
--- a/xc/extras/Mesa/src/context.c
+++ b/xc/extras/Mesa/src/context.c
@@ -813,7 +813,6 @@ init_attrib_groups( GLcontext *ctx )
    ctx->Color.ColorLogicOpEnabled = GL_FALSE;
    ctx->Color.LogicOp = GL_COPY;
    ctx->Color.DitherFlag = GL_TRUE;
-   ctx->Color.MultiDrawBuffer = GL_FALSE;
 
    /* Current group */
    ASSIGN_4V( ctx->Current.Color, 1.0, 1.0, 1.0, 1.0 );
diff --git a/xc/extras/Mesa/src/dd.h b/xc/extras/Mesa/src/dd.h
index 13553c8d2..29fb9b0c3 100644
--- a/xc/extras/Mesa/src/dd.h
+++ b/xc/extras/Mesa/src/dd.h
@@ -1,4 +1,3 @@
-
 /*
  * Mesa 3-D graphics library
  * Version:  3.5
@@ -78,7 +77,7 @@ struct dd_function_table {
     * settings!  Software Mesa can do masked clears if the device driver can't.
     */
 
-   GLboolean (*SetDrawBuffer)( GLcontext *ctx, GLenum buffer );
+   void (*SetDrawBuffer)( GLcontext *ctx, GLenum buffer );
    /*
     * Specifies the current buffer for writing.
     * The following values must be accepted when applicable:
@@ -86,16 +85,15 @@ struct dd_function_table {
     *    GL_BACK_LEFT - when double buffering
     *    GL_FRONT_RIGHT - when using stereo
     *    GL_BACK_RIGHT - when using stereo and double buffering
-    * The folowing values may optionally be accepted.  Return GL_TRUE
-    * if accepted, GL_FALSE if not accepted.  In practice, only drivers
-    * which can write to multiple color buffers at once should accept
-    * these values.
     *    GL_FRONT - write to front left and front right if it exists
     *    GL_BACK - write to back left and back right if it exists
     *    GL_LEFT - write to front left and back left if it exists
     *    GL_RIGHT - write to right left and back right if they exist
     *    GL_FRONT_AND_BACK - write to all four buffers if they exist
     *    GL_NONE - disable buffer write in device driver.
+    *
+    * Note the driver must organize fallbacks (eg with swrast) if it
+    * cannot implement the requested mode.
     */
 
    void (*GetBufferSize)( GLframebuffer *buffer,
diff --git a/xc/extras/Mesa/src/mtypes.h b/xc/extras/Mesa/src/mtypes.h
index 5e9efe7b1..1fbcb8b49 100644
--- a/xc/extras/Mesa/src/mtypes.h
+++ b/xc/extras/Mesa/src/mtypes.h
@@ -268,7 +268,6 @@ struct gl_colorbuffer_attrib {
 
    GLenum DrawBuffer;			/* Which buffer to draw into */
    GLenum DriverDrawBuffer;		/* Current device driver dest buffer */
-   GLboolean MultiDrawBuffer;		/* Drawing to mutliple buffers? */
    GLubyte DrawDestMask;		/* bitwise-OR of bitflags above */
 
    /* alpha testing */
@@ -1674,7 +1673,7 @@ do {								\
 #define ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, retval)		\
 do {									\
    if (ctx->Driver.CurrentExecPrimitive != PRIM_OUTSIDE_BEGIN_END) {	\
-      _mesa_error( ctx, GL_INVALID_OPERATION, "begin/end" );		\
+      _mesa_error( ctx, GL_INVALID_OPERATION, __FUNCTION__ ); \
       return retval;							\
    }									\
 } while (0)
@@ -1682,7 +1681,7 @@ do {									\
 #define ASSERT_OUTSIDE_BEGIN_END(ctx)					\
 do {									\
    if (ctx->Driver.CurrentExecPrimitive != PRIM_OUTSIDE_BEGIN_END) {	\
-      _mesa_error( ctx, GL_INVALID_OPERATION, "begin/end" );		\
+      _mesa_error( ctx, GL_INVALID_OPERATION, __FUNCTION__ ); \
       return;								\
    }									\
 } while (0)
diff --git a/xc/extras/Mesa/src/swrast/s_context.c b/xc/extras/Mesa/src/swrast/s_context.c
index b7272a35d..094725f40 100644
--- a/xc/extras/Mesa/src/swrast/s_context.c
+++ b/xc/extras/Mesa/src/swrast/s_context.c
@@ -88,10 +88,8 @@ _swrast_update_rasterflags( GLcontext *ctx )
     * MULTI_DRAW_BIT flag.  Also set it if we're drawing to no
     * buffers or the RGBA or CI mask disables all writes.
     */
-   if (ctx->Color.MultiDrawBuffer) {
-      RasterMask |= MULTI_DRAW_BIT;
-   }
-   else if (ctx->Color.DrawBuffer==GL_NONE) {
+   if (ctx->Color.DrawBuffer == GL_FRONT_AND_BACK ||
+       ctx->Color.DrawBuffer == GL_NONE) {
       RasterMask |= MULTI_DRAW_BIT;
    }
    else if (ctx->Visual.rgbMode && *((GLuint *) ctx->Color.ColorMask) == 0) {
diff --git a/xc/extras/Mesa/src/texformat_tmp.h b/xc/extras/Mesa/src/texformat_tmp.h
index 5c3489662..2d71e1e88 100644
--- a/xc/extras/Mesa/src/texformat_tmp.h
+++ b/xc/extras/Mesa/src/texformat_tmp.h
@@ -213,9 +213,9 @@ static void FETCH(argb1555)( const struct gl_texture_image *texImage,
 {
    const GLushort *src = USHORT_SRC( texImage, i, j, k );
    GLchan *rgba = (GLchan *) texel; GLushort s = *src;
-   rgba[RCOMP] = UBYTE_TO_CHAN( ((s >> 10) & 0xf8) * 255 / 0xf8 );
-   rgba[GCOMP] = UBYTE_TO_CHAN( ((s >>  5) & 0xf8) * 255 / 0xf8 );
-   rgba[BCOMP] = UBYTE_TO_CHAN( ((s      ) & 0xf8) * 255 / 0xf8 );
+   rgba[RCOMP] = UBYTE_TO_CHAN( ((s >> 10) & 0x1f) * 255 / 0x1f );
+   rgba[GCOMP] = UBYTE_TO_CHAN( ((s >>  5) & 0x1f) * 255 / 0x1f );
+   rgba[BCOMP] = UBYTE_TO_CHAN( ((s      ) & 0x1f) * 255 / 0x1f );
    rgba[ACOMP] = UBYTE_TO_CHAN( ((s >> 15) & 0x01) * 255 );
 }
 
diff --git a/xc/extras/Mesa/src/tnl/t_imm_api.c b/xc/extras/Mesa/src/tnl/t_imm_api.c
index 0544496a7..9b6c6cf43 100644
--- a/xc/extras/Mesa/src/tnl/t_imm_api.c
+++ b/xc/extras/Mesa/src/tnl/t_imm_api.c
@@ -84,7 +84,9 @@ void _tnl_flush_vertices( GLcontext *ctx, GLuint flags )
 	       IM->Flag[IM->Start]);
 
    if (IM->Flag[IM->Start])
-      if ((flags & FLUSH_UPDATE_CURRENT) || IM->Count > IM->Start)
+      if ((flags & FLUSH_UPDATE_CURRENT) || 
+	  IM->Count > IM->Start ||
+	  (IM->Flag[IM->Start] & (VERT_BEGIN|VERT_END)))
 	 _tnl_flush_immediate( IM );
 }
 
@@ -144,16 +146,9 @@ _tnl_save_Begin( GLenum mode )
    ctx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
    IM->BeginState = state;
 
-   if (ctx->ExecuteFlag) {
-      if (ctx->Driver.CurrentExecPrimitive != PRIM_OUTSIDE_BEGIN_END) {
-	 _mesa_error( ctx, GL_INVALID_OPERATION, "_tnl_Begin" );
-      }
-      else 
-	 ctx->Driver.CurrentExecPrimitive = mode;
-   }
-
-
-   /* Update save_primitive now.
+   /* Update save_primitive now.  Don't touch ExecPrimitive as this is
+    * updated in the replay of this cassette if we are in
+    * COMPILE_AND_EXECUTE mode.
     */
    if (ctx->Driver.CurrentSavePrimitive == PRIM_UNKNOWN)
       ctx->Driver.CurrentSavePrimitive = PRIM_INSIDE_UNKNOWN_PRIM;
@@ -189,8 +184,9 @@ _tnl_Begin( GLenum mode )
 
       if (IM->Start == IM->Count &&
 	  tnl->Driver.NotifyBegin &&
-	  tnl->Driver.NotifyBegin( ctx, mode )) 
+	  tnl->Driver.NotifyBegin( ctx, mode )) {
 	 return;
+      }
 
       assert( IM->SavedBeginState == 0 );
       assert( IM->BeginState == 0 );
@@ -209,8 +205,6 @@ _tnl_Begin( GLenum mode )
       IM->LastPrimitive = count;
       IM->BeginState = (VERT_BEGIN_0|VERT_BEGIN_1);
 
-/*        fprintf(stderr, "%s: %x\n", __FUNCTION__, IM->BeginState);  */
-
       ctx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
       ctx->Driver.CurrentExecPrimitive = mode;
    }
@@ -344,7 +338,11 @@ _tnl_end( GLcontext *ctx )
 
    IM->BeginState = state;
 
-   if (ctx->ExecuteFlag) {
+   /* Only update CurrentExecPrimitive if not compiling.  If we are in
+    * COMPILE_AND_EXECUTE mode, it will be done on replay of this
+    * cassette.
+    */
+   if (!ctx->CompileFlag) {
       if (ctx->Driver.CurrentExecPrimitive == PRIM_OUTSIDE_BEGIN_END) 
 	 _mesa_error( ctx, GL_INVALID_OPERATION, "_tnl_End" );
       else
diff --git a/xc/extras/Mesa/src/tnl/t_imm_dlist.c b/xc/extras/Mesa/src/tnl/t_imm_dlist.c
index 0b0f0f841..257b6e47c 100644
--- a/xc/extras/Mesa/src/tnl/t_imm_dlist.c
+++ b/xc/extras/Mesa/src/tnl/t_imm_dlist.c
@@ -366,6 +366,12 @@ execute_compiled_cassette( GLcontext *ctx, void *data )
 	 ctx->Driver.CurrentExecPrimitive =
 	    IM->Primitive[IM->LastPrimitive] & PRIM_MODE_MASK;
       }
+      
+/*        fprintf(stderr, "%s: IM->Primitive[%d]: %x, CurrExecPrim: %x\n", */
+/*  	      __FUNCTION__, */
+/*  	      IM->LastPrimitive, */
+/*  	      IM->Primitive[IM->LastPrimitive],  */
+/*  	      ctx->Driver.CurrentExecPrimitive); */
 
       _tnl_get_exec_copy_verts( ctx, IM );
 
diff --git a/xc/extras/Mesa/src/tnl_dd/t_dd_dmatmp2.h b/xc/extras/Mesa/src/tnl_dd/t_dd_dmatmp2.h
new file mode 100644
index 000000000..928d065d5
--- /dev/null
+++ b/xc/extras/Mesa/src/tnl_dd/t_dd_dmatmp2.h
@@ -0,0 +1,1094 @@
+/* $Id: t_dd_dmatmp2.h,v 1.2 2002/06/12 15:50:25 keithw Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+/* Template for render stages which build and emit vertices directly
+ * to fixed-size dma buffers.  Useful for rendering strips and other
+ * native primitives where clipping and per-vertex tweaks such as
+ * those in t_dd_tritmp.h are not required.
+ *
+ */
+
+#if !HAVE_TRIANGLES || !HAVE_POINTS || !HAVE_LINES
+#error "must have points, lines & triangles to use render template"
+#endif
+
+#if !HAVE_TRI_STRIPS || !HAVE_TRI_FANS
+#error "must have tri strip and fans to use render template"
+#endif
+
+#if !HAVE_LINE_STRIPS
+#error "must have line strips to use render template"
+#endif
+
+#if !HAVE_POLYGONS
+#error "must have polygons to use render template"
+#endif
+
+#if !HAVE_ELTS
+#error "must have elts to use render template"
+#endif
+
+/* Fallback used when the including driver has not defined
+ * EMIT_TWO_ELTS itself: write two indices with two EMIT_ELT calls, at
+ * offset and offset+1.
+ */
+#ifndef EMIT_TWO_ELTS
+#define EMIT_TWO_ELTS( offset, elt0, elt1 )	\
+do { 						\
+   EMIT_ELT( offset, elt0 ); 			\
+   EMIT_ELT( offset+1, elt1 ); 			\
+} while (0)
+#endif
+
+
+/**********************************************************************/
+/*                  Render whole begin/end objects                    */
+/**********************************************************************/
+
+
+/* Emit the nr element indices elts[0] .. elts[nr-1] into element space
+ * obtained with ALLOC_ELTS( nr ).
+ *
+ * Indices are written two at a time with EMIT_TWO_ELTS.  When nr is
+ * odd the last index is written on its own with EMIT_ELT, so that the
+ * function never reads elts[nr] and never emits more indices than were
+ * allocated.  Callers in this file pass nr == 1 to append a single
+ * index.
+ */
+static void TAG(emit_elts)( GLcontext *ctx, GLuint *elts, GLuint nr )
+{
+   GLint i;
+   LOCAL_VARS;
+   ELTS_VARS;
+
+   ALLOC_ELTS( nr );
+
+   for ( i = 0 ; i+1 < nr ; i+=2, elts += 2 ) {
+      EMIT_TWO_ELTS( 0, elts[0], elts[1] );
+      INCR_ELTS( 2 );
+   }
+   /* Odd count: one index is left over after the pairs. */
+   if (i < nr) {
+      EMIT_ELT( 0, elts[0] );
+      INCR_ELTS( 1 );
+   }
+}
+/* Emit the nr consecutive indices start, start+1, ... into element
+ * space obtained with ALLOC_ELTS( nr ).  Pairs are written with
+ * EMIT_TWO_ELTS; when nr is odd the final index is written alone with
+ * EMIT_ELT.
+ */
+static void TAG(emit_consecutive_elts)( GLcontext *ctx, GLuint start, GLuint nr )
+{
+   GLint i;
+   LOCAL_VARS;
+   ELTS_VARS;
+
+   ALLOC_ELTS( nr );
+
+   for ( i = 0 ; i+1 < nr ; i+=2, start += 2 ) {
+      EMIT_TWO_ELTS( 0, start, start+1 );
+      INCR_ELTS( 2 );
+   }
+   if (i < nr) {
+      EMIT_ELT( 0, start );
+      INCR_ELTS( 1 );
+   }
+}
+
+/***********************************************************************
+ *                    Render non-indexed primitives.
+ ***********************************************************************/
+
+
+/* Render points from consecutive vertices: start and count are passed
+ * unchanged to a single EMIT_PRIM of GL_POINTS / HW_POINTS.  flags is
+ * not used.
+ */
+static void TAG(render_points_verts)( GLcontext *ctx,
+				      GLuint start,
+				      GLuint count,
+				      GLuint flags )
+{
+   LOCAL_VARS;
+   if (0) fprintf(stderr, "%s\n", __FUNCTION__);
+   EMIT_PRIM( ctx, GL_POINTS, HW_POINTS, start, count );
+}
+/* Render independent lines from consecutive vertices.
+ *
+ * count is first trimmed so that count-start is even; nothing is drawn
+ * when fewer than two vertices remain.  When line stippling is enabled,
+ * PRIM_BEGIN triggers RESET_STIPPLE() and AUTO_STIPPLE( GL_TRUE ), and
+ * PRIM_END triggers AUTO_STIPPLE( GL_FALSE ) after the primitive has
+ * been emitted.
+ */
+static void TAG(render_lines_verts)( GLcontext *ctx,
+				     GLuint start,
+				     GLuint count,
+				     GLuint flags )
+{
+   LOCAL_VARS;
+   if (0) fprintf(stderr, "%s\n", __FUNCTION__);
+   count -= (count-start) & 1;
+
+   if (start+1 >= count)
+      return;
+
+   if ((flags & PRIM_BEGIN) && ctx->Line.StippleFlag) {
+      RESET_STIPPLE();
+      AUTO_STIPPLE( GL_TRUE );
+   }
+      
+   EMIT_PRIM( ctx, GL_LINES, HW_LINES, start, count );
+
+   if ((flags & PRIM_END) && ctx->Line.StippleFlag)
+      AUTO_STIPPLE( GL_FALSE );
+}
+
+/* Render a line strip from consecutive vertices.
+ *
+ * Nothing is drawn for fewer than two vertices.  The stipple is reset
+ * on PRIM_BEGIN when line stippling is enabled.
+ *
+ * If PREFER_DISCREET_ELT_PRIM( count-start, HW_LINES ) is true the
+ * strip is emitted as indexed GL_LINES, one index pair (i, i+1) per
+ * segment.  Buffer capacities are halved because each segment costs
+ * two indices, and j advances by nr-1 so that consecutive chunks share
+ * their joining vertex.  Otherwise the strip is emitted with a single
+ * EMIT_PRIM of GL_LINE_STRIP.
+ */
+static void TAG(render_line_strip_verts)( GLcontext *ctx,
+					  GLuint start,
+					  GLuint count,
+					  GLuint flags )
+{
+   LOCAL_VARS;
+   if (0) fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (start+1 >= count)
+      return;
+
+   if ((flags & PRIM_BEGIN) && ctx->Line.StippleFlag)
+      RESET_STIPPLE();
+
+
+   if (PREFER_DISCREET_ELT_PRIM( count-start, HW_LINES ))
+   {   
+      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
+      int currentsz;
+      GLuint j, nr;
+
+      ELT_INIT( GL_LINES, HW_LINES );
+
+      /* Emit whole number of lines in each full buffer.
+       */
+      dmasz = dmasz/2;
+      currentsz = GET_CURRENT_VB_MAX_ELTS();
+      currentsz = currentsz/2;
+
+      if (currentsz < 4) {
+	 NEW_BUFFER();
+	 currentsz = dmasz;
+      }
+
+      for (j = start; j + 1 < count; j += nr - 1 ) {
+	 GLint i;
+	 ELTS_VARS;
+	 nr = MIN2( currentsz, count - j );
+	    
+	 ALLOC_ELTS( (nr-1)*2 );
+	    
+	 for ( i = j ; i+1 < j+nr ; i+=1 ) {
+	    EMIT_TWO_ELTS( 0, (i+0), (i+1) );
+	    INCR_ELTS( 2 );
+	 }
+
+	 if (nr == currentsz) {
+	    NEW_BUFFER();
+	    currentsz = dmasz;
+	 }
+      }
+   }
+   else
+      EMIT_PRIM( ctx, GL_LINE_STRIP, HW_LINE_STRIP, start, count );
+}
+
+/* Render a line loop, or one slice of a loop, from consecutive
+ * vertices.
+ *
+ * j is the first vertex drawn: start when PRIM_BEGIN is set (the
+ * stipple is also reset there if line stippling is enabled), start+1
+ * otherwise.
+ *
+ * Without PRIM_END the slice is handed to render_line_strip_verts,
+ * beginning at j.  With PRIM_END the loop is closed here by referring
+ * back to vertex `start`:
+ *   - if PREFER_DISCREET_ELT_PRIM( count-start, HW_LINES ) is true, as
+ *     indexed GL_LINES with a final pair (j, start);
+ *   - otherwise as an indexed GL_LINE_STRIP whose last chunk is
+ *     followed by the index `start`.
+ * Both paths lower currentsz and dmasz by one before chunking, leaving
+ * room for that closing reference.
+ */
+static void TAG(render_line_loop_verts)( GLcontext *ctx,
+					 GLuint start,
+					 GLuint count,
+					 GLuint flags )
+{
+   LOCAL_VARS;
+   GLuint j, nr;
+   if (0) fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (flags & PRIM_BEGIN) {
+      j = start;
+      if (ctx->Line.StippleFlag)
+	 RESET_STIPPLE( );
+   }
+   else
+      j = start + 1;
+
+   if (flags & PRIM_END) {
+
+      if (start+1 >= count)
+	 return;
+
+      if (PREFER_DISCREET_ELT_PRIM( count-start, HW_LINES )) {
+	 int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
+	 int currentsz;
+
+	 ELT_INIT( GL_LINES, HW_LINES );
+
+	 /* Emit whole number of lines in each full buffer.
+	  */
+	 dmasz = dmasz/2;
+	 currentsz = GET_CURRENT_VB_MAX_ELTS();
+	 currentsz = currentsz/2;
+
+	 if (currentsz < 4) {
+	    NEW_BUFFER();
+	    currentsz = dmasz;
+	 }
+
+	 /* Ensure last vertex doesn't wrap:
+	  */
+	 currentsz--;
+	 dmasz--;
+
+	 for (; j + 1 < count;  ) {
+	    GLint i;
+	    ELTS_VARS;
+	    nr = MIN2( currentsz, count - j );
+	    
+	    ALLOC_ELTS( (nr-1)*2 );	    
+	    for ( i = j ; i+1 < j+nr ; i+=1 ) {
+	       EMIT_TWO_ELTS( 0, (i+0), (i+1) );
+	       INCR_ELTS( 2 );
+	    }
+
+	    j += nr - 1;
+	    if (j + 1 < count) {
+	       NEW_BUFFER();
+	       currentsz = dmasz;
+	    }
+ 	    else { 
+ 	       ALLOC_ELTS( 2 ); 
+ 	       EMIT_TWO_ELTS( 0, (j), (start) ); 
+ 	       INCR_ELTS( 2 ); 
+ 	    } 
+	 }
+      }
+      else
+      {
+	 int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
+	 int currentsz;
+
+	 ELT_INIT( GL_LINE_STRIP, HW_LINE_STRIP );
+
+	 currentsz = GET_CURRENT_VB_MAX_ELTS();
+
+	 if (currentsz < 8) {
+	    NEW_BUFFER();
+	    currentsz = dmasz;
+	 }
+
+	 /* Ensure last vertex doesn't wrap:
+	  */
+	 currentsz--;
+	 dmasz--;
+
+	 for ( ; j + 1 < count;  ) {
+	    nr = MIN2( currentsz, count - j );
+	    if (j + nr < count) {
+	       TAG(emit_consecutive_elts)( ctx, j, nr );
+	       currentsz = dmasz;
+	       j += nr - 1;
+	       NEW_BUFFER();
+	    }
+	    else if (nr) {
+	       ELTS_VARS;
+	       int i;
+
+	       ALLOC_ELTS( nr + 1 );
+	       for ( i = 0 ; i+1 < nr ; i+=2, j += 2 ) {
+		  EMIT_TWO_ELTS( 0, j, j+1 );
+		  INCR_ELTS( 2 );
+	       }
+	       if (i < nr) {
+		  EMIT_ELT( 0, j ); j++;
+		  INCR_ELTS( 1 );
+	       }
+	       EMIT_ELT( 0, start );
+	       INCR_ELTS( 1 );
+	       NEW_BUFFER();
+	    }
+	    else {
+	       fprintf(stderr, "warining nr==0\n");
+	    }
+	 }   
+      }
+   } else {
+      TAG(render_line_strip_verts)( ctx, j, count, flags );
+   }
+}
+
+/* Render independent triangles from consecutive vertices.  count is
+ * trimmed so that count-start is a multiple of three; nothing is drawn
+ * when fewer than three vertices remain.  flags is not used.
+ */
+static void TAG(render_triangles_verts)( GLcontext *ctx,
+					 GLuint start,
+					 GLuint count,
+					 GLuint flags )
+{
+   LOCAL_VARS;
+   if (0) fprintf(stderr, "%s\n", __FUNCTION__);
+
+   count -= (count-start)%3;
+
+   if (start+2 >= count) {
+      return;
+   }
+
+   /* need a PREFER_DISCREET_ELT_PRIM here too..
+    */
+   EMIT_PRIM( ctx, GL_TRIANGLES, HW_TRIANGLES, start, count );
+}
+
+
+/* Render a triangle strip from consecutive vertices.  Nothing is drawn
+ * for fewer than three vertices.
+ *
+ * Three strategies, tried in order:
+ *   1. PREFER_DISCREET_ELT_PRIM( count-start, HW_TRIANGLES ): emit
+ *      indexed GL_TRIANGLES.  `parity` starts at 1 when PRIM_PARITY is
+ *      set and toggles per triangle; it swaps the first two indices of
+ *      the triangle.  Buffer capacities are divided by three and
+ *      rounded down to even, and chunks overlap by two vertices
+ *      (j += nr - 2).
+ *   2. No PRIM_PARITY: one EMIT_PRIM with HW_TRIANGLE_STRIP_0.  With
+ *      PRIM_PARITY and HAVE_TRI_STRIP_1: one EMIT_PRIM with
+ *      HW_TRIANGLE_STRIP_1.
+ *   3. PRIM_PARITY without HAVE_TRI_STRIP_1: the first triangle is
+ *      emitted by index as (start+1, start, start+2), then the rest of
+ *      the strip from start+1 as HW_TRIANGLE_STRIP_0.
+ */
+static void TAG(render_tri_strip_verts)( GLcontext *ctx,
+					 GLuint start,
+					 GLuint count,
+					 GLuint flags )
+{
+   LOCAL_VARS;
+   if (0) fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (start + 2 >= count)
+      return;
+
+   if (PREFER_DISCREET_ELT_PRIM( count-start, HW_TRIANGLES ))
+   {   
+      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
+      int currentsz;
+      int parity = 0;
+      GLuint j, nr;
+
+      ELT_INIT( GL_TRIANGLES, HW_TRIANGLES );
+
+      if (flags & PRIM_PARITY)
+	 parity = 1;
+
+      /* Emit even number of tris in each full buffer.
+       */
+      dmasz = dmasz/3;
+      dmasz -= dmasz & 1;
+      currentsz = GET_CURRENT_VB_MAX_ELTS();
+      currentsz = currentsz/3;
+      currentsz -= currentsz & 1;
+
+      if (currentsz < 4) {
+	 NEW_BUFFER();
+	 currentsz = dmasz;
+      }
+
+      for (j = start; j + 2 < count; j += nr - 2 ) {
+	 GLint i;
+	 ELTS_VARS;
+	 nr = MIN2( currentsz, count - j );
+	    
+	 ALLOC_ELTS( (nr-2)*3 );
+	    
+	 for ( i = j ; i+2 < j+nr ; i++, parity^=1 ) {
+	    EMIT_ELT( 0, (i+0+parity) );
+	    EMIT_ELT( 1, (i+1-parity) );
+	    EMIT_ELT( 2, (i+2) );
+	    INCR_ELTS( 3 );
+	 }
+
+	 if (nr == currentsz) {
+	    NEW_BUFFER();
+	    currentsz = dmasz;
+	 }
+      }
+   }
+   else if ((flags & PRIM_PARITY) == 0)  
+      EMIT_PRIM( ctx, GL_TRIANGLE_STRIP, HW_TRIANGLE_STRIP_0, start, count );
+   else if (HAVE_TRI_STRIP_1)
+      EMIT_PRIM( ctx, GL_TRIANGLE_STRIP, HW_TRIANGLE_STRIP_1, start, count );
+   else {
+      /* Emit the first triangle with elts, then the rest as a regular strip.
+       * TODO:  Make this unlikely in t_imm_api.c
+       */
+      ELTS_VARS;
+      ELT_INIT( GL_TRIANGLES, HW_TRIANGLES );
+      ALLOC_ELTS( 3 );
+      EMIT_ELT( 0, (start+1) );
+      EMIT_ELT( 1, (start+0) );
+      EMIT_ELT( 2, (start+2) );
+      INCR_ELTS( 3 );
+      NEW_PRIMITIVE();
+
+      start++;
+      if (start + 2 >= count)
+	 return;
+
+      EMIT_PRIM( ctx, GL_TRIANGLE_STRIP, HW_TRIANGLE_STRIP_0, start, 
+		 count );
+   }
+}
+/* Render a triangle fan from consecutive vertices.  Nothing is drawn
+ * for fewer than three vertices.
+ *
+ * If PREFER_DISCREET_ELT_PRIM( count-start, HW_TRIANGLES ) is true the
+ * fan is emitted as indexed GL_TRIANGLES, each triangle being
+ * (start, i, i+1).  Buffer capacities are divided by three, and chunks
+ * overlap by one vertex (j += nr - 1).  Otherwise the fan is emitted
+ * with a single EMIT_PRIM of GL_TRIANGLE_FAN.
+ */
+static void TAG(render_tri_fan_verts)( GLcontext *ctx,
+				       GLuint start,
+				       GLuint count,
+				       GLuint flags )
+{
+   LOCAL_VARS;
+   if (0) fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (start+2 >= count) 
+      return;
+
+   if (PREFER_DISCREET_ELT_PRIM( count-start, HW_TRIANGLES ))
+   {   
+      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
+      int currentsz;
+      GLuint j, nr;
+
+      ELT_INIT( GL_TRIANGLES, HW_TRIANGLES );
+
+      dmasz = dmasz/3;
+      currentsz = GET_CURRENT_VB_MAX_ELTS();
+      currentsz = currentsz/3;
+
+      if (currentsz < 4) {
+	 NEW_BUFFER();
+	 currentsz = dmasz;
+      }
+
+      for (j = start + 1; j + 1 < count; j += nr - 1 ) {
+	 GLint i;
+	 ELTS_VARS;
+	 nr = MIN2( currentsz, count - j );
+	    
+	 ALLOC_ELTS( (nr-1)*3 );
+	    
+	 for ( i = j ; i+1 < j+nr ; i++ ) {
+	    EMIT_ELT( 0, (start) );
+	    EMIT_ELT( 1, (i) );
+	    EMIT_ELT( 2, (i+1) );
+	    INCR_ELTS( 3 );
+	 }
+
+	 if (nr == currentsz) {
+	    NEW_BUFFER();
+	    currentsz = dmasz;
+	 }
+      }
+   }
+   else {
+      EMIT_PRIM( ctx, GL_TRIANGLE_FAN, HW_TRIANGLE_FAN, start, count );
+   }
+}
+
+/* Render a polygon from consecutive vertices with a single EMIT_PRIM
+ * of GL_POLYGON / HW_POLYGON.  Nothing is drawn for fewer than three
+ * vertices.  flags is not used.
+ */
+static void TAG(render_poly_verts)( GLcontext *ctx,
+				    GLuint start,
+				    GLuint count,
+				    GLuint flags )
+{
+   LOCAL_VARS;
+   if (0) fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (start+2 >= count) 
+      return;
+
+   EMIT_PRIM( ctx, GL_POLYGON, HW_POLYGON, start, count );
+}
+/* Render a quad strip from consecutive vertices.  count is trimmed so
+ * that count-start is even; nothing is drawn for fewer than four
+ * vertices.
+ *
+ *   - HAVE_QUAD_STRIPS: one EMIT_PRIM of GL_QUAD_STRIP.
+ *   - otherwise, when DD_FLATSHADE is set in ctx->_TriangleCaps: each
+ *     quad is emitted as two indexed triangles, (i, i+1, i+2) and
+ *     (i+1, i+3, i+2).  Capacities are converted to vertex counts
+ *     holding whole quads ((n/6)*2), and chunks overlap by two
+ *     vertices (j += nr - 2).
+ *   - otherwise: the vertices are emitted as HW_TRIANGLE_STRIP_0.
+ */
+static void TAG(render_quad_strip_verts)( GLcontext *ctx,
+					  GLuint start,
+					  GLuint count,
+					  GLuint flags )
+{
+   LOCAL_VARS;
+   if (0) fprintf(stderr, "%s\n", __FUNCTION__);
+
+   count -= (count-start) & 1;
+
+   if (start+3 >= count) 
+      return;
+
+   if (HAVE_QUAD_STRIPS) {
+      EMIT_PRIM( ctx, GL_QUAD_STRIP, HW_QUAD_STRIP, start, count );
+   } 
+   else if (ctx->_TriangleCaps & DD_FLATSHADE) {
+      LOCAL_VARS;
+      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
+      int currentsz;
+      GLuint j, nr;
+
+      ELT_INIT( GL_TRIANGLES, HW_TRIANGLES );
+
+      currentsz = GET_CURRENT_VB_MAX_ELTS();
+
+      /* Emit whole number of quads in total, and in each buffer.
+       */
+      currentsz = (currentsz/6)*2;
+      dmasz = (dmasz/6)*2;
+
+      if (currentsz < 4) {
+	 NEW_BUFFER();
+	 currentsz = dmasz;
+      }
+
+      for (j = start; j + 3 < count; j += nr - 2 ) {
+	 ELTS_VARS;
+	 GLint quads, i;
+
+	 nr = MIN2( currentsz, count - j );
+	 quads = (nr/2)-1;
+	    
+	 ALLOC_ELTS( quads*6 );
+	    
+	 for ( i = j ; i < j+quads*2 ; i+=2 ) {
+	    EMIT_TWO_ELTS( 0, (i+0), (i+1) );
+	    EMIT_TWO_ELTS( 2, (i+2), (i+1) );
+	    EMIT_TWO_ELTS( 4, (i+3), (i+2) );
+	    INCR_ELTS( 6 );
+	 }
+
+	 if (nr == currentsz) {
+	    NEW_BUFFER();
+	    currentsz = dmasz;
+	 }
+      }
+   }
+   else {
+      EMIT_PRIM( ctx, GL_TRIANGLE_STRIP, HW_TRIANGLE_STRIP_0, start, count );
+   }
+}
+
+
+/* Render independent quads from consecutive vertices.  count is
+ * trimmed so that count-start is a multiple of four; nothing is drawn
+ * for fewer than four vertices.  flags is not used.
+ *
+ *   - HAVE_QUADS: one EMIT_PRIM of GL_QUADS / HW_QUADS.  The GL
+ *     primitive comes first and the hardware primitive second, as in
+ *     every other EMIT_PRIM call in this template.
+ *   - otherwise each quad is emitted as two indexed triangles,
+ *     (i, i+1, i+3) and (i+1, i+2, i+3).  Capacities are converted to
+ *     vertex counts holding whole quads ((n/6)*4).
+ */
+static void TAG(render_quads_verts)( GLcontext *ctx,
+				     GLuint start,
+				     GLuint count,
+				     GLuint flags )
+{
+   LOCAL_VARS;
+   if (0) fprintf(stderr, "%s\n", __FUNCTION__);
+   count -= (count-start)%4;
+
+   if (start+3 >= count) 
+      return;
+
+   if (HAVE_QUADS) {
+      EMIT_PRIM( ctx, GL_QUADS, HW_QUADS, start, count );
+   } 
+   else {
+      /* Hardware doesn't have a quad primitive type -- simulate it
+       * using indexed vertices and the triangle primitive: 
+       */
+      LOCAL_VARS;
+      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
+      int currentsz;
+      GLuint j, nr;
+
+      ELT_INIT( GL_TRIANGLES, HW_TRIANGLES );
+      currentsz = GET_CURRENT_VB_MAX_ELTS();
+
+      /* Adjust for rendering as triangles:
+       */
+      currentsz = (currentsz/6)*4;
+      dmasz = (dmasz/6)*4;
+
+      if (currentsz < 8) {
+	 NEW_BUFFER();
+	 currentsz = dmasz;
+      }
+
+      for (j = start; j < count; j += nr ) {
+	 ELTS_VARS;
+	 GLint quads, i;
+	 nr = MIN2( currentsz, count - j );
+	 quads = nr/4;
+
+	 ALLOC_ELTS( quads*6 );
+
+	 for ( i = j ; i < j+quads*4 ; i+=4 ) {
+	    EMIT_TWO_ELTS( 0, (i+0), (i+1) );
+	    EMIT_TWO_ELTS( 2, (i+3), (i+1) );
+	    EMIT_TWO_ELTS( 4, (i+2), (i+3) );
+	    INCR_ELTS( 6 );
+	 }
+
+	 if (nr == currentsz) {
+	    NEW_BUFFER();
+	    currentsz = dmasz;
+	 }
+      }
+   }
+}
+/* Render function that draws nothing; all arguments are ignored. */
+static void TAG(render_noop)( GLcontext *ctx,
+			      GLuint start,
+			      GLuint count,
+			      GLuint flags )
+{
+}
+
+
+
+/* Table of the non-indexed render functions above, with GL_POLYGON+2
+ * slots.  Entries are listed points, lines, line loop, line strip,
+ * triangles, tri strip, tri fan, quads, quad strip, polygon, followed
+ * by render_noop -- presumably indexed by GL primitive enum value
+ * (TODO confirm against the caller).
+ */
+static render_func TAG(render_tab_verts)[GL_POLYGON+2] =
+{
+   TAG(render_points_verts),
+   TAG(render_lines_verts),
+   TAG(render_line_loop_verts),
+   TAG(render_line_strip_verts),
+   TAG(render_triangles_verts),
+   TAG(render_tri_strip_verts),
+   TAG(render_tri_fan_verts),
+   TAG(render_quads_verts),
+   TAG(render_quad_strip_verts),
+   TAG(render_poly_verts),
+   TAG(render_noop),
+};
+
+
+/****************************************************************************
+ *                 Render elts using hardware indexed verts                 *
+ ****************************************************************************/
+/* Render points from the element list returned by GET_ELTS().
+ *
+ * Indices elts[start] up to (not including) elts[count] are sent in
+ * chunks through emit_elts, with NEW_PRIMITIVE() after every chunk.
+ * The first chunk is limited to GET_CURRENT_VB_MAX_ELTS(), unless that
+ * is below 8, in which case GET_SUBSEQUENT_VB_MAX_ELTS() is used; all
+ * later chunks use the latter.  flags is not used.
+ */
+static void TAG(render_points_elts)( GLcontext *ctx,
+				     GLuint start,
+				     GLuint count,
+				     GLuint flags )
+{
+   LOCAL_VARS;
+   int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
+   int currentsz;
+   GLuint *elts = GET_ELTS();
+   GLuint j, nr;
+
+   ELT_INIT( GL_POINTS, HW_POINTS );
+
+   currentsz = GET_CURRENT_VB_MAX_ELTS();
+   if (currentsz < 8)
+      currentsz = dmasz;
+
+   for (j = start; j < count; j += nr ) {
+      nr = MIN2( currentsz, count - j );
+      TAG(emit_elts)( ctx, elts+j, nr );
+      NEW_PRIMITIVE();
+      currentsz = dmasz;
+   }
+}
+
+
+
+/* Render independent lines from the element list returned by
+ * GET_ELTS().
+ *
+ * Nothing is drawn for fewer than two indices.  When line stippling is
+ * enabled, PRIM_BEGIN triggers RESET_STIPPLE() and
+ * AUTO_STIPPLE( GL_TRUE ), and PRIM_END triggers
+ * AUTO_STIPPLE( GL_FALSE ) once all chunks have been emitted.
+ *
+ * The total index count and both buffer capacities are rounded down
+ * to even values so that no line is split between two buffers.
+ * Indices are sent in chunks through emit_elts, with NEW_PRIMITIVE()
+ * after every chunk.
+ */
+static void TAG(render_lines_elts)( GLcontext *ctx,
+				    GLuint start,
+				    GLuint count,
+				    GLuint flags )
+{
+   LOCAL_VARS;
+   int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
+   int currentsz;
+   GLuint *elts = GET_ELTS();
+   GLuint j, nr;
+
+   if (start+1 >= count)
+      return;
+
+   if ((flags & PRIM_BEGIN) && ctx->Line.StippleFlag) {
+      RESET_STIPPLE();
+      AUTO_STIPPLE( GL_TRUE );
+   }
+
+   ELT_INIT( GL_LINES, HW_LINES );
+
+   /* Fetch the room left in the current buffer before rounding it:
+    * currentsz holds no value until this assignment.
+    */
+   currentsz = GET_CURRENT_VB_MAX_ELTS();
+
+   /* Emit whole number of lines in total and in each buffer:
+    */
+   count -= (count-start) & 1;
+   currentsz -= currentsz & 1;
+   dmasz -= dmasz & 1;
+
+   if (currentsz < 8)
+      currentsz = dmasz;
+
+   for (j = start; j < count; j += nr ) {
+      nr = MIN2( currentsz, count - j );
+      TAG(emit_elts)( ctx, elts+j, nr );
+      NEW_PRIMITIVE();
+      currentsz = dmasz;
+   }
+
+   if ((flags & PRIM_END) && ctx->Line.StippleFlag)
+      AUTO_STIPPLE( GL_FALSE );
+}
+
+/* Render a line strip from the element list returned by GET_ELTS().
+ *
+ * Nothing is drawn for fewer than two indices.  The stipple is reset
+ * on PRIM_BEGIN when line stippling is enabled.  Indices are sent in
+ * chunks through emit_elts, with NEW_PRIMITIVE() after every chunk;
+ * j advances by nr-1 so that each chunk starts on the last index of
+ * the previous one.
+ */
+static void TAG(render_line_strip_elts)( GLcontext *ctx,
+					 GLuint start,
+					 GLuint count,
+					 GLuint flags )
+{
+   LOCAL_VARS;
+   int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
+   int currentsz;
+   GLuint *elts = GET_ELTS();
+   GLuint j, nr;
+
+   if (start+1 >= count)
+      return;
+
+   ELT_INIT( GL_LINE_STRIP, HW_LINE_STRIP );
+
+   if ((flags & PRIM_BEGIN) && ctx->Line.StippleFlag)
+      RESET_STIPPLE();
+
+   currentsz = GET_CURRENT_VB_MAX_ELTS();
+   if (currentsz < 8)
+      currentsz = dmasz;
+
+   for (j = start; j + 1 < count; j += nr - 1 ) {
+      nr = MIN2( currentsz, count - j );
+      TAG(emit_elts)( ctx, elts+j, nr );
+      NEW_PRIMITIVE();
+      currentsz = dmasz;
+   }
+}
+
+/* Render a line loop, or one slice of a loop, from the element list
+ * returned by GET_ELTS(), as an indexed GL_LINE_STRIP.
+ *
+ * j is the first index drawn: start when PRIM_BEGIN is set, start+1
+ * otherwise.  The function returns early when there are fewer than
+ * two indices to draw (measured from start with PRIM_END, from j
+ * without).  The stipple is reset on PRIM_BEGIN when line stippling
+ * is enabled.
+ *
+ * currentsz and dmasz are lowered by one before chunking.  Chunks are
+ * sent through emit_elts and overlap by one index (j += nr - 1).  With
+ * PRIM_END the loop is closed by a final emit_elts call that passes
+ * elts+start with nr == 1.  NEW_PRIMITIVE() is called once, at the
+ * end.
+ */
+static void TAG(render_line_loop_elts)( GLcontext *ctx,
+					GLuint start,
+					GLuint count,
+					GLuint flags )
+{
+   LOCAL_VARS;
+   int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
+   int currentsz;
+   GLuint *elts = GET_ELTS();
+   GLuint j, nr;
+
+   if (0) fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (flags & PRIM_BEGIN)
+      j = start;
+   else
+      j = start + 1;
+
+   
+   if (flags & PRIM_END) {
+      if (start+1 >= count)
+	 return;
+   } 
+   else {
+      if (j+1 >= count)
+	 return;
+   }
+
+   ELT_INIT( GL_LINE_STRIP, HW_LINE_STRIP );
+
+   if ((flags & PRIM_BEGIN) && ctx->Line.StippleFlag)
+      RESET_STIPPLE();
+
+   
+   currentsz = GET_CURRENT_VB_MAX_ELTS();
+   if (currentsz < 8) {
+      NEW_BUFFER();
+      currentsz = dmasz;
+   }
+
+   /* Ensure last vertex doesn't wrap:
+    */
+   currentsz--;
+   dmasz--;
+
+   for ( ; j + 1 < count; j += nr - 1 ) {
+      nr = MIN2( currentsz, count - j );
+      TAG(emit_elts)( ctx, elts+j, nr );
+      currentsz = dmasz;
+   }
+
+   if (flags & PRIM_END)
+      TAG(emit_elts)( ctx, elts+start, 1 );
+
+   NEW_PRIMITIVE();
+}
+
+/* Emit elts[start..count) as independent triangles, in chunks that always hold a whole number of triangles. */
+static void TAG(render_triangles_elts)( GLcontext *ctx,
+					GLuint start,
+					GLuint count,
+					GLuint flags )
+{
+   LOCAL_VARS;
+   GLuint *elts = GET_ELTS();
+   int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS()/3*3;	/* rounded down to a multiple of 3 */
+   int currentsz;
+   GLuint j, nr;
+
+   if (start+2 >= count)	/* fewer than three indices: no triangle */
+      return;
+
+/*     NEW_PRIMITIVE(); */
+   ELT_INIT( GL_TRIANGLES, HW_TRIANGLES );
+
+   currentsz = GET_CURRENT_VB_MAX_ELTS();
+
+   /* Emit whole number of tris in total.  dmasz is already a multiple
+    * of 3.
+    */
+   count -= (count-start)%3;	/* drop trailing indices that do not complete a triangle */
+   currentsz -= currentsz%3;
+   if (currentsz < 8)
+      currentsz = dmasz;
+
+   for (j = start; j < count; j += nr) {	/* no overlap between chunks: triangles are independent */
+      nr = MIN2( currentsz, count - j );
+      TAG(emit_elts)( ctx, elts+j, nr );
+      NEW_PRIMITIVE();
+      currentsz = dmasz;
+   }
+}
+
+
+/* Emit elts[start..count) as a triangle strip, in even-sized chunks that share two indices at each seam. */
+static void TAG(render_tri_strip_elts)( GLcontext *ctx,
+					GLuint start,
+					GLuint count,
+					GLuint flags )
+{
+   LOCAL_VARS;
+   GLuint j, nr;
+   GLuint *elts = GET_ELTS();
+   int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
+   int currentsz;
+
+   if (start+2 >= count)	/* fewer than three indices: no triangle */
+      return;
+
+   ELT_INIT( GL_TRIANGLE_STRIP, HW_TRIANGLE_STRIP_0 );
+
+   currentsz = GET_CURRENT_VB_MAX_ELTS();
+   if (currentsz < 8) {
+      NEW_BUFFER();
+      currentsz = dmasz;
+   }
+
+   if ((flags & PRIM_PARITY) && count - start > 2) {	/* second test always holds after the early return above */
+      TAG(emit_elts)( ctx, elts+start, 1 );	/* extra copy of the first index; presumably flips the strip's winding parity */
+      currentsz--;
+   }
+
+   /* Keep the same winding over multiple buffers:
+    */
+   dmasz -= (dmasz & 1);
+   currentsz -= (currentsz & 1);
+
+   for (j = start ; j + 2 < count; j += nr - 2 ) {	/* step nr-2: next chunk repeats this chunk's last two indices */
+      nr = MIN2( currentsz, count - j );
+      TAG(emit_elts)( ctx, elts+j, nr );
+      NEW_PRIMITIVE();
+      currentsz = dmasz;
+   }
+}
+/* Emit elts[start..count) as a triangle fan; every chunk begins with the hub index elts[start]. */
+static void TAG(render_tri_fan_elts)( GLcontext *ctx,
+				      GLuint start,
+				      GLuint count,
+				      GLuint flags )
+{
+   LOCAL_VARS;
+   GLuint *elts = GET_ELTS();
+   GLuint j, nr;
+   int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
+   int currentsz;
+
+   if (start+2 >= count)	/* fewer than three indices: no triangle */
+      return;
+
+   ELT_INIT( GL_TRIANGLE_FAN, HW_TRIANGLE_FAN );
+
+   currentsz = GET_CURRENT_VB_MAX_ELTS();
+   if (currentsz < 8) {
+      NEW_BUFFER();
+      currentsz = dmasz;
+   }
+
+   for (j = start + 1 ; j + 1 < count; j += nr - 1 ) {	/* j walks the rim; nr counts the hub plus nr-1 rim indices */
+      nr = MIN2( currentsz, count - j + 1 );
+      TAG(emit_elts)( ctx, elts+start, 1 );	/* hub index first in every chunk */
+      TAG(emit_elts)( ctx, elts+j, nr - 1 );
+      NEW_PRIMITIVE();
+      currentsz = dmasz;
+   }
+}
+
+/* Emit elts[start..count) as a polygon; chunked like the triangle fan, with elts[start] leading every chunk. */
+static void TAG(render_poly_elts)( GLcontext *ctx,
+				   GLuint start,
+				   GLuint count,
+				   GLuint flags )
+{
+   LOCAL_VARS;
+   GLuint *elts = GET_ELTS();
+   GLuint j, nr;
+   int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
+   int currentsz;
+
+   if (start+2 >= count)	/* fewer than three indices: no polygon */
+      return;
+
+   ELT_INIT( GL_POLYGON, HW_POLYGON );
+
+   currentsz = GET_CURRENT_VB_MAX_ELTS();
+   if (currentsz < 8) {
+      NEW_BUFFER();
+      currentsz = dmasz;
+   }
+
+   for (j = start + 1 ; j + 1 < count ; j += nr - 1 ) {	/* nr counts elts[start] plus nr-1 further indices from j */
+      nr = MIN2( currentsz, count - j + 1 );
+      TAG(emit_elts)( ctx, elts+start, 1 );	/* first polygon index repeated at the head of every chunk */
+      TAG(emit_elts)( ctx, elts+j, nr - 1 );
+      NEW_PRIMITIVE();
+      currentsz = dmasz;
+   }
+}
+/* Emit elts[start..count) as a quad strip: a triangle strip normally, independent triangles when flat shading. */
+static void TAG(render_quad_strip_elts)( GLcontext *ctx,
+					 GLuint start,
+					 GLuint count,
+					 GLuint flags )
+{
+   if (start+3 >= count)	/* fewer than four indices: no quad */
+      return;
+
+   if (HAVE_QUAD_STRIPS && 0) {	/* native quad-strip path is disabled by the "&& 0" */
+   }
+   else {
+      LOCAL_VARS;
+      GLuint *elts = GET_ELTS();
+      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
+      int currentsz;
+      GLuint j, nr;
+
+      NEW_PRIMITIVE();
+      currentsz = GET_CURRENT_VB_MAX_ELTS();
+
+      /* Emit whole number of quads in total, and in each buffer.
+       */
+      dmasz -= dmasz & 1;
+      count -= (count-start) & 1;
+      currentsz -= currentsz & 1;
+
+      if (currentsz < 12)
+	 currentsz = dmasz;
+
+      if (ctx->_TriangleCaps & DD_FLATSHADE) {
+	 ELT_INIT( GL_TRIANGLES, HW_TRIANGLES );
+
+	 currentsz = currentsz/6*2;	/* each quad costs six output elts but only two new source indices */
+	 dmasz = dmasz/6*2;
+
+	 for (j = start; j + 3 < count; j += nr - 2 ) {	/* step nr-2: the next chunk reuses the last index pair */
+	    nr = MIN2( currentsz, count - j );
+
+	    if (nr >= 4)
+	    {
+	       GLuint i;
+	       GLint quads = (nr/2)-1;
+	       ELTS_VARS;
+
+	       ALLOC_ELTS( quads*6 );
+
+	       for ( i = j ; i < j+quads*2 ; i += 2 ) {	/* index from j so a non-zero start is honoured, as in the strip path below */
+		  EMIT_TWO_ELTS( 0, elts[i], elts[i+1] );
+		  EMIT_TWO_ELTS( 2, elts[i+2], elts[i+1] );
+		  EMIT_TWO_ELTS( 4, elts[i+3], elts[i+2] );
+		  INCR_ELTS( 6 );
+	       }
+
+	       NEW_PRIMITIVE();
+	    }
+
+	    currentsz = dmasz;
+	 }
+      }
+      else {
+	 ELT_INIT( GL_TRIANGLE_STRIP, HW_TRIANGLE_STRIP_0 );
+
+	 for (j = start; j + 3 < count; j += nr - 2 ) {	/* step nr-2: next chunk repeats the last two indices */
+	    nr = MIN2( currentsz, count - j );
+	    TAG(emit_elts)( ctx, elts+j, nr );
+	    NEW_PRIMITIVE();
+	    currentsz = dmasz;
+	 }
+      }
+   }
+}
+
+/* Emit elts[start..count) as independent quads, each expanded to two triangles (six output elts per quad). */
+static void TAG(render_quads_elts)( GLcontext *ctx,
+				    GLuint start,
+				    GLuint count,
+				    GLuint flags )
+{
+   if (start+3 >= count)	/* fewer than four indices: no quad */
+      return;
+
+   if (HAVE_QUADS && 0) {	/* native quad path is disabled by the "&& 0" */
+   } else {
+      LOCAL_VARS;
+      GLuint *elts = GET_ELTS();
+      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
+      int currentsz;
+      GLuint j, nr;
+
+      ELT_INIT( GL_TRIANGLES, HW_TRIANGLES );
+      currentsz = GET_CURRENT_VB_MAX_ELTS();
+
+      /* Emit whole number of quads in total, and in each buffer.
+       */
+      dmasz -= dmasz & 3;
+      count -= (count-start) & 3;
+      currentsz -= currentsz & 3;
+
+      /* Adjust for rendering as triangles:
+       */
+      currentsz = currentsz/6*4;
+      dmasz = dmasz/6*4;
+
+      if (currentsz < 8)
+	 currentsz = dmasz;
+
+      /* Quads share no indices, so each pass consumes exactly nr of them. */
+      for (j = start; j + 3 < count; j += nr ) {
+	 nr = MIN2( currentsz, count - j );
+
+	 if (nr >= 4)
+	 {
+	    GLint quads = nr/4;
+	    GLuint i;
+	    ELTS_VARS;
+	    ALLOC_ELTS( quads * 6 );
+
+	    for ( i = j ; i < j+quads*4 ; i += 4 ) {	/* index from j so a non-zero start is honoured */
+	       EMIT_TWO_ELTS( 0, elts[i], elts[i+1] );
+	       EMIT_TWO_ELTS( 2, elts[i+3], elts[i+1] );
+	       EMIT_TWO_ELTS( 4, elts[i+2], elts[i+3] );
+	       INCR_ELTS( 6 );
+	    }
+	 }
+
+	 NEW_PRIMITIVE();
+	 currentsz = dmasz;
+      }
+   }
+}
+
+
+/* Indexed-primitive render functions, listed in GL primitive enum order (GL_POINTS .. GL_POLYGON) plus a final no-op. */
+static render_func TAG(render_tab_elts)[GL_POLYGON+2] =
+{
+   TAG(render_points_elts),
+   TAG(render_lines_elts),
+   TAG(render_line_loop_elts),
+   TAG(render_line_strip_elts),
+   TAG(render_triangles_elts),
+   TAG(render_tri_strip_elts),
+   TAG(render_tri_fan_elts),
+   TAG(render_quads_elts),
+   TAG(render_quad_strip_elts),
+   TAG(render_poly_elts),
+   TAG(render_noop),
+};
diff --git a/xc/extras/Mesa/src/tnl_dd/t_dd_triemit.h b/xc/extras/Mesa/src/tnl_dd/t_dd_triemit.h
new file mode 100644
index 000000000..e3063e4dd
--- /dev/null
+++ b/xc/extras/Mesa/src/tnl_dd/t_dd_triemit.h
@@ -0,0 +1,117 @@
+/* COPY_DWORDS(j, vb, vertsize, v): copy vertsize 32-bit words from v to vb and advance vb past them; j is scratch. */
+#if defined(USE_X86_ASM)
+#define COPY_DWORDS( j, vb, vertsize, v )				\
+do {									\
+	int __tmp;							\
+	__asm__ __volatile__( "rep ; movsl"				\
+			      : "=%c" (j), "=D" (vb), "=S" (__tmp)	\
+			      : "0" (vertsize),				\
+			        "D" ((long)vb),				\
+			        "S" ((long)v) );			\
+} while (0)
+#else
+#define COPY_DWORDS( j, vb, vertsize, v )				\
+do {									\
+   for ( j = 0 ; j < vertsize ; j++ )					\
+      vb[j] = ((GLuint *)v)[j];						\
+   vb += vertsize;							\
+} while (0)
+#endif
+
+
+/* Copy one quad into newly allocated vertex space: four vertices with HAVE_QUADS, otherwise six (two triangles). */
+#if HAVE_QUADS
+static __inline void TAG(quad)( CTX_ARG,
+				VERTEX *v0,
+				VERTEX *v1,
+				VERTEX *v2,
+				VERTEX *v3 )
+{
+   GLuint vertsize = GET_VERTEX_DWORDS();
+   GLuint *vb = (GLuint *)ALLOC_VERTS( 4, vertsize);
+   GLuint j;
+
+   COPY_DWORDS( j, vb, vertsize, v0 );	/* each copy advances vb by one vertex */
+   COPY_DWORDS( j, vb, vertsize, v1 );
+   COPY_DWORDS( j, vb, vertsize, v2 );
+   COPY_DWORDS( j, vb, vertsize, v3 );
+}
+#else
+static __inline void TAG(quad)( CTX_ARG,
+				VERTEX *v0,
+				VERTEX *v1,
+				VERTEX *v2,
+				VERTEX *v3 )
+{
+   GLuint vertsize = GET_VERTEX_DWORDS();
+   GLuint *vb = (GLuint *)ALLOC_VERTS(  6, vertsize);	/* two triangles: (v0,v1,v3) and (v1,v2,v3) */
+   GLuint j;
+
+   COPY_DWORDS( j, vb, vertsize, v0 );
+   COPY_DWORDS( j, vb, vertsize, v1 );
+   COPY_DWORDS( j, vb, vertsize, v3 );
+   COPY_DWORDS( j, vb, vertsize, v1 );
+   COPY_DWORDS( j, vb, vertsize, v2 );
+   COPY_DWORDS( j, vb, vertsize, v3 );
+}
+#endif
+
+/* Copy the three vertices of one triangle, in order, into newly allocated vertex space. */
+static __inline void TAG(triangle)( CTX_ARG,
+				    VERTEX *v0,
+				    VERTEX *v1,
+				    VERTEX *v2 )
+{
+   GLuint vertsize = GET_VERTEX_DWORDS();
+   GLuint *vb = (GLuint *)ALLOC_VERTS( 3, vertsize);
+   GLuint j;
+
+   COPY_DWORDS( j, vb, vertsize, v0 );	/* each copy advances vb by one vertex */
+   COPY_DWORDS( j, vb, vertsize, v1 );
+   COPY_DWORDS( j, vb, vertsize, v2 );
+}
+
+/* Copy the two vertices of one line into newly allocated vertex space; compiled only when HAVE_LINES is set. */
+#if HAVE_LINES
+static __inline void TAG(line)( CTX_ARG,
+				VERTEX *v0,
+				VERTEX *v1 )
+{
+   GLuint vertsize = GET_VERTEX_DWORDS();
+   GLuint *vb = (GLuint *)ALLOC_VERTS( 2, vertsize);
+   GLuint j;
+
+   COPY_DWORDS( j, vb, vertsize, v0 );	/* each copy advances vb by one vertex */
+   COPY_DWORDS( j, vb, vertsize, v1 );
+}
+#endif
+/* Copy the single vertex of one point into newly allocated vertex space; compiled only when HAVE_POINTS is set. */
+#if HAVE_POINTS
+static __inline void TAG(point)( CTX_ARG,
+				 VERTEX *v0 )
+{
+   GLuint vertsize = GET_VERTEX_DWORDS();
+   GLuint *vb = (GLuint *)ALLOC_VERTS( 1, vertsize);
+   GLuint j;	/* unsigned like vertsize, which the portable COPY_DWORDS compares it against */
+
+   COPY_DWORDS( j, vb, vertsize, v0 );
+}
+#endif
+
+/* Copy an n-vertex polygon as n-2 triangles fanned around elts[0]; NOTE(review): n < 3 wraps (n-2)*3 - confirm callers. */
+static void TAG(fast_clipped_poly)( GLcontext *ctx, const GLuint *elts,
+				    GLuint n )
+{
+   LOCAL_VARS
+   GLuint vertsize = GET_VERTEX_DWORDS();
+   GLuint *vb = (GLuint *)ALLOC_VERTS( (n-2) * 3, vertsize );
+   const GLuint *start = (const GLuint *)VERT(elts[0]);
+   GLuint i,j;	/* unsigned to match n and vertsize in the comparisons below */
+
+   for (i = 2 ; i < n ; i++) {	/* triangle i-2 is (elts[0], elts[i-1], elts[i]) */
+      COPY_DWORDS( j, vb, vertsize, start );
+      COPY_DWORDS( j, vb, vertsize, VERT(elts[i-1]) );
+      COPY_DWORDS( j, vb, vertsize, VERT(elts[i]) );
+   }
+}
+
diff --git a/xc/extras/Mesa/src/tnl_dd/t_dd_vbtmp.h b/xc/extras/Mesa/src/tnl_dd/t_dd_vbtmp.h
index e8732057e..96f6bd0c3 100644
--- a/xc/extras/Mesa/src/tnl_dd/t_dd_vbtmp.h
+++ b/xc/extras/Mesa/src/tnl_dd/t_dd_vbtmp.h
@@ -188,15 +188,29 @@ static void TAG(emit)( GLcontext *ctx,
    }
 
    if (DO_SPEC) {
-      if (VB->SecondaryColorPtr[0]->Type != GL_UNSIGNED_BYTE)
-	 IMPORT_FLOAT_SPEC_COLORS( ctx );
-      spec = (GLubyte (*)[4])VB->SecondaryColorPtr[0]->Ptr;
-      spec_stride = VB->SecondaryColorPtr[0]->StrideB;
+      if (VB->SecondaryColorPtr[0]) {
+	 if (VB->SecondaryColorPtr[0]->Type != GL_UNSIGNED_BYTE)
+	    IMPORT_FLOAT_SPEC_COLORS( ctx );
+	 spec = (GLubyte (*)[4])VB->SecondaryColorPtr[0]->Ptr;
+	 spec_stride = VB->SecondaryColorPtr[0]->StrideB;
+      } else {
+	 GLubyte tmp[4];
+	 spec = &tmp;
+	 spec_stride = 0;
+      }
    }
 
+
    if (DO_FOG) {
-      fog = VB->FogCoordPtr->data;
-      fog_stride = VB->FogCoordPtr->stride;
+      if (VB->FogCoordPtr) {
+	 fog = VB->FogCoordPtr->data;
+	 fog_stride = VB->FogCoordPtr->stride;
+      }
+      else {
+	 GLfloat tmp = 0;
+	 fog = &tmp;
+	 fog_stride = 0;
+      }
    }
 
    if (VB->importable_data) {
diff --git a/xc/extras/Mesa/src/tnl_dd/t_dd_vertex.h b/xc/extras/Mesa/src/tnl_dd/t_dd_vertex.h
index d67ad7bfc..a456f72b9 100644
--- a/xc/extras/Mesa/src/tnl_dd/t_dd_vertex.h
+++ b/xc/extras/Mesa/src/tnl_dd/t_dd_vertex.h
@@ -76,9 +76,3 @@ typedef union {
    GLubyte ub4[24][4];
 } TAG(Vertex), *TAG(VertexPtr);
 
-typedef struct {
-   GLfloat clip[4];
-   GLuint mask;
-   GLuint pad;			/* alignment */
-   TAG(Vertex) v;
-} TAG(TnlVertex), *TAG(TnlVertexPtr);
diff --git a/xc/lib/GL/glx/glxcmds.c b/xc/lib/GL/glx/glxcmds.c
index 6809d15ae..493ba1818 100644
--- a/xc/lib/GL/glx/glxcmds.c
+++ b/xc/lib/GL/glx/glxcmds.c
@@ -2171,9 +2171,9 @@ get_glx_proc_address(const char *funcName)
 
 
 #ifndef GLX_BUILT_IN_XMESA
-void (*glXGetProcAddressARB(const GLubyte *procName))()
+void (*glXGetProcAddressARB(const GLubyte *procName))( void )
 {
-   typedef void (*gl_function)();
+   typedef void (*gl_function)( void );
    gl_function f;
 
 #if defined(GLX_DIRECT_RENDERING)
diff --git a/xc/lib/GL/mesa/src/drv/Imakefile b/xc/lib/GL/mesa/src/drv/Imakefile
index 8c7068088..878799e84 100644
--- a/xc/lib/GL/mesa/src/drv/Imakefile
+++ b/xc/lib/GL/mesa/src/drv/Imakefile
@@ -5,7 +5,7 @@ XCOMM $XFree86: xc/lib/GL/mesa/src/drv/Imakefile,v 1.19 2001/03/23 20:56:33 dawe
 #define IHaveSubdirs
 #define PassCDebugFlags
 
-SUBDIRS = common DriDrivers
+SUBDIRS = common radeon 
 
 MakeSubdirs($(SUBDIRS))
 DependSubdirs($(SUBDIRS))
diff --git a/xc/lib/GL/mesa/src/drv/ffb/ffb_state.c b/xc/lib/GL/mesa/src/drv/ffb/ffb_state.c
index 4db602cb6..a729b1cec 100644
--- a/xc/lib/GL/mesa/src/drv/ffb/ffb_state.c
+++ b/xc/lib/GL/mesa/src/drv/ffb/ffb_state.c
@@ -468,7 +468,7 @@ static void ffbDDScissor(GLcontext *ctx, GLint cx, GLint cy,
 	ffbCalcViewport(ctx);
 }
 
-static GLboolean ffbDDSetDrawBuffer(GLcontext *ctx, GLenum buffer)
+static void ffbDDSetDrawBuffer(GLcontext *ctx, GLenum buffer)
 {
 	ffbContextPtr fmesa = FFB_CONTEXT(ctx);
 	unsigned int fbc = fmesa->fbc;
@@ -498,15 +498,13 @@ static GLboolean ffbDDSetDrawBuffer(GLcontext *ctx, GLenum buffer)
 		break;
 
 	default:
-		return GL_FALSE;
+		return;
 	};
 
 	if (fbc != fmesa->fbc) {
 		fmesa->fbc = fbc;
 		FFB_MAKE_DIRTY(fmesa, FFB_STATE_FBC, 1);
 	}
-
-	return GL_TRUE;
 }
 
 static void ffbDDSetReadBuffer(GLcontext *ctx, GLframebuffer *colorBuffer,
diff --git a/xc/lib/GL/mesa/src/drv/gamma/gamma_dd.c b/xc/lib/GL/mesa/src/drv/gamma/gamma_dd.c
index a6d8a849d..37922289b 100644
--- a/xc/lib/GL/mesa/src/drv/gamma/gamma_dd.c
+++ b/xc/lib/GL/mesa/src/drv/gamma/gamma_dd.c
@@ -105,6 +105,7 @@ void gammaDDInitExtensions( GLcontext *ctx )
 void gammaDDInitDriverFuncs( GLcontext *ctx )
 {
    ctx->Driver.GetBufferSize		= gammaDDGetBufferSize;
+   ctx->Driver.ResizeBuffers            = _swrast_alloc_buffers;
    ctx->Driver.GetString		= gammaDDGetString;
 
    ctx->Driver.Error			= NULL;
diff --git a/xc/lib/GL/mesa/src/drv/i810/i810context.c b/xc/lib/GL/mesa/src/drv/i810/i810context.c
index 890e0fda1..4b954eac8 100644
--- a/xc/lib/GL/mesa/src/drv/i810/i810context.c
+++ b/xc/lib/GL/mesa/src/drv/i810/i810context.c
@@ -300,13 +300,12 @@ void i810XMesaSetBackClipRects( i810ContextPtr imesa )
 static void i810XMesaWindowMoved( i810ContextPtr imesa )
 {
    switch (imesa->glCtx->Color.DriverDrawBuffer) {
-   case GL_FRONT_LEFT:
-      i810XMesaSetFrontClipRects( imesa );
-      break;
    case GL_BACK_LEFT:
       i810XMesaSetBackClipRects( imesa );
       break;
+   case GL_FRONT_LEFT:
    default:
+      i810XMesaSetFrontClipRects( imesa );
       break;
    }
 
diff --git a/xc/lib/GL/mesa/src/drv/i810/i810state.c b/xc/lib/GL/mesa/src/drv/i810/i810state.c
index 486ac543b..e5b5d4d24 100644
--- a/xc/lib/GL/mesa/src/drv/i810/i810state.c
+++ b/xc/lib/GL/mesa/src/drv/i810/i810state.c
@@ -276,7 +276,7 @@ static void i810RenderMode( GLcontext *ctx, GLenum mode )
 }
 
 
-static GLboolean i810SetDrawBuffer(GLcontext *ctx, GLenum mode )
+static void i810SetDrawBuffer(GLcontext *ctx, GLenum mode )
 {
    i810ContextPtr imesa = I810_CONTEXT(ctx);
 
@@ -290,7 +290,6 @@ static GLboolean i810SetDrawBuffer(GLcontext *ctx, GLenum mode )
       imesa->readMap = (char *)imesa->driScreen->pFB;
       i810XMesaSetFrontClipRects( imesa );
       FALLBACK( imesa, I810_FALLBACK_DRAW_BUFFER, GL_FALSE );
-      return GL_TRUE;
    }
    else if (mode == GL_BACK_LEFT)
    {
@@ -302,11 +301,9 @@ static GLboolean i810SetDrawBuffer(GLcontext *ctx, GLenum mode )
       imesa->readMap = imesa->i810Screen->back.map;
       i810XMesaSetBackClipRects( imesa );
       FALLBACK( imesa, I810_FALLBACK_DRAW_BUFFER, GL_FALSE );
-      return GL_TRUE;
    }
    else {
       FALLBACK( imesa, I810_FALLBACK_DRAW_BUFFER, GL_TRUE );
-      return GL_FALSE;
    }
 }
 
diff --git a/xc/lib/GL/mesa/src/drv/i830/i830_render.c b/xc/lib/GL/mesa/src/drv/i830/i830_render.c
index 15edda3bc..5416b6595 100644
--- a/xc/lib/GL/mesa/src/drv/i830/i830_render.c
+++ b/xc/lib/GL/mesa/src/drv/i830/i830_render.c
@@ -1,4 +1,4 @@
-/* $Id: i830_render.c,v 1.1 2002/05/29 21:21:48 jhartmann Exp $ */
+/* $Id: i830_render.c,v 1.2 2002/06/12 15:50:25 keithw Exp $ */
 
 /*
  * Intel i810 DRI driver for Mesa 3.5
diff --git a/xc/lib/GL/mesa/src/drv/mga/mgabuffers.c b/xc/lib/GL/mesa/src/drv/mga/mgabuffers.c
index c1790b9a5..7c85a6237 100644
--- a/xc/lib/GL/mesa/src/drv/mga/mgabuffers.c
+++ b/xc/lib/GL/mesa/src/drv/mga/mgabuffers.c
@@ -234,7 +234,7 @@ void mgaUpdateRects( mgaContextPtr mmesa, GLuint buffers )
 
 
 
-GLboolean mgaDDSetDrawBuffer(GLcontext *ctx, GLenum mode )
+void mgaDDSetDrawBuffer(GLcontext *ctx, GLenum mode )
 {
    mgaContextPtr mmesa = MGA_CONTEXT(ctx);
 
@@ -250,7 +250,6 @@ GLboolean mgaDDSetDrawBuffer(GLcontext *ctx, GLenum mode )
       mmesa->draw_buffer = MGA_FRONT;
       mgaXMesaSetFrontClipRects( mmesa );
       FALLBACK( ctx, MGA_FALLBACK_DRAW_BUFFER, GL_FALSE );
-      return GL_TRUE;
    }
    else if (mode == GL_BACK_LEFT)
    {
@@ -261,12 +260,10 @@ GLboolean mgaDDSetDrawBuffer(GLcontext *ctx, GLenum mode )
       mmesa->dirty |= MGA_UPLOAD_CONTEXT;
       mgaXMesaSetBackClipRects( mmesa );
       FALLBACK( ctx, MGA_FALLBACK_DRAW_BUFFER, GL_FALSE );
-      return GL_TRUE;
    }
    else
    {
       FALLBACK( ctx, MGA_FALLBACK_DRAW_BUFFER, GL_TRUE );
-      return GL_FALSE;
    }
 }
 
diff --git a/xc/lib/GL/mesa/src/drv/mga/mgabuffers.h b/xc/lib/GL/mesa/src/drv/mga/mgabuffers.h
index d3d5f9502..8780ea5a8 100644
--- a/xc/lib/GL/mesa/src/drv/mga/mgabuffers.h
+++ b/xc/lib/GL/mesa/src/drv/mga/mgabuffers.h
@@ -29,7 +29,7 @@
 #ifndef MGA_BUFFERS_H
 #define MGA_BUFFERS_H
 
-GLboolean mgaDDSetDrawBuffer(GLcontext *ctx, GLenum mode );
+void mgaDDSetDrawBuffer(GLcontext *ctx, GLenum mode );
 
 void mgaUpdateRects( mgaContextPtr mmesa, GLuint buffers );
 
diff --git a/xc/lib/GL/mesa/src/drv/r128/r128_state.c b/xc/lib/GL/mesa/src/drv/r128/r128_state.c
index e989290bc..f900ad5eb 100644
--- a/xc/lib/GL/mesa/src/drv/r128/r128_state.c
+++ b/xc/lib/GL/mesa/src/drv/r128/r128_state.c
@@ -678,10 +678,9 @@ static void r128DDLogicOpCode( GLcontext *ctx, GLenum opcode )
    }
 }
 
-static GLboolean r128DDSetDrawBuffer( GLcontext *ctx, GLenum mode )
+static void r128DDSetDrawBuffer( GLcontext *ctx, GLenum mode )
 {
    r128ContextPtr rmesa = R128_CONTEXT(ctx);
-   int found = GL_TRUE;
 
    FLUSH_BATCH( rmesa );
 
@@ -701,7 +700,6 @@ static GLboolean r128DDSetDrawBuffer( GLcontext *ctx, GLenum mode )
 	 break;
       default:
 	 FALLBACK( rmesa, R128_FALLBACK_DRAW_BUFFER, GL_TRUE );
-	 found = GL_FALSE;
 	 break;
       }
 
@@ -709,8 +707,6 @@ static GLboolean r128DDSetDrawBuffer( GLcontext *ctx, GLenum mode )
 					 (rmesa->drawOffset >> 5));
       rmesa->new_state |= R128_NEW_WINDOW;
    }
-
-   return found;
 }
 
 
diff --git a/xc/lib/GL/mesa/src/drv/radeon/Imakefile.inc b/xc/lib/GL/mesa/src/drv/radeon/Imakefile.inc
index 58c3f4fba..559bb23c3 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/Imakefile.inc
+++ b/xc/lib/GL/mesa/src/drv/radeon/Imakefile.inc
@@ -26,7 +26,8 @@ ALLOC_DEFINES = -DMALLOC_0_RETURNS_NULL
 		-I$(XF86DRIVERSRC)/ati \
 		-I$(XF86COMSRC) \
 		-I$(GLXLIBSRC)/dri/drm \
-		-I$(GLXLIBSRC)/include
+		-I$(GLXLIBSRC)/include \
+		-I$(XTOP)/include
 #endif
 
 MESA_INCLUDES = -I$(MESASRCDIR)/src \
@@ -35,81 +36,129 @@ MESA_INCLUDES = -I$(MESASRCDIR)/src \
    X_INCLUDES = -I$(XINCLUDESRC) -I$(EXTINCSRC)
 
    RADEONSRCS = $(MESADRVRADEONBUILDDIR)radeon_context.c \
+		$(MESADRVRADEONBUILDDIR)radeon_compat.c \
 		$(MESADRVRADEONBUILDDIR)radeon_ioctl.c \
 		$(MESADRVRADEONBUILDDIR)radeon_lock.c \
-		$(MESADRVRADEONBUILDDIR)radeon_render.c \
+		$(MESADRVRADEONBUILDDIR)radeon_maos.c \
+	        $(MESADRVRADEONBUILDDIR)radeon_sanity.c \
 	        $(MESADRVRADEONBUILDDIR)radeon_screen.c \
 		$(MESADRVRADEONBUILDDIR)radeon_span.c \
 		$(MESADRVRADEONBUILDDIR)radeon_state.c \
+		$(MESADRVRADEONBUILDDIR)radeon_state_init.c \
+		$(MESADRVRADEONBUILDDIR)radeon_swtcl.c \
+		$(MESADRVRADEONBUILDDIR)radeon_tcl.c \
 		$(MESADRVRADEONBUILDDIR)radeon_tex.c \
 		$(MESADRVRADEONBUILDDIR)radeon_texmem.c \
 		$(MESADRVRADEONBUILDDIR)radeon_texstate.c \
-		$(MESADRVRADEONBUILDDIR)radeon_tris.c \
-		$(MESADRVRADEONBUILDDIR)radeon_vb.c 
+		$(MESADRVRADEONBUILDDIR)radeon_vtxfmt.c \
+		$(MESADRVRADEONBUILDDIR)radeon_vtxfmt_x86.c \
+		$(MESADRVRADEONBUILDDIR)radeon_vtxtmp_x86.S \
+		$(MESADRVRADEONBUILDDIR)radeon_vtxfmt_sse.c \
+		$(MESADRVRADEONBUILDDIR)radeon_vtxfmt_c.c
 
    RADEONOBJS = $(MESADRVRADEONBUILDDIR)radeon_context.o \
+		$(MESADRVRADEONBUILDDIR)radeon_compat.o \
 		$(MESADRVRADEONBUILDDIR)radeon_ioctl.o \
 		$(MESADRVRADEONBUILDDIR)radeon_lock.o \
-		$(MESADRVRADEONBUILDDIR)radeon_render.o \
+		$(MESADRVRADEONBUILDDIR)radeon_maos.o \
+	        $(MESADRVRADEONBUILDDIR)radeon_sanity.o \
 	        $(MESADRVRADEONBUILDDIR)radeon_screen.o \
 		$(MESADRVRADEONBUILDDIR)radeon_span.o \
 		$(MESADRVRADEONBUILDDIR)radeon_state.o \
+		$(MESADRVRADEONBUILDDIR)radeon_state_init.o \
+		$(MESADRVRADEONBUILDDIR)radeon_swtcl.o \
+		$(MESADRVRADEONBUILDDIR)radeon_tcl.o \
 		$(MESADRVRADEONBUILDDIR)radeon_tex.o \
 		$(MESADRVRADEONBUILDDIR)radeon_texmem.o \
 		$(MESADRVRADEONBUILDDIR)radeon_texstate.o \
-		$(MESADRVRADEONBUILDDIR)radeon_tris.o \
-		$(MESADRVRADEONBUILDDIR)radeon_vb.o 
+		$(MESADRVRADEONBUILDDIR)radeon_vtxfmt.o \
+		$(MESADRVRADEONBUILDDIR)radeon_vtxfmt_c.o \
+		$(MESADRVRADEONBUILDDIR)radeon_vtxfmt_x86.o \
+		$(MESADRVRADEONBUILDDIR)radeon_vtxtmp_x86.o \
+		$(MESADRVRADEONBUILDDIR)radeon_vtxfmt_sse.o 
 
    RADEONUOBJS = $(MESADRVRADEONBUILDDIR)unshared/radeon_context.o \
+		$(MESADRVRADEONBUILDDIR)unshared/radeon_compat.o \
 		$(MESADRVRADEONBUILDDIR)unshared/radeon_ioctl.o \
 		$(MESADRVRADEONBUILDDIR)unshared/radeon_lock.o \
-		$(MESADRVRADEONBUILDDIR)unshared/radeon_render.o \
+		$(MESADRVRADEONBUILDDIR)unshared/radeon_maos.o \
+	        $(MESADRVRADEONBUILDDIR)unshared/radeon_sanity.o \
 	        $(MESADRVRADEONBUILDDIR)unshared/radeon_screen.o \
 		$(MESADRVRADEONBUILDDIR)unshared/radeon_span.o \
 		$(MESADRVRADEONBUILDDIR)unshared/radeon_state.o \
+		$(MESADRVRADEONBUILDDIR)unshared/radeon_state_init.o \
+		$(MESADRVRADEONBUILDDIR)unshared/radeon_swtcl.o \
+		$(MESADRVRADEONBUILDDIR)unshared/radeon_tcl.o \
 		$(MESADRVRADEONBUILDDIR)unshared/radeon_tex.o \
 		$(MESADRVRADEONBUILDDIR)unshared/radeon_texmem.o \
 		$(MESADRVRADEONBUILDDIR)unshared/radeon_texstate.o \
-		$(MESADRVRADEONBUILDDIR)unshared/radeon_tris.o \
-		$(MESADRVRADEONBUILDDIR)unshared/radeon_vb.o 
+		$(MESADRVRADEONBUILDDIR)unshared/radeon_vtxfmt.o \
+		$(MESADRVRADEONBUILDDIR)unshared/radeon_vtxfmt_c.o \
+		$(MESADRVRADEONBUILDDIR)unshared/radeon_vtxfmt_x86.o \
+		$(MESADRVRADEONBUILDDIR)unshared/radeon_vtxtmp_x86.o \
+		$(MESADRVRADEONBUILDDIR)unshared/radeon_vtxfmt_sse.o 
 
    RADEONDOBJS = $(MESADRVRADEONBUILDDIR)debugger/radeon_context.o \
+		$(MESADRVRADEONBUILDDIR)debugger/radeon_compat.o \
 		$(MESADRVRADEONBUILDDIR)debugger/radeon_ioctl.o \
 		$(MESADRVRADEONBUILDDIR)debugger/radeon_lock.o \
-		$(MESADRVRADEONBUILDDIR)debugger/radeon_render.o \
+		$(MESADRVRADEONBUILDDIR)debugger/radeon_maos.o \
+	        $(MESADRVRADEONBUILDDIR)debugger/radeon_sanity.o \
 	        $(MESADRVRADEONBUILDDIR)debugger/radeon_screen.o \
 		$(MESADRVRADEONBUILDDIR)debugger/radeon_span.o \
 		$(MESADRVRADEONBUILDDIR)debugger/radeon_state.o \
+		$(MESADRVRADEONBUILDDIR)debugger/radeon_state_init.o \
+		$(MESADRVRADEONBUILDDIR)debugger/radeon_swtcl.o \
+		$(MESADRVRADEONBUILDDIR)debugger/radeon_tcl.o \
 		$(MESADRVRADEONBUILDDIR)debugger/radeon_tex.o \
 		$(MESADRVRADEONBUILDDIR)debugger/radeon_texmem.o \
 		$(MESADRVRADEONBUILDDIR)debugger/radeon_texstate.o \
-		$(MESADRVRADEONBUILDDIR)debugger/radeon_tris.o \
-		$(MESADRVRADEONBUILDDIR)debugger/radeon_vb.o 
+		$(MESADRVRADEONBUILDDIR)debugger/radeon_vtxfmt.o \
+		$(MESADRVRADEONBUILDDIR)debugger/radeon_vtxfmt_c.o \
+		$(MESADRVRADEONBUILDDIR)debugger/radeon_vtxfmt_x86.o \
+		$(MESADRVRADEONBUILDDIR)debugger/radeon_vtxtmp_x86.o \
+		$(MESADRVRADEONBUILDDIR)debugger/radeon_vtxfmt_sse.o 
 
    RADEONPOBJS = $(MESADRVRADEONBUILDDIR)profiled/radeon_context.o \
+		$(MESADRVRADEONBUILDDIR)profiled/radeon_compat.o \
 		$(MESADRVRADEONBUILDDIR)profiled/radeon_ioctl.o \
 		$(MESADRVRADEONBUILDDIR)profiled/radeon_lock.o \
-		$(MESADRVRADEONBUILDDIR)profiled/radeon_render.o \
+		$(MESADRVRADEONBUILDDIR)profiled/radeon_maos.o \
+	        $(MESADRVRADEONBUILDDIR)profiled/radeon_sanity.o \
 	        $(MESADRVRADEONBUILDDIR)profiled/radeon_screen.o \
 		$(MESADRVRADEONBUILDDIR)profiled/radeon_span.o \
 		$(MESADRVRADEONBUILDDIR)profiled/radeon_state.o \
+		$(MESADRVRADEONBUILDDIR)profiled/radeon_state_init.o \
+		$(MESADRVRADEONBUILDDIR)profiled/radeon_swtcl.o \
+		$(MESADRVRADEONBUILDDIR)profiled/radeon_tcl.o \
 		$(MESADRVRADEONBUILDDIR)profiled/radeon_tex.o \
 		$(MESADRVRADEONBUILDDIR)profiled/radeon_texmem.o \
 		$(MESADRVRADEONBUILDDIR)profiled/radeon_texstate.o \
-		$(MESADRVRADEONBUILDDIR)profiled/radeon_tris.o \
-		$(MESADRVRADEONBUILDDIR)profiled/radeon_vb.o 
+		$(MESADRVRADEONBUILDDIR)profiled/radeon_vtxfmt.o \
+		$(MESADRVRADEONBUILDDIR)profiled/radeon_vtxfmt_c.o \
+		$(MESADRVRADEONBUILDDIR)profiled/radeon_vtxfmt_x86.o \
+		$(MESADRVRADEONBUILDDIR)profiled/radeon_vtxtmp_x86.o \
+		$(MESADRVRADEONBUILDDIR)profiled/radeon_vtxfmt_sse.o 
 
 #ifdef NeedToLinkMesaSrc
 LinkSourceFile(radeon_context.c, $(MESADRVSRCDIR)/radeon)
+LinkSourceFile(radeon_compat.c, $(MESADRVSRCDIR)/radeon)
 LinkSourceFile(radeon_ioctl.c, $(MESADRVSRCDIR)/radeon)
 LinkSourceFile(radeon_lock.c, $(MESADRVSRCDIR)/radeon)
-LinkSourceFile(radeon_render.c, $(MESADRVSRCDIR)/radeon)
+LinkSourceFile(radeon_maos.c, $(MESADRVSRCDIR)/radeon)
+LinkSourceFile(radeon_sanity.c, $(MESADRVSRCDIR)/radeon)
 LinkSourceFile(radeon_screen.c, $(MESADRVSRCDIR)/radeon)
 LinkSourceFile(radeon_span.c, $(MESADRVSRCDIR)/radeon)
 LinkSourceFile(radeon_state.c, $(MESADRVSRCDIR)/radeon)
+LinkSourceFile(radeon_state_init.c, $(MESADRVSRCDIR)/radeon)
+LinkSourceFile(radeon_swtcl.c, $(MESADRVSRCDIR)/radeon)
+LinkSourceFile(radeon_tcl.c, $(MESADRVSRCDIR)/radeon)
 LinkSourceFile(radeon_tex.c, $(MESADRVSRCDIR)/radeon)
 LinkSourceFile(radeon_texmem.c, $(MESADRVSRCDIR)/radeon)
 LinkSourceFile(radeon_texstate.c, $(MESADRVSRCDIR)/radeon)
-LinkSourceFile(radeon_tris.c, $(MESADRVSRCDIR)/radeon)
-LinkSourceFile(radeon_vb.c, $(MESADRVSRCDIR)/radeon)
+LinkSourceFile(radeon_vtxfmt.c, $(MESADRVSRCDIR)/radeon)
+LinkSourceFile(radeon_vtxfmt_c.c, $(MESADRVSRCDIR)/radeon)
+LinkSourceFile(radeon_vtxfmt_x86.c, $(MESADRVSRCDIR)/radeon)
+LinkSourceFile(radeon_vtxtmp_x86.S, $(MESADRVSRCDIR)/radeon)
+LinkSourceFile(radeon_vtxfmt_sse.c, $(MESADRVSRCDIR)/radeon)
 #endif
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_compat.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_compat.c
new file mode 100644
index 000000000..405636465
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_compat.c
@@ -0,0 +1,301 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2002 ATI Technologies Inc., Ontario, Canada, and
+               Tungsten Graphics Inc., Austin, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#include "radeon_context.h"
+#include "radeon_state.h"
+#include "radeon_ioctl.h"
+#include "mem.h"
+/* State packets indexed by packet_id (presumably RADEON_EMIT_* order); only .name is read in this file. */
+static struct { 
+	int start; 	/* presumably the first register of the packet -- TODO confirm */
+	int len; 	/* matches the number of values radeonCompatEmitPacket consumes */
+	const char *name;	/* printed in DEBUG_STATE output */
+} packet[RADEON_MAX_STATE_PACKETS] = {
+	{ RADEON_PP_MISC,7,"RADEON_PP_MISC" },
+	{ RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
+	{ RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
+	{ RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
+	{ RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
+	{ RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
+	{ RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
+	{ RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
+	{ RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
+	{ RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
+	{ RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
+	{ RADEON_RE_MISC,1,"RADEON_RE_MISC" },
+	{ RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
+	{ RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
+	{ RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
+	{ RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
+	{ RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
+	{ RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
+	{ RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
+	{ RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
+	{ RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
+};
+/* drm 1.1 compat: unpack a state atom's cmd[] stream into the SAREA
+ * context/texture regs and set the matching sarea->dirty upload bits. */
+static void radeonCompatEmitPacket( radeonContextPtr rmesa, 
+				    struct radeon_state_atom *state )
+{
+   RADEONSAREAPrivPtr sarea = rmesa->sarea;
+   radeon_context_regs_t *ctx = &sarea->ContextState;
+   radeon_texture_regs_t *tex0 = &sarea->TexState[0];
+   radeon_texture_regs_t *tex1 = &sarea->TexState[1];
+   int i;
+   int *buf = state->cmd;
+
+   for ( i = 0 ; i < state->cmd_size ; ) {	/* i advances inside the body */
+      drmRadeonCmdHeader *header = (drmRadeonCmdHeader *)&buf[i++];
+
+      if (RADEON_DEBUG & DEBUG_STATE)
+	 fprintf(stderr, "%s %d: %s\n", __FUNCTION__, header->packet.packet_id,
+		 packet[(int)header->packet.packet_id].name);
+
+      switch (header->packet.packet_id) {
+      case RADEON_EMIT_PP_MISC:
+	 ctx->pp_misc = buf[i++]; 
+	 ctx->pp_fog_color = buf[i++];
+	 ctx->re_solid_color = buf[i++];
+	 ctx->rb3d_blendcntl = buf[i++];
+	 ctx->rb3d_depthoffset = buf[i++];
+	 ctx->rb3d_depthpitch = buf[i++];
+	 ctx->rb3d_zstencilcntl = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_CONTEXT;
+	 break;
+      case RADEON_EMIT_PP_CNTL:
+	 ctx->pp_cntl = buf[i++];
+	 ctx->rb3d_cntl = buf[i++];
+	 ctx->rb3d_coloroffset = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_CONTEXT;
+	 break;
+      case RADEON_EMIT_RB3D_COLORPITCH:
+	 ctx->rb3d_colorpitch = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_CONTEXT;
+	 break;
+      case RADEON_EMIT_RE_LINE_PATTERN:
+	 ctx->re_line_pattern = buf[i++];
+	 ctx->re_line_state = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_LINE;
+	 break;
+      case RADEON_EMIT_SE_LINE_WIDTH:
+	 ctx->se_line_width = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_LINE;
+	 break;
+      case RADEON_EMIT_PP_LUM_MATRIX:
+	 ctx->pp_lum_matrix = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_BUMPMAP;
+	 break;
+      case RADEON_EMIT_PP_ROT_MATRIX_0:
+	 ctx->pp_rot_matrix_0 = buf[i++];
+	 ctx->pp_rot_matrix_1 = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_BUMPMAP;
+	 break;
+      case RADEON_EMIT_RB3D_STENCILREFMASK:
+	 ctx->rb3d_stencilrefmask = buf[i++];
+	 ctx->rb3d_ropcntl = buf[i++];
+	 ctx->rb3d_planemask = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_MASKS;
+	 break;
+      case RADEON_EMIT_SE_VPORT_XSCALE:
+	 ctx->se_vport_xscale = buf[i++];
+	 ctx->se_vport_xoffset = buf[i++];
+	 ctx->se_vport_yscale = buf[i++];
+	 ctx->se_vport_yoffset = buf[i++];
+	 ctx->se_vport_zscale = buf[i++];
+	 ctx->se_vport_zoffset = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_VIEWPORT;
+	 break;
+      case RADEON_EMIT_SE_CNTL:
+	 ctx->se_cntl = buf[i++];
+	 ctx->se_coord_fmt = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_CONTEXT | RADEON_UPLOAD_VERTFMT;
+	 break;
+      case RADEON_EMIT_SE_CNTL_STATUS:
+	 ctx->se_cntl_status = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_SETUP;
+	 break;
+      case RADEON_EMIT_RE_MISC:
+	 ctx->re_misc = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_MISC;
+	 break;
+      case RADEON_EMIT_PP_TXFILTER_0:
+	 tex0->pp_txfilter = buf[i++];
+	 tex0->pp_txformat = buf[i++];
+	 tex0->pp_txoffset = buf[i++];
+	 tex0->pp_txcblend = buf[i++];
+	 tex0->pp_txablend = buf[i++];
+	 tex0->pp_tfactor = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_TEX0;
+	 break;
+      case RADEON_EMIT_PP_BORDER_COLOR_0:
+	 tex0->pp_border_color = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_TEX0;
+	 break;
+      case RADEON_EMIT_PP_TXFILTER_1:
+	 tex1->pp_txfilter = buf[i++];
+	 tex1->pp_txformat = buf[i++];
+	 tex1->pp_txoffset = buf[i++];
+	 tex1->pp_txcblend = buf[i++];
+	 tex1->pp_txablend = buf[i++];
+	 tex1->pp_tfactor = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_TEX1;
+	 break;
+      case RADEON_EMIT_PP_BORDER_COLOR_1:
+	 tex1->pp_border_color = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_TEX1;
+	 break;
+
+      case RADEON_EMIT_SE_ZBIAS_FACTOR:
+	 i++;	/* both values are skipped: nothing is stored for them */
+	 i++;
+	 break;
+
+      case RADEON_EMIT_PP_TXFILTER_2:
+      case RADEON_EMIT_PP_BORDER_COLOR_2:
+      case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
+      case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
+      default:
+	 /* These states aren't understood by radeon drm 1.1 */
+	 fprintf(stderr, "Tried to emit unsupported state\n");
+	 return;	/* abandons any remaining cmds in this atom */
+      }
+   }
+}
+
+/* Emit every dirty non-TCL atom via radeonCompatEmitPacket and move all dirty
+ * atoms to the clean list.  The caller in this file holds the hardware lock. */
+static void radeonCompatEmitStateLocked( radeonContextPtr rmesa )
+{
+   struct radeon_state_atom *state, *tmp;
+
+   if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (rmesa->lost_context) {
+      if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS|DEBUG_IOCTL))
+	 fprintf(stderr, "%s - lost context\n", __FUNCTION__); 
+
+      /* Re-dirty everything so the whole state is emitted again below. */
+      foreach_s( state, tmp, &(rmesa->hw.clean) ) 
+	 move_to_tail(&(rmesa->hw.dirty), state );
+
+      rmesa->lost_context = 0;
+   }
+
+   foreach_s( state, tmp, &(rmesa->hw.dirty) ) {
+      if (!state->is_tcl)	/* TCL atoms are marked clean without being emitted */
+	 radeonCompatEmitPacket( rmesa, state );
+      move_to_head( &(rmesa->hw.clean), state );
+   }
+}
+
+/* Issue DRM_RADEON_VERTEX once per batch of at most RADEON_NR_SAREA_CLIPRECTS
+ * cliprects copied into the SAREA; 'discard' is set only on the last batch. */
+static void radeonCompatEmitPrimitiveLocked( radeonContextPtr rmesa,
+					     GLuint hw_primitive,
+					     GLuint nverts,
+					     XF86DRIClipRectPtr pbox,
+					     GLuint nbox )
+{
+   int i;
+
+   for ( i = 0 ; i < nbox ; ) {	/* NOTE(review): nbox == 0 issues no ioctl at all -- confirm intended */
+      int nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, nbox );	/* end index of this batch */
+      XF86DRIClipRectPtr b = rmesa->sarea->boxes;
+      drmRadeonVertex vtx;
+      
+      rmesa->sarea->dirty |= RADEON_UPLOAD_CLIPRECTS;
+      rmesa->sarea->nbox = nr - i;
+
+      for ( ; i < nr ; i++) 	/* this inner loop is what advances i */
+	 *b++ = pbox[i];
+      
+      if (RADEON_DEBUG & DEBUG_IOCTL)
+	 fprintf(stderr, 
+		 "RadeonFlushVertexBuffer: prim %x buf %d verts %d "
+		 "disc %d nbox %d\n",
+		 hw_primitive, 
+		 rmesa->dma.current.buf->buf->idx, 
+		 nverts, 
+		 nr == nbox,
+		 rmesa->sarea->nbox );
+
+      vtx.prim = hw_primitive;
+      vtx.idx = rmesa->dma.current.buf->buf->idx;
+      vtx.count = nverts;
+      vtx.discard = (nr == nbox);      /* true only for the final cliprect batch */
+
+      drmCommandWrite( rmesa->dri.fd, 	/* return value is not checked */
+		       DRM_RADEON_VERTEX,
+		       &vtx, sizeof(vtx));
+   }
+}
+
+
+
+/* Emit dirty state, then draw nrverts through the 1.1 vertex ioctl under the hw
+ * lock.  No 'start' for 1.1 vertices ioctl: only one vertex prim/buffer! */
+void radeonCompatEmitPrimitive( radeonContextPtr rmesa,
+				GLuint vertex_format,
+				GLuint hw_primitive,
+				GLuint nrverts )
+{
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   LOCK_HARDWARE( rmesa );
+
+   radeonCompatEmitStateLocked( rmesa );
+   rmesa->sarea->vc_format = vertex_format;
+   
+   if (rmesa->state.scissor.enabled) {	/* scissoring: use the scissor cliprect list */
+      radeonCompatEmitPrimitiveLocked( rmesa, 
+				       hw_primitive,
+				       nrverts,
+				       rmesa->state.scissor.pClipRects,
+				       rmesa->state.scissor.numClipRects );
+   }
+   else {	/* otherwise use the context's own cliprect list */
+      radeonCompatEmitPrimitiveLocked( rmesa, 
+				       hw_primitive,
+				       nrverts,
+				       rmesa->pClipRects,
+				       rmesa->numClipRects );
+   }
+
+
+   UNLOCK_HARDWARE( rmesa );
+}
+
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_context.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_context.c
index 1f1e27062..a2e616060 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_context.c
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_context.c
@@ -31,6 +31,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
  * Authors:
  *   Kevin E. Martin <martin@valinux.com>
  *   Gareth Hughes <gareth@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
  *
  */
 
@@ -41,8 +42,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_state.h"
 #include "radeon_span.h"
 #include "radeon_tex.h"
-#include "radeon_tris.h"
-#include "radeon_vb.h"
+#include "radeon_swtcl.h"
+#include "radeon_tcl.h"
+#include "radeon_vtxfmt.h"
 
 #include "swrast/swrast.h"
 #include "swrast_setup/swrast_setup.h"
@@ -51,6 +53,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "tnl/tnl.h"
 #include "tnl/t_pipeline.h"
 
+#include "attrib.h"
+#include "api_arrayelt.h"
 #include "context.h"
 #include "simple_list.h"
 #include "mem.h"
@@ -60,27 +64,18 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "X86/common_x86_asm.h"
 #endif
 
-#define RADEON_DATE	"20020221"
+#define RADEON_DATE	"20020611"
 
 #ifndef RADEON_DEBUG
-int RADEON_DEBUG = (0
-/*		    | DEBUG_ALWAYS_SYNC */
-/*		    | DEBUG_VERBOSE_API */
-/*		    | DEBUG_VERBOSE_MSG */
-/*		    | DEBUG_VERBOSE_LRU */
-/*		    | DEBUG_VERBOSE_DRI */
-/*		    | DEBUG_VERBOSE_IOCTL */
-/*		    | DEBUG_VERBOSE_2D */
-/*		    | DEBUG_VERBOSE_TEXTURE */
-   );
+int RADEON_DEBUG = (0);
 #endif
 
 
 
-/* Return the width and height of the current color buffer.
+/* Return the width and height of the given buffer.
  */
 static void radeonGetBufferSize( GLframebuffer *buffer,
-				   GLuint *width, GLuint *height )
+				 GLuint *width, GLuint *height )
 {
    GET_CURRENT_CONTEXT(ctx);
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
@@ -100,10 +95,10 @@ static const GLubyte *radeonGetString( GLcontext *ctx, GLenum name )
 
    switch ( name ) {
    case GL_VENDOR:
-      return (GLubyte *)"VA Linux Systems, Inc.";
+      return (GLubyte *)"Tungsten Graphics, Inc.";
 
    case GL_RENDERER:
-      sprintf( buffer, "Mesa DRI Radeon " RADEON_DATE );
+      sprintf( buffer, "Mesa DRI Radeon " RADEON_DATE);
 
       /* Append any chipset-specific information.  None yet.
        */
@@ -144,6 +139,18 @@ static const GLubyte *radeonGetString( GLcontext *ctx, GLenum name )
       }
 #endif
 #endif
+
+      if ( rmesa->dri.drmMinor < RADEON_DRM_CURRENT ) {
+	 strncat( buffer, " DRM-COMPAT", 11 );
+      }
+	 
+      if ( !(rmesa->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE) ) {
+	 strncat( buffer, " TCL", 4 );
+      }
+      else {
+	 strncat( buffer, " NO-TCL", 7 );
+      }
+
       return (GLubyte *)buffer;
 
    default:
@@ -151,41 +158,6 @@ static const GLubyte *radeonGetString( GLcontext *ctx, GLenum name )
    }
 }
 
-/* Send all commands to the hardware.  If vertex buffers or indirect
- * buffers are in use, then we need to make sure they are sent to the
- * hardware.  All commands that are normally sent to the ring are
- * already considered `flushed'.
- */
-static void radeonFlush( GLcontext *ctx )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
-   RADEON_FIREVERTICES( rmesa );
-
-   if ( rmesa->boxes ) {
-      LOCK_HARDWARE( rmesa );
-      radeonPerformanceBoxesLocked( rmesa );
-      UNLOCK_HARDWARE( rmesa );
-   }
-
-   /* Log the performance counters if necessary */
-   radeonPerformanceCounters( rmesa );
-}
-
-/* Make sure all commands have been sent to the hardware and have
- * completed processing.
- */
-static void radeonFinish( GLcontext *ctx )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
-   /* Bump the performance counter */
-   rmesa->c_drawWaits++;
-   radeonFlush( ctx );
-   LOCK_HARDWARE( rmesa );
-   radeonWaitForIdleLocked( rmesa );
-   UNLOCK_HARDWARE( rmesa );
-}
 
 
 /* Initialize the extensions supported by this driver.
@@ -204,23 +176,43 @@ static void radeonInitExtensions( GLcontext *ctx )
    _mesa_enable_extension( ctx, "GL_EXT_texture_filter_anisotropic" );
    _mesa_enable_extension( ctx, "GL_EXT_texture_lod_bias" );
 
+/*     _mesa_enable_extension( ctx, "GL_EXT_fog_coord" ); */
+   _mesa_enable_extension( ctx, "GL_EXT_secondary_color" );
+   _mesa_enable_extension( ctx, "GL_EXT_blend_subtract" );
 }
 
 extern const struct gl_pipeline_stage _radeon_render_stage;
-extern const struct gl_pipeline_stage _radeon_tcl_render_stage;
+extern const struct gl_pipeline_stage _radeon_tcl_stage;
 
 static const struct gl_pipeline_stage *radeon_pipeline[] = {
+
+   /* Try and go straight to t&l
+    */
+   &_radeon_tcl_stage,  
+
+   /* Catch any t&l fallbacks
+    */
    &_tnl_vertex_transform_stage,
    &_tnl_normal_transform_stage,
    &_tnl_lighting_stage,
    &_tnl_fog_coordinate_stage,
    &_tnl_texgen_stage,
    &_tnl_texture_transform_stage,
-				/* REMOVE: point attenuation stage */
-#if 1
-   &_radeon_render_stage,	/* ADD: unclipped rastersetup-to-dma */
-#endif
-   &_tnl_render_stage,
+
+   /* Try again to go to tcl? 
+    *     - no good for asymmetric-twoside (do with multipass)
+    *     - no good for asymmetric-unfilled (do with multipass)
+    *     - good for material
+    *     - good for texgen
+    *     - need to manipulate a bit of state
+    *
+    * - worth it/not worth it?
+    */
+			
+   /* Else do them here.
+    */
+   &_radeon_render_stage,
+   &_tnl_render_stage,		/* FALLBACK:  */
    0,
 };
 
@@ -233,8 +225,6 @@ static void radeonInitDriverFuncs( GLcontext *ctx )
     ctx->Driver.GetBufferSize		= radeonGetBufferSize;
     ctx->Driver.ResizeBuffers           = _swrast_alloc_buffers;
     ctx->Driver.GetString		= radeonGetString;
-    ctx->Driver.Finish			= radeonFinish;
-    ctx->Driver.Flush			= radeonFlush;
 
     ctx->Driver.Error			= NULL;
     ctx->Driver.DrawPixels		= NULL;
@@ -287,11 +277,20 @@ radeonCreateContext( Display *dpy, const __GLcontextModes *glVisual,
    rmesa->dri.hwLock = &sPriv->pSAREA->lock;
    rmesa->dri.fd = sPriv->fd;
 
+   /* If we don't have 1.3, fallback to the 1.1 interfaces.
+    */
+   if (getenv("RADEON_COMPAT") || sPriv->drmMinor < RADEON_DRM_CURRENT) 
+      rmesa->dri.drmMinor = 1;
+   else
+      rmesa->dri.drmMinor = sPriv->drmMinor;
+
    rmesa->radeonScreen = radeonScreen;
    rmesa->sarea = (RADEONSAREAPrivPtr)((GLubyte *)sPriv->pSAREA +
 				       radeonScreen->sarea_priv_offset);
 
 
+   rmesa->dma.buf0_address = rmesa->radeonScreen->buffers->list[0].address;
+
    for ( i = 0 ; i < radeonScreen->numTexHeaps ; i++ ) {
       make_empty_list( &rmesa->texture.objects[i] );
       rmesa->texture.heap[i] = mmInit( 0, radeonScreen->texSize[i] );
@@ -300,9 +299,8 @@ radeonCreateContext( Display *dpy, const __GLcontextModes *glVisual,
    rmesa->texture.numHeaps = radeonScreen->numTexHeaps;
    make_empty_list( &rmesa->texture.swapped );
 
-   rmesa->RenderIndex = ~0;
-   rmesa->state.hw.dirty = RADEON_UPLOAD_CONTEXT_ALL;
-   rmesa->upload_cliprects = 1;
+   rmesa->swtcl.RenderIndex = ~0;
+   rmesa->lost_context = 1;
 
    /* KW: Set the maximum texture size small enough that we can
     * guarentee that both texture units can bind a maximal texture
@@ -339,17 +337,20 @@ radeonCreateContext( Display *dpy, const __GLcontextModes *glVisual,
    ctx->Const.MaxLineWidthAA = 10.0;
    ctx->Const.LineWidthGranularity = 0.0625;
 
+   /* Set maxlocksize (and hence vb size) small enough to avoid
+    * fallbacks in radeon_tcl.c.  ie. guarantee that all vertices can
+    * fit in a single dma buffer for indexed rendering of quad strips,
+    * etc.
+    */
+/*     ctx->Const.MaxArrayLockSize =  */
+/*        MIN2( ctx->Const.MaxArrayLockSize, */
+/*  	    RADEON_BUFFER_SIZE / RADEON_MAX_TCL_VERTSIZE ); */
+
    if (getenv("LIBGL_PERFORMANCE_BOXES"))
       rmesa->boxes = 1;
    else
       rmesa->boxes = 0;
 
-   {
-      const char *debug = getenv("LIBGL_DEBUG");
-      if (debug && strstr(debug, "fallbacks")) {
-         rmesa->debugFallbacks = GL_TRUE;
-      }
-   }
 
    /* Initialize the software rasterizer and helper modules.
     */
@@ -357,29 +358,140 @@ radeonCreateContext( Display *dpy, const __GLcontextModes *glVisual,
    _ac_CreateContext( ctx );
    _tnl_CreateContext( ctx );
    _swsetup_CreateContext( ctx );
-
+   _ae_create_context( ctx );
 
    /* Install the customized pipeline:
     */
    _tnl_destroy_pipeline( ctx );
    _tnl_install_pipeline( ctx, radeon_pipeline );
 
+   /* Try and keep materials and vertices separate:
+    */
+   _tnl_isolate_materials( ctx, GL_TRUE );
+
+
+/*     _mesa_allow_light_in_model( ctx, GL_FALSE ); */
+
    /* Configure swrast to match hardware characteristics:
     */
    _swrast_allow_pixel_fog( ctx, GL_FALSE );
    _swrast_allow_vertex_fog( ctx, GL_TRUE );
 
-   radeonInitVB( ctx );
+
+   _math_matrix_ctr( &rmesa->TexGenMatrix[0] );
+   _math_matrix_ctr( &rmesa->TexGenMatrix[1] );
+   _math_matrix_ctr( &rmesa->tmpmat );
+   _math_matrix_set_identity( &rmesa->TexGenMatrix[0] );
+   _math_matrix_set_identity( &rmesa->TexGenMatrix[1] );
+   _math_matrix_set_identity( &rmesa->tmpmat );
+
    radeonInitExtensions( ctx );
    radeonInitDriverFuncs( ctx );
    radeonInitIoctlFuncs( ctx );
    radeonInitStateFuncs( ctx );
    radeonInitSpanFuncs( ctx );
    radeonInitTextureFuncs( ctx );
-   radeonInitTriFuncs( ctx );
-
    radeonInitState( rmesa );
+   radeonInitSwtcl( ctx );
+
+   
+
+#if DO_DEBUG
+   if (getenv("RADEON_DEBUG_FALLBACKS"))
+      RADEON_DEBUG |= DEBUG_FALLBACKS;
+
+   if (getenv("RADEON_DEBUG_TEXTURE"))
+      RADEON_DEBUG |= DEBUG_TEXTURE;
+
+   if (getenv("RADEON_DEBUG_IOCTL"))
+      RADEON_DEBUG |= DEBUG_IOCTL;
+
+   if (getenv("RADEON_DEBUG_PRIMS"))
+      RADEON_DEBUG |= DEBUG_PRIMS;
+
+   if (getenv("RADEON_DEBUG_VERTS"))
+      RADEON_DEBUG |= DEBUG_VERTS;
+
+   if (getenv("RADEON_DEBUG_STATE"))
+      RADEON_DEBUG |= DEBUG_STATE;
+
+   if (getenv("RADEON_DEBUG_CODEGEN"))
+      RADEON_DEBUG |= DEBUG_CODEGEN;
+
+   if (getenv("RADEON_DEBUG_VTXFMT"))
+      RADEON_DEBUG |= DEBUG_VFMT;
+
+   if (getenv("RADEON_DEBUG_VERBOSE"))
+      RADEON_DEBUG |= DEBUG_VERBOSE;
+
+   if (getenv("RADEON_DEBUG_DRI"))
+      RADEON_DEBUG |= DEBUG_DRI;
 
+   if (getenv("RADEON_DEBUG_DMA"))
+      RADEON_DEBUG |= DEBUG_DMA;
+
+   if (getenv("RADEON_DEBUG_SANITY"))
+      RADEON_DEBUG |= DEBUG_SANITY;
+
+   if (getenv("RADEON_DEBUG"))
+   {
+      const char *debug = getenv("RADEON_DEBUG");
+      if (strstr(debug, "fall")) 
+         RADEON_DEBUG |= DEBUG_FALLBACKS;
+
+      if (strstr(debug, "tex")) 
+         RADEON_DEBUG |= DEBUG_TEXTURE;
+
+      if (strstr(debug, "ioctl")) 
+         RADEON_DEBUG |= DEBUG_IOCTL;
+
+      if (strstr(debug, "prim")) 
+         RADEON_DEBUG |= DEBUG_PRIMS;
+
+      if (strstr(debug, "vert")) 
+         RADEON_DEBUG |= DEBUG_VERTS;
+
+      if (strstr(debug, "state")) 
+         RADEON_DEBUG |= DEBUG_STATE;
+
+      if (strstr(debug, "code")) 
+         RADEON_DEBUG |= DEBUG_CODEGEN;
+
+      if (strstr(debug, "vfmt") || strstr(debug, "vtxf")) 
+         RADEON_DEBUG |= DEBUG_VFMT;
+
+      if (strstr(debug, "verb")) 
+         RADEON_DEBUG |= DEBUG_VERBOSE;
+
+      if (strstr(debug, "dri")) 
+         RADEON_DEBUG |= DEBUG_DRI;
+
+      if (strstr(debug, "dma")) 
+         RADEON_DEBUG |= DEBUG_DMA;
+
+      if (strstr(debug, "san")) 
+         RADEON_DEBUG |= DEBUG_SANITY;
+   }
+
+
+#endif
+
+   if (getenv("RADEON_NO_RAST")) {
+      fprintf(stderr, "disabling 3D acceleration\n");
+      FALLBACK(rmesa, RADEON_FALLBACK_DISABLE, 1); 
+   }
+   else if (getenv("RADEON_NO_TCL") || 
+       rmesa->radeonScreen->chipset == RADEON_CHIPSET_MOBILITY ||
+       rmesa->dri.drmMinor < RADEON_DRM_CURRENT) {
+      fprintf(stderr, "disabling TCL support\n");
+      TCL_FALLBACK(rmesa->glCtx, RADEON_TCL_FALLBACK_TCL_DISABLE, 1); 
+   }
+   else {
+      if (!getenv("RADEON_NO_VTXFMT"))
+	 radeonVtxfmtInit( ctx );
+
+      _tnl_need_dlist_norm_lengths( ctx, GL_FALSE );
+   }
    return GL_TRUE;
 }
 
@@ -427,12 +539,21 @@ radeonDestroyContext( __DRIcontextPrivate *driContextPriv )
       _ac_DestroyContext( rmesa->glCtx );
       _swrast_DestroyContext( rmesa->glCtx );
 
-      radeonFreeVB( rmesa->glCtx );
+      radeonDestroySwtcl( rmesa->glCtx );
+
+      if (!(rmesa->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE) &&
+	  !getenv("RADEON_NO_RAST") && !getenv("RADEON_NO_VTXFMT"))
+	 radeonVtxfmtDestroy( rmesa->glCtx );  /* only if radeonVtxfmtInit() ran */
 
       /* free the Mesa context */
       rmesa->glCtx->DriverCtx = NULL;
       _mesa_destroy_context( rmesa->glCtx );
 
+      if (rmesa->state.scissor.pClipRects) {
+	 FREE(rmesa->state.scissor.pClipRects);
+	 rmesa->state.scissor.pClipRects = 0;
+      }
+
       FREE( rmesa );
    }
 
@@ -533,21 +654,14 @@ radeonMakeCurrent( __DRIcontextPrivate *driContextPriv,
                    __DRIdrawablePrivate *driReadPriv )
 {
    if ( driContextPriv ) {
-      GET_CURRENT_CONTEXT(ctx);
-      radeonContextPtr oldRadeonCtx = ctx ? RADEON_CONTEXT(ctx) : NULL;
-      radeonContextPtr newRadeonCtx = (radeonContextPtr) driContextPriv->driverPrivate;
-
-      if ( newRadeonCtx != oldRadeonCtx ) {
-	 newRadeonCtx->state.hw.dirty = RADEON_UPLOAD_CONTEXT_ALL;
-	 if ( newRadeonCtx->state.texture.unit[0].texobj )
-	    newRadeonCtx->state.hw.dirty |= RADEON_UPLOAD_TEX0;
-	 if ( newRadeonCtx->state.texture.unit[1].texobj )
-	    newRadeonCtx->state.hw.dirty |= RADEON_UPLOAD_TEX1;
-      }
+      radeonContextPtr newRadeonCtx = 
+	 (radeonContextPtr) driContextPriv->driverPrivate;
+
+      if (RADEON_DEBUG & DEBUG_DRI)
+	 fprintf(stderr, "%s ctx %p\n", __FUNCTION__, newRadeonCtx->glCtx);
 
       if ( newRadeonCtx->dri.drawable != driDrawPriv ) {
 	 newRadeonCtx->dri.drawable = driDrawPriv;
-	 newRadeonCtx->upload_cliprects = 1;
 	 radeonUpdateWindow( newRadeonCtx->glCtx );
 	 radeonUpdateViewportOffset( newRadeonCtx->glCtx );
       }
@@ -560,10 +674,18 @@ radeonMakeCurrent( __DRIcontextPrivate *driContextPriv,
 	 _mesa_set_viewport( newRadeonCtx->glCtx, 0, 0,
 			     driDrawPriv->w, driDrawPriv->h );
       }
+
+      if (newRadeonCtx->vb.enabled)
+	 radeonVtxfmtMakeCurrent( newRadeonCtx->glCtx );
+
    } else {
+      if (RADEON_DEBUG & DEBUG_DRI)
+	 fprintf(stderr, "%s ctx %p\n", __FUNCTION__, NULL);
       _mesa_make_current( 0, 0 );
    }
 
+   if (RADEON_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "End %s\n", __FUNCTION__);
    return GL_TRUE;
 }
 
@@ -572,67 +694,30 @@ radeonMakeCurrent( __DRIcontextPrivate *driContextPriv,
 static GLboolean
 radeonUnbindContext( __DRIcontextPrivate *driContextPriv )
 {
-   return GL_TRUE;
-}
-
-/* Initialize the fullscreen mode.
- */
-static GLboolean
-radeonOpenFullScreen( __DRIcontextPrivate *driContextPriv )
-{
-#if 0
-   radeonContextPtr rmesa = (radeonContextPtr)driContextPriv->driverPrivate;
-   drmRadeonFullScreenType fs;
-   GLint ret;
-
-   /* FIXME: Do we need to check this?
-    */
-   if ( !rmesa->glCtx->Visual.doubleBufferMode )
-      return GL_TRUE;
-
-   LOCK_HARDWARE( rmesa );
-   radeonWaitForIdleLocked( rmesa );
-
-   /* Ignore errors.  If this fails, we simply don't do page flipping.
-    */
-   fs.func = RADEON_INIT_FULLSCREEN;
-   ret = drmCommandWrite( rmesa->driFd, DRM_RADEON_FULL_SCREEN,
-                          &fs, sizeof(drmRadeonFullScreenType) );
+   radeonContextPtr rmesa = (radeonContextPtr) driContextPriv->driverPrivate;
 
-   UNLOCK_HARDWARE( rmesa );
+   if (RADEON_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "%s ctx %p\n", __FUNCTION__, rmesa->glCtx);
 
-   rmesa->doPageFlip = ( ret == 0 );
-#endif
+   radeonVtxfmtUnbindContext( rmesa->glCtx );
    return GL_TRUE;
 }
 
-/* Shut down the fullscreen mode.
+/* Fullscreen mode isn't used for much -- could be a way to shrink
+ * front/back buffers & get more texture memory if the client has
+ * changed the video resolution.
+ * 
+ * Pageflipping is now done automatically whenever there is a single
+ * 3d client.
  */
 static GLboolean
-radeonCloseFullScreen( __DRIcontextPrivate *driContextPriv )
+radeonOpenCloseFullScreen( __DRIcontextPrivate *driContextPriv )
 {
-#if 0
-   radeonContextPtr rmesa = (radeonContextPtr)driContextPriv->driverPrivate;
-   drmRadeonFullScreenType fs;
-
-   LOCK_HARDWARE( rmesa );
-   radeonWaitForIdleLocked( rmesa );
-
-   /* Don't care if this fails, we're not page flipping anymore.
-    */
-   fs.func = RADEON_CLEANUP_FULLSCREEN;
-   drmCommandWrite( rmesa->driFd, DRM_RADEON_FULL_SCREEN,
-                    &fs, sizeof(drmRadeonFullScreenType) );
-
-   UNLOCK_HARDWARE( rmesa );
-
-   rmesa->doPageFlip = GL_FALSE;
-   rmesa->currentPage = 0;
-#endif
    return GL_TRUE;
 }
 
 
+
 /* This function is called by libGL.so as soon as libGL.so is loaded.
  * This is where we'd register new extension functions with the dispatcher.
  */
@@ -653,8 +738,8 @@ static struct __DriverAPIRec radeonAPI = {
    radeonSwapBuffers,
    radeonMakeCurrent,
    radeonUnbindContext,
-   radeonOpenFullScreen,
-   radeonCloseFullScreen
+   radeonOpenCloseFullScreen,
+   radeonOpenCloseFullScreen
 };
 
 
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_context.h b/xc/lib/GL/mesa/src/drv/radeon/radeon_context.h
index 4dc915e40..541c90e1f 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_context.h
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_context.h
@@ -40,15 +40,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #ifdef GLX_DIRECT_RENDERING
 
 #include <X11/Xlibint.h>
-
 #include "dri_util.h"
-
 #include "xf86drm.h"
 #include "radeon_common.h"
 
 #include "macros.h"
 #include "mtypes.h"
-
 #include "radeon_reg.h"
 
 struct radeon_context;
@@ -60,13 +57,14 @@ typedef struct radeon_context *radeonContextPtr;
 #include "mm.h"
 
 /* Flags for software fallback cases */
-/* See correponding strings in radeon_tris.c */
+/* See corresponding strings in radeon_swtcl.c */
 #define RADEON_FALLBACK_TEXTURE		0x0001
 #define RADEON_FALLBACK_DRAW_BUFFER	0x0002
 #define RADEON_FALLBACK_STENCIL		0x0004
 #define RADEON_FALLBACK_RENDER_MODE	0x0008
 #define RADEON_FALLBACK_BLEND_EQ	0x0010
 #define RADEON_FALLBACK_BLEND_FUNC	0x0020
+#define RADEON_FALLBACK_DISABLE 	0x0040
 
 /* Use the templated vertex format:
  */
@@ -87,18 +85,9 @@ typedef void (*radeon_line_func)( radeonContextPtr,
 typedef void (*radeon_point_func)( radeonContextPtr,
 				   radeonVertex * );
 
-typedef void (*radeon_interp_func)( GLfloat t,
-				    radeonTnlVertex *O,
-				    const radeonTnlVertex *I,
-				    const radeonTnlVertex *J );
-
-typedef void (*radeon_prim_func)( GLcontext *ctx );
-typedef void (*radeon_flush_func)( GLcontext *ctx, radeonTnlVertex *v );
-
 
 struct radeon_colorbuffer_state {
    GLuint clear;
-
    GLint drawOffset, drawPitch;
 };
 
@@ -115,6 +104,10 @@ struct radeon_pixel_state {
 struct radeon_scissor_state {
    XF86DRIClipRectRec rect;
    GLboolean enabled;
+
+   GLuint numClipRects;			/* Cliprects active */
+   GLuint numAllocedClipRects;		/* Cliprects available */
+   XF86DRIClipRectPtr pClipRects;
 };
 
 struct radeon_stencilbuffer_state {
@@ -128,8 +121,9 @@ struct radeon_stipple_state {
 
 
 
-#define TEX_0 1
-#define TEX_1 2
+#define TEX_0   0x1
+#define TEX_1   0x2
+#define TEX_ALL 0x3
 
 typedef struct radeon_tex_obj radeonTexObj, *radeonTexObjPtr;
 
@@ -148,7 +142,13 @@ struct radeon_tex_obj {
 					   images need to be uploaded to
 					   local or AGP texture space */
 
-   GLint bound;				/* Texture unit currently bound to */
+   GLuint dirty_state;		        /* Flags (1 per texunit) for
+					   whether or not this texobj
+					   has dirty hardware state
+					   (pp_*) that needs to be
+					   brought into the
+					   texunit. */
+
    GLint heap;				/* Texture heap currently stored in */
 
    drmRadeonTexImage image[RADEON_MAX_TEXTURE_LEVELS];
@@ -179,10 +179,253 @@ struct radeon_texture_state {
    struct radeon_texture_env_state unit[RADEON_MAX_TEXTURE_UNITS];
 };
 
-struct radeon_state {
-   drmRadeonState hw;
+
+struct radeon_state_atom {
+   struct radeon_state_atom *next, *prev;
+   const char *name;		         /* for debug */
+   int cmd_size;		         /* size in dwords */
+   GLuint is_tcl;
+   int *cmd;			         /* one or more cmd's */
+   int *lastcmd;			 /* one or more cmd's */
+   GLboolean (*check)( GLcontext * );    /* is this state active? */
+};
+   
+
+
+/* Trying to keep these relatively short as the variables are becoming
+ * extravagantly long.  Drop the RADEON_ off the front of everything -
+ * I think we know we're in the radeon driver by now, and keep the
+ * prefix to 3 letters unless absolutely impossible.  
+ */
+
+#define CTX_CMD_0             0
+#define CTX_PP_MISC           1
+#define CTX_PP_FOG_COLOR      2
+#define CTX_RE_SOLID_COLOR    3
+#define CTX_RB3D_BLENDCNTL    4
+#define CTX_RB3D_DEPTHOFFSET  5
+#define CTX_RB3D_DEPTHPITCH   6
+#define CTX_RB3D_ZSTENCILCNTL 7
+#define CTX_CMD_1             8
+#define CTX_PP_CNTL           9
+#define CTX_RB3D_CNTL         10
+#define CTX_RB3D_COLOROFFSET  11
+#define CTX_CMD_2             12
+#define CTX_RB3D_COLORPITCH   13
+#define CTX_STATE_SIZE        14
+
+#define SET_CMD_0               0
+#define SET_SE_CNTL             1
+#define SET_SE_COORDFMT         2
+#define SET_CMD_1               3
+#define SET_SE_CNTL_STATUS      4
+#define SET_STATE_SIZE          5
+
+#define LIN_CMD_0               0
+#define LIN_RE_LINE_PATTERN     1
+#define LIN_RE_LINE_STATE       2
+#define LIN_CMD_1               3
+#define LIN_SE_LINE_WIDTH       4
+#define LIN_STATE_SIZE          5
+
+#define MSK_CMD_0               0
+#define MSK_RB3D_STENCILREFMASK 1
+#define MSK_RB3D_ROPCNTL        2
+#define MSK_RB3D_PLANEMASK      3
+#define MSK_STATE_SIZE          4
+
+#define VPT_CMD_0           0
+#define VPT_SE_VPORT_XSCALE          1
+#define VPT_SE_VPORT_XOFFSET         2
+#define VPT_SE_VPORT_YSCALE          3
+#define VPT_SE_VPORT_YOFFSET         4
+#define VPT_SE_VPORT_ZSCALE          5
+#define VPT_SE_VPORT_ZOFFSET         6
+#define VPT_STATE_SIZE      7
+
+#define MSC_CMD_0               0
+#define MSC_RE_MISC             1
+#define MSC_STATE_SIZE          2
+
+#define TEX_CMD_0                   0
+#define TEX_PP_TXFILTER             1
+#define TEX_PP_TXFORMAT             2
+#define TEX_PP_TXOFFSET             3
+#define TEX_PP_TXCBLEND             4
+#define TEX_PP_TXABLEND             5
+#define TEX_PP_TFACTOR              6
+#define TEX_CMD_1                   7
+#define TEX_PP_BORDER_COLOR         8
+#define TEX_STATE_SIZE              9
+
+#define ZBS_CMD_0              0
+#define ZBS_SE_ZBIAS_FACTOR             1
+#define ZBS_SE_ZBIAS_CONSTANT           2
+#define ZBS_STATE_SIZE         3
+
+#define TCL_CMD_0                        0
+#define TCL_OUTPUT_VTXFMT         1
+#define TCL_OUTPUT_VTXSEL         2
+#define TCL_MATRIX_SELECT_0       3
+#define TCL_MATRIX_SELECT_1       4
+#define TCL_UCP_VERT_BLEND_CTL    5
+#define TCL_TEXTURE_PROC_CTL      6
+#define TCL_LIGHT_MODEL_CTL       7
+#define TCL_PER_LIGHT_CTL_0       8
+#define TCL_PER_LIGHT_CTL_1       9
+#define TCL_PER_LIGHT_CTL_2       10
+#define TCL_PER_LIGHT_CTL_3       11
+#define TCL_STATE_SIZE                   12
+
+#define MTL_CMD_0            0	
+#define MTL_EMMISSIVE_RED    1	
+#define MTL_EMMISSIVE_GREEN  2	
+#define MTL_EMMISSIVE_BLUE   3	
+#define MTL_EMMISSIVE_ALPHA  4	
+#define MTL_AMBIENT_RED      5
+#define MTL_AMBIENT_GREEN    6
+#define MTL_AMBIENT_BLUE     7
+#define MTL_AMBIENT_ALPHA    8
+#define MTL_DIFFUSE_RED      9
+#define MTL_DIFFUSE_GREEN    10
+#define MTL_DIFFUSE_BLUE     11
+#define MTL_DIFFUSE_ALPHA    12
+#define MTL_SPECULAR_RED     13
+#define MTL_SPECULAR_GREEN   14
+#define MTL_SPECULAR_BLUE    15
+#define MTL_SPECULAR_ALPHA   16
+#define MTL_SHININESS        17
+#define MTL_STATE_SIZE       18
+
+#define VTX_CMD_0              0
+#define VTX_SE_COORD_FMT       1
+#define VTX_STATE_SIZE         2
+
+#define MAT_CMD_0              0
+#define MAT_ELT_0              1
+#define MAT_STATE_SIZE         17
+
+#define GRD_CMD_0                  0
+#define GRD_VERT_GUARD_CLIP_ADJ    1
+#define GRD_VERT_GUARD_DISCARD_ADJ 2
+#define GRD_HORZ_GUARD_CLIP_ADJ    3
+#define GRD_HORZ_GUARD_DISCARD_ADJ 4
+#define GRD_STATE_SIZE             5
+
+/* position changes frequently when lighting in modelpos - separate
+ * out to new state item?  
+ */
+#define LIT_CMD_0                  0
+#define LIT_AMBIENT_RED            1
+#define LIT_AMBIENT_GREEN          2
+#define LIT_AMBIENT_BLUE           3
+#define LIT_AMBIENT_ALPHA          4
+#define LIT_DIFFUSE_RED            5
+#define LIT_DIFFUSE_GREEN          6
+#define LIT_DIFFUSE_BLUE           7
+#define LIT_DIFFUSE_ALPHA          8
+#define LIT_SPECULAR_RED           9
+#define LIT_SPECULAR_GREEN         10
+#define LIT_SPECULAR_BLUE          11
+#define LIT_SPECULAR_ALPHA         12
+#define LIT_POSITION_X             13
+#define LIT_POSITION_Y             14
+#define LIT_POSITION_Z             15
+#define LIT_POSITION_W             16
+#define LIT_DIRECTION_X            17
+#define LIT_DIRECTION_Y            18
+#define LIT_DIRECTION_Z            19
+#define LIT_DIRECTION_W            20
+#define LIT_ATTEN_CONST            21
+#define LIT_ATTEN_LINEAR           22
+#define LIT_ATTEN_QUADRATIC        23
+#define LIT_ATTEN_XXX              24
+#define LIT_CMD_1                  25
+#define LIT_SPOT_DCD               26
+#define LIT_SPOT_EXPONENT          27
+#define LIT_SPOT_CUTOFF            28
+#define LIT_SPECULAR_THRESH        29
+#define LIT_RANGE_CUTOFF           30 /* ? */
+#define LIT_RANGE_ATTEN            31 /* ? */
+#define LIT_STATE_SIZE             32
+
+/* Fog
+ */
+#define FOG_CMD_0      0
+#define FOG_R          1
+#define FOG_C          2
+#define FOG_D          3
+#define FOG_PAD        4
+#define FOG_STATE_SIZE 5
+
+/* UCP
+ */
+#define UCP_CMD_0      0
+#define UCP_X          1
+#define UCP_Y          2
+#define UCP_Z          3
+#define UCP_W          4
+#define UCP_STATE_SIZE 5
+
+/* GLT - Global ambient
+ */
+#define GLT_CMD_0      0
+#define GLT_RED        1
+#define GLT_GREEN      2
+#define GLT_BLUE       3
+#define GLT_ALPHA      4
+#define GLT_STATE_SIZE 5
+
+/* EYE
+ */
+#define EYE_CMD_0          0
+#define EYE_X              1
+#define EYE_Y              2
+#define EYE_Z              3
+#define EYE_RESCALE_FACTOR 4
+#define EYE_STATE_SIZE     5
+
+#define SHN_CMD_0          0
+#define SHN_SHININESS      1
+#define SHN_STATE_SIZE     2
+
+
+
 
 
+struct radeon_hw_state {
+   /* All state should be on one of these lists:
+    */
+   struct radeon_state_atom dirty; /* dirty list head placeholder */
+   struct radeon_state_atom clean; /* clean list head placeholder */
+
+   /* Hardware state, stored as cmdbuf commands:  
+    *   -- Need to doublebuffer for
+    *           - reviving state after loss of context
+    *           - eliding noop statechange loops? (except line stipple count)
+    */
+   struct radeon_state_atom ctx;
+   struct radeon_state_atom set;
+   struct radeon_state_atom lin;
+   struct radeon_state_atom msk;
+   struct radeon_state_atom vpt;
+   struct radeon_state_atom tcl;
+   struct radeon_state_atom msc;
+   struct radeon_state_atom tex[2];
+   struct radeon_state_atom zbs;
+   struct radeon_state_atom mtl; 
+   struct radeon_state_atom mat[5]; 
+   struct radeon_state_atom lit[8]; /* includes vec, scl commands */
+   struct radeon_state_atom ucp[6];
+   struct radeon_state_atom eye; /* eye pos */
+   struct radeon_state_atom grd; /* guard band clipping */
+   struct radeon_state_atom fog; 
+   struct radeon_state_atom glt; 
+};
+
+struct radeon_state {
+   /* Derived state for internal purposes:
+    */
    struct radeon_colorbuffer_state color;
    struct radeon_depthbuffer_state depth;
    struct radeon_pixel_state pixel;
@@ -202,13 +445,40 @@ struct radeon_texture {
    GLint numHeaps;
 };
 
+/* Need refcounting on dma buffers:
+ */
+struct radeon_dma_buffer {
+   int refcount;		/* the number of retained regions in buf */
+   drmBufPtr buf;
+};
+
+#define GET_START(rvb) (rmesa->dri.agp_buffer_offset +			\
+			(rvb)->address - rmesa->dma.buf0_address +	\
+			(rvb)->start)
+
+/* A retained region, eg vertices for indexed vertices.
+ */
+struct radeon_dma_region {
+   struct radeon_dma_buffer *buf;
+   char *address;		/* == buf->buf->address */
+   int start, end, ptr;		/* offsets from start of buf */
+   int aos_start;
+   int aos_stride;
+   int aos_size;
+};
+
 
 struct radeon_dma {
-   drmBufPtr buffer;
-   drmBufPtr retained;
-   GLubyte *address;
-   GLuint low, high, last;
-   GLuint offset;
+   /* Active dma region.  Allocations for vertices and retained
+    * regions come from here.  Also used for emitting random vertices,
+    * these may be flushed by calling flush_current();
+    */
+   struct radeon_dma_region current;
+   
+   void (*flush)( radeonContextPtr );
+
+   char *buf0_address;		/* start of buf[0], for index calcs */
+   GLuint nr_released_bufs;	/* flush after so many buffers released */
 };
 
 struct radeon_dri_mirror {
@@ -221,14 +491,194 @@ struct radeon_dri_mirror {
    drmContext hwContext;
    drmLock *hwLock;
    int fd;
+   int drmMinor;
+   int agp_buffer_offset;
 };
 
+
+#define RADEON_CMD_BUF_SZ  (8*1024) 
+
 struct radeon_store {
-   radeonTexObjPtr texture[2][RADEON_MAX_STATES];
-   drmRadeonState state[RADEON_MAX_STATES];
-   drmRadeonPrim prim[RADEON_MAX_PRIMS];
    GLuint statenr;
    GLuint primnr;
+   char cmd_buf[RADEON_CMD_BUF_SZ];
+   int cmd_used;   
+   int elts_start;
+};
+
+
+/* radeon_tcl.c
+ */
+struct radeon_tcl_info {
+   GLuint vertex_format;
+   GLint last_offset;
+   GLuint hw_primitive;
+
+   struct radeon_dma_region *aos_components[8];
+   GLuint nr_aos_components;
+
+   GLuint *Elts;
+
+   struct radeon_dma_region indexed_verts;
+   struct radeon_dma_region obj;
+   struct radeon_dma_region rgba;
+   struct radeon_dma_region spec;
+   struct radeon_dma_region fog;
+   struct radeon_dma_region tex[RADEON_MAX_TEXTURE_UNITS];
+   struct radeon_dma_region norm;
+};
+
+
+/* radeon_swtcl.c
+ */
+struct radeon_swtcl_info {
+   GLuint SetupIndex;
+   GLuint SetupNewInputs;
+   GLuint RenderIndex;
+   GLuint vertex_size;
+   GLuint vertex_stride_shift;
+   GLuint vertex_format;
+   char *verts;
+
+   /* Fallback rasterization functions
+    */
+   radeon_point_func draw_point;
+   radeon_line_func draw_line;
+   radeon_tri_func draw_tri;
+
+   GLuint hw_primitive;
+   GLenum render_primitive;
+   GLuint numverts;
+
+   struct radeon_dma_region indexed_verts;
+};
+
+
+struct radeon_ioctl {
+   GLuint vertex_offset;
+   GLuint vertex_size;
+};
+
+
+
+#define RADEON_MAX_PRIMS 64
+
+
+/* Want to keep a cache of these around.  Each is parameterized by
+ * only a single value which has only a small range.  Only expect a
+ * few, so just rescan the list each time?
+ */
+struct dynfn {
+   struct dynfn *next, *prev;
+   int key;
+   char *code;
+};
+
+struct dfn_lists {
+   struct dynfn Vertex2f;
+   struct dynfn Vertex2fv;
+   struct dynfn Vertex3f;
+   struct dynfn Vertex3fv;
+   struct dynfn Color4ub;
+   struct dynfn Color4ubv;
+   struct dynfn Color3ub;
+   struct dynfn Color3ubv;
+   struct dynfn Color4f;
+   struct dynfn Color4fv;
+   struct dynfn Color3f;
+   struct dynfn Color3fv;
+   struct dynfn SecondaryColor3ubEXT;
+   struct dynfn SecondaryColor3ubvEXT;
+   struct dynfn SecondaryColor3fEXT;
+   struct dynfn SecondaryColor3fvEXT;
+   struct dynfn Normal3f;
+   struct dynfn Normal3fv;
+   struct dynfn TexCoord2f;
+   struct dynfn TexCoord2fv;
+   struct dynfn TexCoord1f;
+   struct dynfn TexCoord1fv;
+   struct dynfn MultiTexCoord2fARB;
+   struct dynfn MultiTexCoord2fvARB;
+   struct dynfn MultiTexCoord1fARB;
+   struct dynfn MultiTexCoord1fvARB;
+};
+
+struct _vb;
+
+struct dfn_generators {
+   struct dynfn *(*Vertex2f)( GLcontext *, int );
+   struct dynfn *(*Vertex2fv)( GLcontext *, int );
+   struct dynfn *(*Vertex3f)( GLcontext *, int );
+   struct dynfn *(*Vertex3fv)( GLcontext *, int );
+   struct dynfn *(*Color4ub)( GLcontext *, int );
+   struct dynfn *(*Color4ubv)( GLcontext *, int );
+   struct dynfn *(*Color3ub)( GLcontext *, int );
+   struct dynfn *(*Color3ubv)( GLcontext *, int );
+   struct dynfn *(*Color4f)( GLcontext *, int );
+   struct dynfn *(*Color4fv)( GLcontext *, int );
+   struct dynfn *(*Color3f)( GLcontext *, int );
+   struct dynfn *(*Color3fv)( GLcontext *, int );
+   struct dynfn *(*SecondaryColor3ubEXT)( GLcontext *, int );
+   struct dynfn *(*SecondaryColor3ubvEXT)( GLcontext *, int );
+   struct dynfn *(*SecondaryColor3fEXT)( GLcontext *, int );
+   struct dynfn *(*SecondaryColor3fvEXT)( GLcontext *, int );
+   struct dynfn *(*Normal3f)( GLcontext *, int );
+   struct dynfn *(*Normal3fv)( GLcontext *, int );
+   struct dynfn *(*TexCoord2f)( GLcontext *, int );
+   struct dynfn *(*TexCoord2fv)( GLcontext *, int );
+   struct dynfn *(*TexCoord1f)( GLcontext *, int );
+   struct dynfn *(*TexCoord1fv)( GLcontext *, int );
+   struct dynfn *(*MultiTexCoord2fARB)( GLcontext *, int );
+   struct dynfn *(*MultiTexCoord2fvARB)( GLcontext *, int );
+   struct dynfn *(*MultiTexCoord1fARB)( GLcontext *, int );
+   struct dynfn *(*MultiTexCoord1fvARB)( GLcontext *, int );
+};
+
+
+struct radeon_vb {
+   /* Keep these first: referenced from codegen templates:
+    */
+   GLint counter, initial_counter;
+   GLint *dmaptr;
+   void (*notify)( void );
+   GLint vertex_size;
+   union { float f; int i; GLubyte ub4[4]; } vertex[15];
+
+   GLfloat *normalptr;
+   GLfloat *floatcolorptr;
+   GLubyte *ubytecolorptr;
+   GLubyte *ubytespecptr;
+   GLfloat *texcoordptr[2];
+
+   GLcontext *context;		/* current context : Single thread only! */
+};
+
+struct radeon_prim {
+   GLuint start;
+   GLuint end;
+   GLuint prim;
+};
+
+struct radeon_vbinfo {
+   GLenum *prim;		/* &ctx->Driver.CurrentExecPrimitive */
+   GLuint primflags;
+   GLboolean enabled;		/* RADEON_NO_VTXFMT//RADEON_NO_TCL env vars */
+   GLboolean installed;
+   GLboolean fell_back;
+   GLboolean recheck;
+   GLint initial_counter;
+   GLint nrverts;
+   GLuint vertex_format;
+
+   GLuint installed_vertex_format;
+   GLuint installed_color_3f_sz;
+
+   struct radeon_prim primlist[RADEON_MAX_PRIMS];
+   int nrprims;
+
+   struct dfn_lists dfn_cache;
+   struct dfn_generators codegen;
+   GLvertexformat vtxfmt;
 };
 
 
@@ -239,31 +689,20 @@ struct radeon_context {
 
    /* Driver and hardware state management
     */
+   struct radeon_hw_state hw;
    struct radeon_state state;
 
    /* Texture object bookkeeping
     */
    struct radeon_texture texture;
 
-   /* Fallback rasterization functions
-    */
-   radeon_point_func draw_point;
-   radeon_line_func draw_line;
-   radeon_tri_func draw_tri;
 
    /* Rasterization and vertex state:
     */
-   GLuint NewGLState;
+   GLuint TclFallback;
    GLuint Fallback;
-   GLuint SetupIndex;
-   GLuint SetupNewInputs;
-   GLuint RenderIndex;
+   GLuint NewGLState;
 
-   GLuint vertex_size;
-   GLuint vertex_stride_shift;
-   GLuint vertex_format;
-   GLuint num_verts;
-   char *verts;
    
    /* Temporaries for translating away float colors:
     */
@@ -272,34 +711,50 @@ struct radeon_context {
 
    /* Vertex buffers
     */
+   struct radeon_ioctl ioctl;
    struct radeon_dma dma;
-
    struct radeon_store store;
-   GLboolean upload_cliprects;
-
-   GLuint hw_primitive;
-   GLenum render_primitive;
 
    /* Page flipping
     */
    GLuint doPageFlip;
-   GLuint currentPage;
 
    /* Drawable, cliprect and scissor information
     */
    GLuint numClipRects;			/* Cliprects for the draw buffer */
    XF86DRIClipRectPtr pClipRects;
    GLuint lastStamp;
+   GLboolean lost_context;
+   radeonScreenPtr radeonScreen;	/* Screen private DRI data */
+   RADEONSAREAPrivPtr sarea;		/* Private SAREA data */
 
-   /* Mirrors of some DRI state
+   /* TCL stuff
     */
-   struct radeon_dri_mirror dri;
+   GLmatrix TexGenMatrix[RADEON_MAX_TEXTURE_UNITS];
+   GLboolean recheck_texgen[RADEON_MAX_TEXTURE_UNITS];
+   GLboolean TexGenNeedNormals[RADEON_MAX_TEXTURE_UNITS];
+   GLuint TexMatEnabled;
+   GLuint TexGenEnabled;
+   GLmatrix tmpmat;
+   GLuint last_ReallyEnabled;
+
+   /* radeon_tcl.c
+    */
+   struct radeon_tcl_info tcl;
 
-   radeonScreenPtr radeonScreen;	/* Screen private DRI data */
-   RADEONSAREAPrivPtr sarea;		/* Private SAREA data */
+   /* radeon_swtcl.c
+    */
+   struct radeon_swtcl_info swtcl;
+
+   /* radeon_vtxfmt.c
+    */
+   struct radeon_vbinfo vb;
 
-   GLboolean debugFallbacks;
+   /* Mirrors of some DRI state
+    */
+   struct radeon_dri_mirror dri;
 
+ 
    /* Performance counters
     */
    GLuint boxes;			/* Draw performance boxes */
@@ -328,11 +783,12 @@ static __inline GLuint radeonPackColor( GLuint cpp,
    }
 }
 
+#define RADEON_OLD_PACKETS 0
 
 /* ================================================================
  * Debugging:
  */
-#define DO_DEBUG		0
+#define DO_DEBUG		1
 
 #if DO_DEBUG
 extern int RADEON_DEBUG;
@@ -340,14 +796,18 @@ extern int RADEON_DEBUG;
 #define RADEON_DEBUG		0
 #endif
 
-#define DEBUG_ALWAYS_SYNC	0x01
-#define DEBUG_VERBOSE_API	0x02
-#define DEBUG_VERBOSE_MSG	0x04
-#define DEBUG_VERBOSE_LRU	0x08
-#define DEBUG_VERBOSE_DRI	0x10
-#define DEBUG_VERBOSE_IOCTL	0x20
-#define DEBUG_VERBOSE_2D	0x40
-#define DEBUG_VERBOSE_TEXTURE	0x80
+#define DEBUG_TEXTURE	0x001
+#define DEBUG_STATE	0x002
+#define DEBUG_IOCTL	0x004
+#define DEBUG_PRIMS	0x008
+#define DEBUG_VERTS	0x010
+#define DEBUG_FALLBACKS	0x020
+#define DEBUG_VFMT	0x040
+#define DEBUG_CODEGEN	0x080
+#define DEBUG_VERBOSE	0x100
+#define DEBUG_DRI       0x200
+#define DEBUG_DMA       0x400
+#define DEBUG_SANITY    0x800
 
 #endif
 #endif /* __RADEON_CONTEXT_H__ */
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.c
index 521580319..317681c39 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.c
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.c
@@ -31,461 +31,568 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
  * Authors:
  *   Kevin E. Martin <martin@valinux.com>
  *   Gareth Hughes <gareth@valinux.com>
+ *   Keith Whitwell <keithw@tungstengraphics.com>
  *
  */
 
 #include "radeon_context.h"
 #include "radeon_state.h"
 #include "radeon_ioctl.h"
+#include "radeon_tcl.h"
+#include "radeon_sanity.h"
 
 #include "mem.h"
 #include "macros.h"
 #include "swrast/swrast.h"
+#include "simple_list.h"
 
 #define RADEON_TIMEOUT             512
 #define RADEON_IDLE_RETRY           16
 
+static void radeonWaitForIdle( radeonContextPtr rmesa );
 
 /* =============================================================
- * Hardware vertex buffer handling
+ * Kernel command buffer handling
  */
 
-/* Get a new VB from the pool of vertex buffers in AGP space.
- */
-drmBufPtr radeonGetBufferLocked( radeonContextPtr rmesa )
+static void print_state_atom( struct radeon_state_atom *state )
 {
-   int fd = rmesa->dri.fd;
-   int index = 0;
-   int size = 0;
-   drmDMAReq dma;
-   drmBufPtr buf = NULL;
-   int to = 0;
-   int ret;
+   int i;
 
-   dma.context = rmesa->dri.hwContext;
-   dma.send_count = 0;
-   dma.send_list = NULL;
-   dma.send_sizes = NULL;
-   dma.flags = 0;
-   dma.request_count = 1;
-   dma.request_size = RADEON_BUFFER_SIZE;
-   dma.request_list = &index;
-   dma.request_sizes = &size;
-   dma.granted_count = 0;
+   fprintf(stderr, "emit %s/%d\n", state->name, state->cmd_size);
 
-   while ( !buf && ( to++ < RADEON_TIMEOUT ) ) {
-      ret = drmDMA( fd, &dma );
+   if (RADEON_DEBUG & DEBUG_VERBOSE) 
+      for (i = 0 ; i < state->cmd_size ; i++) 
+	 fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]);
 
-      if ( ret == 0 ) {
-	 buf = &rmesa->radeonScreen->buffers->list[index];
-	 buf->used = 0;
-	 /* Bump the performance counter */
-	 rmesa->c_vertexBuffers++;
-	 return buf;
+}
+
+static void radeon_emit_state_list( radeonContextPtr rmesa, 
+				    struct radeon_state_atom *list )
+{
+   struct radeon_state_atom *state, *tmp;
+   char *dest;
+
+   foreach_s( state, tmp, list ) {
+      if (state->check( rmesa->glCtx )) {
+	 dest = radeonAllocCmdBuf( rmesa, state->cmd_size * 4, __FUNCTION__);
+	 memcpy( dest, state->cmd, state->cmd_size * 4);
+	 move_to_head( &(rmesa->hw.clean), state );
+	 if (RADEON_DEBUG & DEBUG_STATE) 
+	    print_state_atom( state );
       }
+      else if (RADEON_DEBUG & DEBUG_STATE)
+	 fprintf(stderr, "skip state %s\n", state->name);
    }
+}
+
+
+void radeonEmitState( radeonContextPtr rmesa )
+{
+   struct radeon_state_atom *state, *tmp;
 
-   if ( !buf ) {
-      UNLOCK_HARDWARE( rmesa );
-      fprintf( stderr, "Error: Could not get new VB... exiting\n" );
-      exit( -1 );
+   if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   /* Somewhat overkill:
+    */
+   if (rmesa->lost_context) {
+      if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS|DEBUG_IOCTL))
+	 fprintf(stderr, "%s - lost context\n", __FUNCTION__); 
+
+      foreach_s( state, tmp, &(rmesa->hw.clean) ) 
+	 move_to_tail(&(rmesa->hw.dirty), state );
+
+      rmesa->lost_context = 0;
+   }
+   else if (1) {
+      /* This is a dastardly kludge to work around a lockup that I
+       * haven't otherwise figured out.
+       */
+      move_to_tail(&(rmesa->hw.dirty), &(rmesa->hw.zbs) );
    }
 
-   return buf;
+   radeon_emit_state_list( rmesa, &rmesa->hw.dirty );
 }
 
 
-static GLboolean intersect_rect( XF86DRIClipRectPtr out,
-				 XF86DRIClipRectPtr a,
-				 XF86DRIClipRectPtr b )
+
+/* Fire a section of the retained (indexed_verts) buffer as a regular
+ * primitive.
+ */
+extern void radeonEmitVbufPrim( radeonContextPtr rmesa,
+				GLuint vertex_format,
+				GLuint primitive,
+				GLuint vertex_nr )
 {
-   *out = *a;
-   if ( b->x1 > out->x1 ) out->x1 = b->x1;
-   if ( b->y1 > out->y1 ) out->y1 = b->y1;
-   if ( b->x2 < out->x2 ) out->x2 = b->x2;
-   if ( b->y2 < out->y2 ) out->y2 = b->y2;
-   if ( out->x1 >= out->x2 ) return GL_FALSE;
-   if ( out->y1 >= out->y2 ) return GL_FALSE;
-   return GL_TRUE;
+   drmRadeonCmdHeader *cmd;
+
+
+   assert(rmesa->dri.drmMinor >= 3); 
+   assert(!(primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
+   
+   radeonEmitState( rmesa );
+
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s cmd_used/4: %d\n", __FUNCTION__,
+	      rmesa->store.cmd_used/4);
+   
+#if RADEON_OLD_PACKETS
+   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 6 * sizeof(*cmd),
+						  __FUNCTION__ );
+   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+   cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM | (3 << 16);
+   cmd[2].i = rmesa->ioctl.vertex_offset;
+   cmd[3].i = vertex_nr;
+   cmd[4].i = vertex_format;
+   cmd[5].i = (primitive | 
+	       RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
+	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+	       (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
+
+   if (RADEON_DEBUG & DEBUG_PRIMS)
+      fprintf(stderr, "%s: header 0x%x offt 0x%x vfmt 0x%x vfcntl %x \n",
+	      __FUNCTION__,
+	      cmd[1].i, cmd[2].i, cmd[4].i, cmd[5].i);
+#else
+   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 4 * sizeof(*cmd),
+						  __FUNCTION__ );
+   cmd[0].i = 0;
+   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+   cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_VBUF | (1 << 16);
+   cmd[2].i = vertex_format;
+   cmd[3].i = (primitive | 
+	       RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
+	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+	       RADEON_CP_VC_CNTL_MAOS_ENABLE |
+	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+	       (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
+
+
+   if (RADEON_DEBUG & DEBUG_PRIMS)
+      fprintf(stderr, "%s: header 0x%x vfmt 0x%x vfcntl %x \n",
+	      __FUNCTION__,
+	      cmd[1].i, cmd[2].i, cmd[3].i);
+#endif
 }
 
-static void emit_state( radeonContextPtr rmesa,
-			drmRadeonState *dest,
-			int dirty )
+
+void radeonFlushElts( radeonContextPtr rmesa )
 {
-   struct radeon_state *state = &rmesa->state;
+   int *cmd = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start);
+   int dwords;
+#if RADEON_OLD_PACKETS
+   int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 24)) / 2;
+#else
+   int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 16)) / 2;
+#endif
 
-   if ( dirty & RADEON_UPLOAD_CONTEXT )
-      memcpy( &dest->context, &state->hw.context, sizeof(dest->context) );
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
 
-   if ( dirty & RADEON_UPLOAD_VERTFMT )
-      memcpy( &dest->vertex, &state->hw.vertex, sizeof(dest->vertex) );
+   assert( rmesa->dma.flush == radeonFlushElts );
+   rmesa->dma.flush = 0;
 
-   if ( dirty & RADEON_UPLOAD_LINE )
-      memcpy( &dest->line, &state->hw.line, sizeof(dest->line) );
+   /* Cope with odd number of elts:
+    */
+   rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2;
+   dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4;
+
+#if RADEON_OLD_PACKETS
+   cmd[1] |= (dwords - 3) << 16;
+   cmd[5] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
+#else
+   cmd[1] |= (dwords - 3) << 16;
+   cmd[3] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
+#endif
+}
 
-   if ( dirty & RADEON_UPLOAD_BUMPMAP )
-      memcpy( &dest->bumpmap, &state->hw.bumpmap, sizeof(dest->bumpmap) );
 
-   if ( dirty & RADEON_UPLOAD_MASKS )
-      memcpy( &dest->mask, &state->hw.mask, sizeof(dest->mask) );
+GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa,
+				    GLuint vertex_format,
+				    GLuint primitive,
+				    GLuint min_nr )
+{
+   drmRadeonCmdHeader *cmd;
+   GLushort *retval;
+
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s %d\n", __FUNCTION__, min_nr);
+
+   assert(rmesa->dri.drmMinor >= 3); 
+   assert((primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
+   
+   radeonEmitState( rmesa );
+   
+#if RADEON_OLD_PACKETS
+   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 
+						  24 + min_nr*2,
+						  __FUNCTION__ );
+   cmd[0].i = 0;
+   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+   cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM;
+   cmd[2].i = rmesa->ioctl.vertex_offset;
+   cmd[3].i = 0xffff;
+   cmd[4].i = vertex_format;
+   cmd[5].i = (primitive | 
+	       RADEON_CP_VC_CNTL_PRIM_WALK_IND |
+	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
+
+   retval = (GLushort *)(cmd+6);
+#else   
+   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 
+						  16 + min_nr*2,
+						  __FUNCTION__ );
+   cmd[0].i = 0;
+   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+   cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_INDX;
+   cmd[2].i = vertex_format;
+   cmd[3].i = (primitive | 
+	       RADEON_CP_VC_CNTL_PRIM_WALK_IND |
+	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+	       RADEON_CP_VC_CNTL_MAOS_ENABLE |
+	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
+
+   retval = (GLushort *)(cmd+4);
+#endif
 
-   if ( dirty & RADEON_UPLOAD_VIEWPORT )
-      memcpy( &dest->viewport, &state->hw.viewport, sizeof(dest->viewport) );
+   if (RADEON_DEBUG & DEBUG_PRIMS)
+      fprintf(stderr, "%s: header 0x%x vfmt 0x%x prim %x \n",
+	      __FUNCTION__,
+	      cmd[1].i, vertex_format, primitive);
 
-   if ( dirty & RADEON_UPLOAD_SETUP ) {
-      memcpy( &dest->setup1, &state->hw.setup1, sizeof(dest->setup1) );
-      memcpy( &dest->setup2, &state->hw.setup2, sizeof(dest->setup2) );
-   }
+   assert(!rmesa->dma.flush);
+   rmesa->dma.flush = radeonFlushElts;
 
-   if ( dirty & RADEON_UPLOAD_MISC )
-      memcpy( &dest->misc, &state->hw.misc, sizeof(dest->misc) );
+   rmesa->store.elts_start = ((char *)cmd) - rmesa->store.cmd_buf;
 
-   if ( dirty & RADEON_UPLOAD_ZBIAS )
-      memcpy( &dest->zbias, &state->hw.zbias, sizeof(dest->zbias) );
+   return retval;
+}
 
-   /* Assemble the texture state, combining the texture object and
-    * texture environment state into the hardware texture unit state.
-    */
-   if ( dirty & RADEON_UPLOAD_TEX0 ) {
-      radeonTexObjPtr t0 = state->texture.unit[0].texobj;
-
-      dest->texture[0].pp_txfilter = t0->pp_txfilter | state->hw.texture[0].pp_txfilter;
-      dest->texture[0].pp_txformat = t0->pp_txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0;
-      dest->texture[0].pp_txoffset = t0->pp_txoffset;
-      dest->texture[0].pp_border_color = t0->pp_border_color;
-      dest->texture[0].pp_txcblend = state->hw.texture[0].pp_txcblend;
-      dest->texture[0].pp_txablend = state->hw.texture[0].pp_txablend;
-      dest->texture[0].pp_tfactor = state->hw.texture[0].pp_tfactor;
-   }
 
-   if ( dirty & RADEON_UPLOAD_TEX1 ) {
-      radeonTexObjPtr t1 = state->texture.unit[1].texobj;
 
-      dest->texture[1].pp_txfilter = t1->pp_txfilter | state->hw.texture[1].pp_txfilter;
-      dest->texture[1].pp_txformat = t1->pp_txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1;
-      dest->texture[1].pp_txoffset = t1->pp_txoffset;
-      dest->texture[1].pp_border_color = t1->pp_border_color;
-      dest->texture[1].pp_txcblend = state->hw.texture[1].pp_txcblend;
-      dest->texture[1].pp_txablend = state->hw.texture[1].pp_txablend;
-      dest->texture[1].pp_tfactor = state->hw.texture[1].pp_tfactor;
-   }
+void radeonEmitVertexAOS( radeonContextPtr rmesa,
+			  GLuint vertex_size,
+			  GLuint offset )
+{
+#if RADEON_OLD_PACKETS
+   rmesa->ioctl.vertex_size = vertex_size;
+   rmesa->ioctl.vertex_offset = offset;
+#else
+   drmRadeonCmdHeader *cmd;
+   assert(rmesa->dri.drmMinor >= 3); 
+
+   if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
+      fprintf(stderr, "%s:  vertex_size 0x%x offset 0x%x \n",
+	      __FUNCTION__, vertex_size, offset);
+
+   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 5 * sizeof(int),
+						  __FUNCTION__ );
+
+   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
+   cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (2 << 16);
+   cmd[2].i = 1;
+   cmd[3].i = vertex_size | (vertex_size << 8);
+   cmd[4].i = offset;
+#endif
 }
+		       
 
-
-static void print_values( const char *name, const void *vals, int sz )
+void radeonEmitAOS( radeonContextPtr rmesa,
+		    struct radeon_dma_region **component,
+		    GLuint nr,
+		    GLuint offset )
 {
-   const int *ivals = (const int *)vals;
+#if RADEON_OLD_PACKETS
+   assert( nr == 1 );
+   assert( component[0]->aos_size == component[0]->aos_stride );
+   rmesa->ioctl.vertex_size = component[0]->aos_size;
+   rmesa->ioctl.vertex_offset = 
+      (component[0]->aos_start + offset * component[0]->aos_stride * 4);
+#else
+   drmRadeonCmdHeader *cmd;
+   int sz = 3 + (nr/2 * 3) + (nr & 1) * 2;
    int i;
+   int *tmp;
+
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   assert(rmesa->dri.drmMinor >= 3); 
+
+   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, sz * sizeof(int),
+						  __FUNCTION__ );
+   cmd[0].i = 0;
+   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
+   cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | ((sz-3) << 16);
+   cmd[2].i = nr;
+   tmp = &cmd[0].i;
+   cmd += 3;
+
+   for (i = 0 ; i < nr ; i++) {
+      if (i & 1) {
+	 cmd[0].i |= ((component[i]->aos_stride << 24) | 
+		      (component[i]->aos_size << 16));
+	 cmd[2].i = (component[i]->aos_start + 
+		     offset * component[i]->aos_stride * 4);
+	 cmd += 3;
+      }
+      else {
+	 cmd[0].i = ((component[i]->aos_stride << 8) | 
+		     (component[i]->aos_size << 0));
+	 cmd[1].i = (component[i]->aos_start + 
+		     offset * component[i]->aos_stride * 4);
+      }
+   }
 
-   for (i = 0; i < sz/4 ; i++)
-      fprintf(stderr, "%s %d: 0x%x\n", name, i, ivals[i]);
+   if (RADEON_DEBUG & DEBUG_VERTS) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      for (i = 0 ; i < sz ; i++)
+	 fprintf(stderr, "   %d: %x\n", i, tmp[i]);
+   }
+#endif
 }
 
-static void print_state( drmRadeonState *state )
-{
-   int dirty = state->dirty;
 
-   if ( dirty & RADEON_UPLOAD_CONTEXT ) 
-      print_values( "CONTEXT", &state->context, sizeof(state->context) );
-
-   if ( dirty & RADEON_UPLOAD_VERTFMT )
-      print_values( "VERTFMT", &state->vertex, sizeof(state->vertex) );
+static int radeonFlushCmdBufLocked( radeonContextPtr rmesa, 
+				    const char * caller )
+{
+   int ret, i;
+   drmRadeonCmdBuffer cmd;
 
-   if ( dirty & RADEON_UPLOAD_LINE )
-      print_values( "LINE", &state->line, sizeof(state->line) );
+   if (RADEON_DEBUG & DEBUG_IOCTL) {
+      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); 
 
-   if ( dirty & RADEON_UPLOAD_BUMPMAP )
-      print_values( "BUMPMAP", &state->bumpmap, sizeof(state->bumpmap) );
+      if (RADEON_DEBUG & DEBUG_VERBOSE) 
+	 for (i = 0 ; i < rmesa->store.cmd_used ; i += 4 )
+	    fprintf(stderr, "%d: %x\n", i/4, 
+		    *(int *)(&rmesa->store.cmd_buf[i]));
+   }
 
-   if ( dirty & RADEON_UPLOAD_MASKS )
-      print_values( "MASKS", &state->mask, sizeof(state->mask) );
+   if (RADEON_DEBUG & DEBUG_DMA)
+      fprintf(stderr, "%s: Releasing %d buffers\n", __FUNCTION__,
+	      rmesa->dma.nr_released_bufs);
 
-   if ( dirty & RADEON_UPLOAD_VIEWPORT )
-      print_values( "VIEWPORT", &state->viewport, sizeof(state->viewport) );
 
-   if ( dirty & RADEON_UPLOAD_SETUP ) {
-      print_values( "SETUP", &state->setup1, sizeof(state->setup1) );
-      print_values( "SETUP2", &state->setup2, sizeof(state->setup2) );
+   if (RADEON_DEBUG & DEBUG_SANITY) {
+      if (rmesa->state.scissor.enabled) 
+	 ret = radeonSanityCmdBuffer( rmesa, 
+				      rmesa->state.scissor.numClipRects,
+				      rmesa->state.scissor.pClipRects);
+      else
+	 ret = radeonSanityCmdBuffer( rmesa, 
+				      rmesa->numClipRects,
+				      rmesa->pClipRects);
    }
 
-   if ( dirty & RADEON_UPLOAD_MISC )
-      print_values( "MISC", &state->misc, sizeof(state->misc) );
+   cmd.bufsz = rmesa->store.cmd_used;
+   cmd.buf = rmesa->store.cmd_buf;
 
-   if ( dirty & RADEON_UPLOAD_ZBIAS )
-      print_values( "ZBIAS", &state->zbias, sizeof(state->zbias) );
+   if (rmesa->state.scissor.enabled) {
+      cmd.nbox = rmesa->state.scissor.numClipRects;
+      cmd.boxes = (drmClipRect *)rmesa->state.scissor.pClipRects;
+   } else {
+      cmd.nbox = rmesa->numClipRects;
+      cmd.boxes = (drmClipRect *)rmesa->pClipRects;
+   }
 
-   if ( dirty & RADEON_UPLOAD_TEX0 ) 
-      print_values( "TEX0", &state->texture[0], sizeof(state->texture[0]) );
+   ret = drmCommandWrite( rmesa->dri.fd,
+			  DRM_RADEON_CMDBUF,
+			  &cmd, sizeof(cmd) );
 
-   if ( dirty & RADEON_UPLOAD_TEX1 ) 
-      print_values( "TEX1", &state->texture[1], sizeof(state->texture[1]) );
+   rmesa->store.primnr = 0;
+   rmesa->store.statenr = 0;
+   rmesa->store.cmd_used = 0;
+   rmesa->dma.nr_released_bufs = 0;
+   rmesa->lost_context = 1;	
+   return ret;
 }
 
 
-static void emit_prim( radeonContextPtr rmesa )
+/* Note: does not emit any commands to avoid recursion on
+ * radeonAllocCmdBuf.
+ */
+void radeonFlushCmdBuf( radeonContextPtr rmesa, const char *caller )
 {
-   GLuint prim = rmesa->store.primnr++;
-   GLuint dirty = rmesa->state.hw.dirty;
-
-   rmesa->store.prim[prim].prim = rmesa->hw_primitive;
-   rmesa->store.prim[prim].start = rmesa->dma.last;
-   rmesa->store.prim[prim].finish = rmesa->dma.low;
-   rmesa->store.prim[prim].vc_format = rmesa->vertex_format;
-
-   if (rmesa->hw_primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND)
-      rmesa->store.prim[prim].numverts = rmesa->dma.offset / 64;
-   else
-      rmesa->store.prim[prim].numverts = rmesa->num_verts;
-
-   rmesa->num_verts = 0;
-   rmesa->dma.last = rmesa->dma.low;
-
-
+   int ret;
 
+	      
+   assert (rmesa->dri.drmMinor >= 3);
 
-   /* Make sure we keep a copy of the initial state.
-    */
-   if (prim == 0) {
-      dirty = RADEON_UPLOAD_CONTEXT_ALL;
-      if (rmesa->state.texture.unit[0].texobj) dirty |= RADEON_UPLOAD_TEX0;
-      if (rmesa->state.texture.unit[1].texobj) dirty |= RADEON_UPLOAD_TEX1;
-   }
+   LOCK_HARDWARE( rmesa );
 
+   ret = radeonFlushCmdBufLocked( rmesa, caller );
 
-   if (dirty)
-   {
-      GLuint state = rmesa->store.statenr++;
+   UNLOCK_HARDWARE( rmesa );
 
-      emit_state( rmesa, &rmesa->store.state[state], dirty );
-/*        fprintf(stderr, "emit state %d, dirty %x rmesa->dirty %x\n", */
-/*  	      state, dirty, rmesa->state.hw.dirty ); */
-      rmesa->store.state[state].dirty = rmesa->state.hw.dirty;	/* override */
-      rmesa->store.texture[0][state] = rmesa->state.texture.unit[0].texobj;
-      rmesa->store.texture[1][state] = rmesa->state.texture.unit[1].texobj;
-      rmesa->state.hw.dirty = 0;
-/*        print_state( &rmesa->store.state[state] ); */
+   if (ret) {
+      fprintf(stderr, "drmRadeonCmdBuffer: %d\n", ret);
+      exit(ret);
    }
+}
 
-   rmesa->store.prim[prim].stateidx = rmesa->store.statenr - 1;
 
-/*     fprintf(stderr, "emit_prim %d hwprim 0x%x vfmt 0x%x %d..%d %d verts stateidx %x\n", */
-/*  	   prim, */
-/*  	   rmesa->store.prim[prim].prim, */
-/*  	   rmesa->store.prim[prim].vc_format, */
-/*  	   rmesa->store.prim[prim].start, */
-/*  	   rmesa->store.prim[prim].finish, */
-/*  	   rmesa->store.prim[prim].numverts, */
-/*  	   rmesa->store.prim[prim].stateidx); */
-}
+/* =============================================================
+ * Hardware vertex buffer handling
+ */
 
 
-void radeonFlushPrimsLocked( radeonContextPtr rmesa )
+void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa )
 {
-   XF86DRIClipRectPtr pbox = (XF86DRIClipRectPtr)rmesa->pClipRects;
-   int nbox = rmesa->numClipRects;
-   drmBufPtr buffer = rmesa->dma.buffer;
-   drmRadeonVertex2 v;
-   RADEONSAREAPrivPtr sarea = rmesa->sarea;
+   struct radeon_dma_buffer *dmabuf;
    int fd = rmesa->dri.fd;
-   int discard_sz = rmesa->dma.high - rmesa->dma.low < 4096;
-   int discard = (rmesa->dma.retained != rmesa->dma.buffer &&
-		  discard_sz);
-   int i;
+   int index = 0;
+   int size = 0;
+   drmDMAReq dma;
+   int ret;
 
-   if ( !nbox )
-      rmesa->store.primnr = 0;
-   else if ( nbox >= RADEON_NR_SAREA_CLIPRECTS ) {
-      rmesa->upload_cliprects = 1;
-      for ( i = 0 ; i < rmesa->store.statenr ; i++ )
-	 rmesa->store.state[0].dirty |= rmesa->store.state[i].dirty;
-      if ( !rmesa->store.texture[0][0] )
-	 rmesa->store.state[0].dirty &= ~RADEON_UPLOAD_TEX0;
-      if ( !rmesa->store.texture[1][0] )
-	 rmesa->store.state[0].dirty &= ~RADEON_UPLOAD_TEX1;
+   if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
+      fprintf(stderr, "%s\n", __FUNCTION__);  
+
+   if (rmesa->dma.flush) {
+      rmesa->dma.flush( rmesa );
    }
 
+   if (rmesa->dma.current.buf)
+      radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
 
-/*     fprintf(stderr, "%s: boxes: %d prims: %d states: %d vertexstore: 0x%x\n", */
-/*  	   __FUNCTION__, */
-/*  	   sarea->nbox, rmesa->store.primnr, rmesa->store.statenr, */
-/*  	   rmesa->dma.low - rmesa->store.prim[0].start); */
+   if (rmesa->dma.nr_released_bufs > 4)
+      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
 
-   if ( !rmesa->upload_cliprects || !rmesa->store.primnr )
-   {
-      if ( nbox == 1 ) {
-	 sarea->nbox = 0;
-      } else {
-	 sarea->nbox = nbox;
-      }
+   dma.context = rmesa->dri.hwContext;
+   dma.send_count = 0;
+   dma.send_list = NULL;
+   dma.send_sizes = NULL;
+   dma.flags = 0;
+   dma.request_count = 1;
+   dma.request_size = RADEON_BUFFER_SIZE;
+   dma.request_list = &index;
+   dma.request_sizes = &size;
+   dma.granted_count = 0;
 
-/*        fprintf(stderr, "case a %d boxes %d prims %d states\n", */
-/*  	      sarea->nbox, rmesa->store.primnr, rmesa->store.statenr); */
-      if (discard || rmesa->store.primnr) {
-         v.idx       = buffer->idx;
-         v.discard   = discard;
-         v.nr_states = rmesa->store.statenr;
-         v.state     = rmesa->store.state;
-         v.nr_prims  = rmesa->store.primnr;
-         v.prim      = rmesa->store.prim;
-
-         drmCommandWrite( fd, DRM_RADEON_VERTEX2, &v, sizeof(drmRadeonVertex2));
-      }
-   }
-   else
-   {
-      for ( i = 0 ; i < nbox ; ) {
-	 int nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, nbox );
-	 XF86DRIClipRectPtr b = sarea->boxes;
-	 int discard_now = 0;
-
-	 /* TODO: Precalculate this intersection:
-	  */
-	 if ( rmesa->state.scissor.enabled ) {
-	    sarea->nbox = 0;
-
-	    for ( ; i < nr ; i++ ) {
-	       *b = pbox[i];
-	       if ( intersect_rect( b, b, &rmesa->state.scissor.rect ) ) {
-		  sarea->nbox++;
-		  b++;
-	       }
-	    }
-
-	    /* Culled?
-	     */
-	    if ( !sarea->nbox ) {
-	       if ( nr < nbox ) continue;
-	       rmesa->store.primnr = 0;
-	    }
-	 } else {
-	    sarea->nbox = nr - i;
-	    for ( ; i < nr ; i++) {
-	       *b++ = pbox[i];
-	    }
-	 }
+   LOCK_HARDWARE(rmesa);	/* no need to validate */
 
-	 /* Finished with the buffer?
-	  */
-	 if ( nr == nbox ) {
-	    discard_now = discard;
-	 }
+   ret = drmDMA( fd, &dma );
+      
+   if (ret != 0) {
+      /* Free some up this way?
+       */
+      if (rmesa->dma.nr_released_bufs) {
+	 radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
+      }
+      
+      if (RADEON_DEBUG & DEBUG_DMA)
+	 fprintf(stderr, "Waiting for buffers\n");
 
-/*  	 fprintf(stderr, "case a %d boxes %d prims %d states, discard: %d\n", */
-/*  		 sarea->nbox, rmesa->store.primnr, rmesa->store.statenr, discard); */
-         v.idx       = buffer->idx;
-         v.discard   = discard_now;
-         v.nr_states = rmesa->store.statenr;
-         v.state     = rmesa->store.state;
-         v.nr_prims  = rmesa->store.primnr;
-         v.prim      = rmesa->store.prim;
+      radeonWaitForIdleLocked( rmesa );
+      ret = drmDMA( fd, &dma );
 
-         drmCommandWrite( fd, DRM_RADEON_VERTEX2, &v, sizeof(drmRadeonVertex2));
+      if ( ret != 0 ) {
+	 UNLOCK_HARDWARE( rmesa );
+	 fprintf( stderr, "Error: Could not get dma buffer... exiting\n" );
+	 exit( -1 );
       }
    }
 
-   if (discard_sz) {
-      rmesa->dma.buffer = 0;
-      rmesa->dma.address = 0;
-      rmesa->dma.low = 0;
-      rmesa->dma.high = 0;
-   }
-   else {
-      rmesa->dma.low = (rmesa->dma.low + 0x7) & ~0x7;  /* alignment */
-   }
-   rmesa->dma.last = rmesa->dma.low;
-   rmesa->store.primnr = 0;
-   rmesa->store.statenr = 0;
-   rmesa->upload_cliprects = 0;
-   rmesa->num_verts = 0;
-}
+   UNLOCK_HARDWARE(rmesa);
 
-void radeonFlushPrimsGetBuffer( radeonContextPtr rmesa )
-{
-   if (rmesa->dma.low != rmesa->dma.last)
-      emit_prim( rmesa );
+   if (RADEON_DEBUG & DEBUG_DMA)
+      fprintf(stderr, "Allocated buffer %d\n", index);
 
-   LOCK_HARDWARE(rmesa);
+   dmabuf = CALLOC_STRUCT( radeon_dma_buffer );
+   dmabuf->buf = &rmesa->radeonScreen->buffers->list[index];
+   dmabuf->refcount = 1;
 
-   if (rmesa->dma.buffer) {
-      rmesa->dma.low = rmesa->dma.high; /* force discard */
-      rmesa->dma.last = rmesa->dma.low;
-      radeonFlushPrimsLocked( rmesa );
-   }
+   rmesa->dma.current.buf = dmabuf;
+   rmesa->dma.current.address = dmabuf->buf->address;
+   rmesa->dma.current.end = dmabuf->buf->total;
+   rmesa->dma.current.start = 0;
+   rmesa->dma.current.ptr = 0;
 
-   rmesa->dma.buffer = radeonGetBufferLocked( rmesa );
-   rmesa->dma.high = rmesa->dma.buffer->total;
-   rmesa->dma.address = (GLubyte *)rmesa->dma.buffer->address;
-   rmesa->dma.low = 0;
-   rmesa->num_verts = 0;
-   rmesa->dma.last = rmesa->dma.low;
-   UNLOCK_HARDWARE(rmesa);
+   rmesa->c_vertexBuffers++;
 }
 
-
-void radeonFlushPrims( radeonContextPtr rmesa )
+void radeonReleaseDmaRegion( radeonContextPtr rmesa,
+			     struct radeon_dma_region *region,
+			     const char *caller )
 {
-   if (rmesa->dma.buffer) {
-      if (rmesa->dma.low != rmesa->dma.last)
-	 emit_prim( rmesa );
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); 
+   
+   if (!region->buf)
+      return;
+
+   if (rmesa->dma.flush)
+      rmesa->dma.flush( rmesa );
 
-      LOCK_HARDWARE( rmesa );
-      radeonFlushPrimsLocked( rmesa );
-      UNLOCK_HARDWARE( rmesa );
+   if (--region->buf->refcount == 0) {
+      drmRadeonCmdHeader *cmd;
+
+      if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
+	 fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__,
+		 region->buf->buf->idx);  
+      
+      cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, sizeof(*cmd), 
+						     __FUNCTION__ );
+      cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD;
+      cmd->dma.buf_idx = region->buf->buf->idx;
+      FREE(region->buf);
+      rmesa->dma.nr_released_bufs++;
    }
+
+   region->buf = 0;
+   region->start = 0;
 }
 
-void radeonEmitPrim( radeonContextPtr rmesa )
+/* Allocates a region from rmesa->dma.current.  If there isn't enough
+ * space in current, grab a new buffer (and discard what was left of current)
+ */
+void radeonAllocDmaRegion( radeonContextPtr rmesa, 
+			   struct radeon_dma_region *region,
+			   int bytes,
+			   int alignment )
 {
-   ASSERT(rmesa->dma.buffer);
-   emit_prim( rmesa );
-
-   if (rmesa->store.primnr == RADEON_MAX_PRIMS ||
-       rmesa->store.statenr == RADEON_MAX_STATES) {
-      LOCK_HARDWARE(rmesa);
-      radeonFlushPrimsLocked(rmesa);
-      UNLOCK_HARDWARE(rmesa);
-   }
-   else {
-      rmesa->dma.low = (rmesa->dma.low + 0x7) & ~0x7;  /* alignment */
-      rmesa->dma.last = rmesa->dma.low;
-      rmesa->num_verts = 0;
-   }
-}
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
 
+   if (rmesa->dma.flush)
+      rmesa->dma.flush( rmesa );
 
-/* ================================================================
- * Texture uploads
- */
+   if (region->buf)
+      radeonReleaseDmaRegion( rmesa, region, __FUNCTION__ );
 
-void radeonFireBlitLocked( radeonContextPtr rmesa, drmBufPtr buffer,
-			   GLint offset, GLint pitch, GLint format,
-			   GLint x, GLint y, GLint width, GLint height )
-{
-#if 0
-   drmRadeonTextureBlitType texture;
-   GLint ret;
+   alignment--;
+   rmesa->dma.current.start = rmesa->dma.current.ptr = 
+      (rmesa->dma.current.ptr + alignment) & ~alignment;
 
-   texture.idx    = buffer->idx;
-   texture.offset = offset;
-   texture.pitch  = pitch;
-   texture.format = format;
-   texture.x      = x;
-   texture.y      = y;
-   texture.width  = width;
-   texture.height = height;
+   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
+      radeonRefillCurrentDmaRegion( rmesa );
 
-   ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_TEXTURE_BLIT,
-                          &texture, sizeof(drmRadeonTexture));
+   region->start = rmesa->dma.current.start;
+   region->ptr = rmesa->dma.current.start;
+   region->end = rmesa->dma.current.start + bytes;
+   region->address = rmesa->dma.current.address;
+   region->buf = rmesa->dma.current.buf;
+   region->buf->refcount++;
 
-   if ( ret ) {
-      UNLOCK_HARDWARE( rmesa );
-      fprintf( stderr, "DRM_RADEON_TEXTURE_BLIT: return = %d\n", ret );
-      exit( 1 );
-   }
-#endif
+   rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
+   rmesa->dma.current.start = 
+      rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;  
+
+   if ( rmesa->dri.drmMinor < 3 ) 
+      radeonRefillCurrentDmaRegion( rmesa );
 }
 
+void radeonAllocDmaRegionVerts( radeonContextPtr rmesa, 
+				struct radeon_dma_region *region,
+				int numverts,
+				int vertsize,
+				int alignment )
+{
+   radeonAllocDmaRegion( rmesa, region, vertsize * numverts, alignment );
+}
 
 /* ================================================================
  * SwapBuffers with client-side throttling
@@ -512,11 +619,12 @@ static int radeonWaitForFrameCompletion( radeonContextPtr rmesa )
       }
       wait++;
       /* Spin in place a bit so we aren't hammering the bus */
-      for ( i = 0 ; i < 1024 ; i++ ) {
+      for ( i = 0 ; i < 10000 ; i++ ) {
 	 delay();
       }
    }
 
+/*     fprintf(stderr, "%d\n", wait); */
    return wait;
 }
 
@@ -533,7 +641,7 @@ void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv )
 
    rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_API ) {
+   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
       fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, rmesa->glCtx );
    }
 
@@ -541,7 +649,6 @@ void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv )
 
    LOCK_HARDWARE( rmesa );
 
-   nbox = rmesa->dri.drawable->numClipRects; /* must be in locked region */
 
    /* Throttle the frame rate -- only allow one pending swap buffers
     * request at a time.
@@ -552,7 +659,7 @@ void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv )
       rmesa->hardwareWentIdle = 0;
    }
 
-   nbox = dPriv->numClipRects;
+   nbox = rmesa->dri.drawable->numClipRects; /* must be in locked region */
 
    for ( i = 0 ; i < nbox ; ) {
       GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox );
@@ -576,18 +683,6 @@ void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv )
    }
 
    UNLOCK_HARDWARE( rmesa );
-
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT_ALL );
-   if ( rmesa->state.texture.unit[0].texobj )
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX0 );
-   if ( rmesa->state.texture.unit[1].texobj )
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX1 );
-
-
-   rmesa->upload_cliprects = 1;
-
-   /* Log the performance counters if necessary */
-   radeonPerformanceCounters( rmesa );
 }
 
 void radeonPageFlip( const __DRIdrawablePrivate *dPriv )
@@ -601,9 +696,9 @@ void radeonPageFlip( const __DRIdrawablePrivate *dPriv )
 
    rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_API ) {
-      fprintf( stderr, "\n%s( %p ): page=%d\n\n",
-	       __FUNCTION__, rmesa->glCtx, rmesa->currentPage );
+   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
+      fprintf(stderr, "%s %d\n", __FUNCTION__, 
+	      rmesa->sarea->pfCurrentPage );
    }
 
    RADEON_FIREVERTICES( rmesa );
@@ -619,43 +714,28 @@ void radeonPageFlip( const __DRIdrawablePrivate *dPriv )
       rmesa->hardwareWentIdle = 0;
    }
 
-   /* The kernel will have been initialized to perform page flipping
-    * on a swapbuffers ioctl.
-    */
-   ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );
+   ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP );
 
    UNLOCK_HARDWARE( rmesa );
 
    if ( ret ) {
-      fprintf( stderr, "DRM_RADEON_SWAP_BUFFERS: return = %d\n", ret );
+      fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret );
       exit( 1 );
    }
 
-   if ( rmesa->currentPage == 0 ) {
+   if ( rmesa->sarea->pfCurrentPage == 1 ) {
 	 rmesa->state.color.drawOffset = rmesa->radeonScreen->frontOffset;
 	 rmesa->state.color.drawPitch  = rmesa->radeonScreen->frontPitch;
-	 rmesa->currentPage = 1;
    } else {
 	 rmesa->state.color.drawOffset = rmesa->radeonScreen->backOffset;
 	 rmesa->state.color.drawPitch  = rmesa->radeonScreen->backPitch;
-	 rmesa->currentPage = 0;
    }
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
-   rmesa->state.hw.context.rb3d_coloroffset = rmesa->state.color.drawOffset;
-   rmesa->state.hw.context.rb3d_colorpitch  = rmesa->state.color.drawPitch;
-
-   /* Log the performance counters if necessary */
-   radeonPerformanceCounters( rmesa );
+   RADEON_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = rmesa->state.color.drawOffset;
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH]  = rmesa->state.color.drawPitch;
 }
 
-void radeonPerformanceCounters( radeonContextPtr rmesa )
-{
-}
-
-void radeonPerformanceBoxesLocked( radeonContextPtr rmesa )
-{
-}
 
 /* ================================================================
  * Buffer clear
@@ -672,25 +752,27 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask, GLboolean all,
    CARD32 clear;
    GLuint flags = 0;
    GLuint color_mask = 0;
-/*     GLuint depth_mask = 0; */
    GLint ret, i;
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_API ) {
+   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
       fprintf( stderr, "%s:  all=%d cx=%d cy=%d cw=%d ch=%d\n",
 	       __FUNCTION__, all, cx, cy, cw, ch );
    }
 
-   RADEON_FIREVERTICES( rmesa );
+   /* Need to cope with lostcontext here as kernel relies on
+    * some residual state:
+    */
+   RADEON_FIREVERTICES( rmesa ); 
 
    if ( mask & DD_FRONT_LEFT_BIT ) {
       flags |= RADEON_FRONT;
-      color_mask = rmesa->state.hw.mask.rb3d_planemask;
+      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
       mask &= ~DD_FRONT_LEFT_BIT;
    }
 
    if ( mask & DD_BACK_LEFT_BIT ) {
       flags |= RADEON_BACK;
-      color_mask = rmesa->state.hw.mask.rb3d_planemask;
+      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
       mask &= ~DD_BACK_LEFT_BIT;
    }
 
@@ -704,123 +786,98 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask, GLboolean all,
       mask &= ~DD_STENCIL_BIT;
    }
 
-   if ( flags ) {
-      /* Flip top to bottom */
-      cx += dPriv->x;
-      cy  = dPriv->y + dPriv->h - cy - ch;
+   if ( mask )
+      _swrast_Clear( ctx, mask, all, cx, cy, cw, ch );
 
-      LOCK_HARDWARE( rmesa );
+   if ( !flags ) 
+      return;
 
-      /* Throttle the number of clear ioctls we do.
-       */
-      while ( 1 ) {
-	 clear = INREG( RADEON_LAST_CLEAR_REG );
-	 if ( sarea->last_clear - clear <= RADEON_MAX_CLEARS ) {
-	    break;
-	 }
-	 /* Spin in place a bit so we aren't hammering the bus */
-	 for ( i = 0 ; i < 1024 ; i++ ) {
-	    delay();
-	 }
-      }
 
-      /* Emit any new MASKS state.  This ioctl uses the old
-       * sarea-based state mechanism, which is why I'm not using
-       * emit_state() above.  Time for a new ioctl?  
-       */
-      if ( rmesa->state.hw.dirty ) {
-	 memcpy( &sarea->ContextState, &rmesa->state.hw, 
-		 sizeof(sarea->ContextState));
-	 sarea->dirty |= RADEON_UPLOAD_CONTEXT_ALL;
+   /* Flip top to bottom */
+   cx += dPriv->x;
+   cy  = dPriv->y + dPriv->h - cy - ch;
+
+   LOCK_HARDWARE( rmesa );
+
+   /* Throttle the number of clear ioctls we do.
+    */
+   while ( 1 ) {
+      clear = INREG( RADEON_LAST_CLEAR_REG );
+      if ( sarea->last_clear - clear <= RADEON_MAX_CLEARS ) {
+	 break;
+      }
+      /* Spin in place a bit so we aren't hammering the bus */
+      for ( i = 0 ; i < 1024 ; i++ ) {
       }
+   }
 
+   for ( i = 0 ; i < dPriv->numClipRects ; ) {
+      GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects );
+      XF86DRIClipRectPtr box = dPriv->pClipRects;
+      XF86DRIClipRectPtr b = rmesa->sarea->boxes;
+      drmRadeonClearType clear;
+      drmRadeonClearRect depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
+      GLint n = 0;
 
-      for ( i = 0 ; i < dPriv->numClipRects ; ) {
-	 GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects );
-	 XF86DRIClipRectPtr box = dPriv->pClipRects;
-	 XF86DRIClipRectPtr b = rmesa->sarea->boxes;
-         drmRadeonClearType clear;
-         drmRadeonClearRect depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
-	 GLint n = 0;
-
-	 if ( !all ) {
-	    for ( ; i < nr ; i++ ) {
-	       GLint x = box[i].x1;
-	       GLint y = box[i].y1;
-	       GLint w = box[i].x2 - x;
-	       GLint h = box[i].y2 - y;
-
-	       if ( x < cx ) w -= cx - x, x = cx;
-	       if ( y < cy ) h -= cy - y, y = cy;
-	       if ( x + w > cx + cw ) w = cx + cw - x;
-	       if ( y + h > cy + ch ) h = cy + ch - y;
-	       if ( w <= 0 ) continue;
-	       if ( h <= 0 ) continue;
-
-	       b->x1 = x;
-	       b->y1 = y;
-	       b->x2 = x + w;
-	       b->y2 = y + h;
-	       b++;
-	       n++;
-	    }
-	 } else {
-	    for ( ; i < nr ; i++ ) {
-	       *b++ = box[i];
-	       n++;
-	    }
+      if ( !all ) {
+	 for ( ; i < nr ; i++ ) {
+	    GLint x = box[i].x1;
+	    GLint y = box[i].y1;
+	    GLint w = box[i].x2 - x;
+	    GLint h = box[i].y2 - y;
+
+	    if ( x < cx ) w -= cx - x, x = cx;
+	    if ( y < cy ) h -= cy - y, y = cy;
+	    if ( x + w > cx + cw ) w = cx + cw - x;
+	    if ( y + h > cy + ch ) h = cy + ch - y;
+	    if ( w <= 0 ) continue;
+	    if ( h <= 0 ) continue;
+
+	    b->x1 = x;
+	    b->y1 = y;
+	    b->x2 = x + w;
+	    b->y2 = y + h;
+	    b++;
+	    n++;
 	 }
-
-	 rmesa->sarea->nbox = n;
-
-/*  	    fprintf( stderr, */
-/*  		     "DRM_RADEON_CLEAR: flag 0x%x color %x depth %x sten %x nbox %d\n", */
-/*  		     flags, */
-/*  		     rmesa->state.color.clear, */
-/*  		     rmesa->state.depth.clear, */
-/*  		     rmesa->state.stencil.clear, */
-/*  		     rmesa->sarea->nbox ); */
-
-         clear.flags       = flags;
-         clear.clear_color = rmesa->state.color.clear;
-         clear.clear_depth = rmesa->state.depth.clear;
-         clear.color_mask  = rmesa->state.hw.mask.rb3d_planemask,
-         clear.depth_mask  = rmesa->state.stencil.clear,
-         clear.depth_boxes = depth_boxes;
-
-         nr = rmesa->sarea->nbox;
-         b = rmesa->sarea->boxes;
-         for ( i = 0 ; i < nr ; i++ ) {
-           depth_boxes[i].f[RADEON_CLEAR_X1] = (float)b[i].x1;
-           depth_boxes[i].f[RADEON_CLEAR_Y1] = (float)b[i].y1;
-           depth_boxes[i].f[RADEON_CLEAR_X2] = (float)b[i].x2;
-           depth_boxes[i].f[RADEON_CLEAR_Y2] = (float)b[i].y2;
-           depth_boxes[i].f[RADEON_CLEAR_DEPTH] = 
-                              (float)rmesa->state.depth.clear;
-         }
-
-         ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR,
-                                &clear, sizeof(drmRadeonClearType));
-
-	 if ( ret ) {
-	    UNLOCK_HARDWARE( rmesa );
-	    fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret );
-	    exit( 1 );
+      } else {
+	 for ( ; i < nr ; i++ ) {
+	    *b++ = box[i];
+	    n++;
 	 }
       }
 
-      UNLOCK_HARDWARE( rmesa );
+      rmesa->sarea->nbox = n;
 
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT_ALL );
-      if ( rmesa->state.texture.unit[0].texobj )
-	 RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX0 );
-      if ( rmesa->state.texture.unit[1].texobj )
-	 RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX1 );
-      rmesa->upload_cliprects = 1;
+      clear.flags       = flags;
+      clear.clear_color = rmesa->state.color.clear;
+      clear.clear_depth = rmesa->state.depth.clear;
+      clear.color_mask  = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
+      clear.depth_mask  = rmesa->state.stencil.clear;
+      clear.depth_boxes = depth_boxes;
+
+      nr = rmesa->sarea->nbox;
+      b = rmesa->sarea->boxes;
+      for ( i = 0 ; i < nr ; i++ ) {
+	 depth_boxes[i].f[RADEON_CLEAR_X1] = (float)b[i].x1;
+	 depth_boxes[i].f[RADEON_CLEAR_Y1] = (float)b[i].y1;
+	 depth_boxes[i].f[RADEON_CLEAR_X2] = (float)b[i].x2;
+	 depth_boxes[i].f[RADEON_CLEAR_Y2] = (float)b[i].y2;
+	 depth_boxes[i].f[RADEON_CLEAR_DEPTH] = 
+	    (float)rmesa->state.depth.clear;
+      }
+
+      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR,
+			     &clear, sizeof(drmRadeonClearType));
+
+      if ( ret ) {
+	 UNLOCK_HARDWARE( rmesa );
+	 fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret );
+	 exit( 1 );
+      }
    }
 
-   if ( mask )
-      _swrast_Clear( ctx, mask, all, cx, cy, cw, ch );
+   UNLOCK_HARDWARE( rmesa );
 }
 
 
@@ -828,7 +885,9 @@ void radeonWaitForIdleLocked( radeonContextPtr rmesa )
 {
     int fd = rmesa->dri.fd;
     int to = 0;
-    int ret, i;
+    int ret, i = 0;
+
+    rmesa->c_drawWaits++;
 
     do {
         do {
@@ -852,37 +911,68 @@ void radeonWaitForIdleLocked( radeonContextPtr rmesa )
 }
 
 
-void radeonInitIoctlFuncs( GLcontext *ctx )
+static void radeonWaitForIdle( radeonContextPtr rmesa )
 {
-    ctx->Driver.Clear = radeonClear;
+    LOCK_HARDWARE(rmesa);
+    radeonWaitForIdleLocked( rmesa );
+    UNLOCK_HARDWARE(rmesa);
 }
 
+void radeonGetAllParams( radeonContextPtr rmesa )
+{
+   int ret;
+   drmRadeonGetParam gp;
+
+   gp.param = RADEON_PARAM_AGP_BUFFER_OFFSET;
+   gp.value = &rmesa->dri.agp_buffer_offset;
 
+   ret = drmCommandWriteRead( rmesa->dri.fd,
+			      DRM_RADEON_GETPARAM,
+			      &gp, sizeof(gp));
+   if (ret) {
+      fprintf(stderr, "drmRadeonGetParam: %d\n", ret);
+      exit(1);
+   }
+}
 
-void radeonReleaseRetainedBuffer( radeonContextPtr rmesa )
+void radeonFlush( GLcontext *ctx )
 {
-   drmRadeonVertex2 v;
-   ASSERT(rmesa->dma.retained);
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
 
-   if (rmesa->dma.retained &&
-       rmesa->dma.retained != rmesa->dma.buffer) {
-      RADEON_FIREVERTICES(rmesa); /* FIX ME: dependency tracking for retained */
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
 
-/*        fprintf(stderr, "releaseRetained: retained %p current %p\n", */
-/*  	      rmesa->dma.retained, rmesa->dma.buffer); */
-      
-      LOCK_HARDWARE(rmesa);
-      v.idx       = rmesa->dma.retained->idx;
-      v.discard   = 1;
-      v.nr_states = 0;
-      v.state     = rmesa->store.state;
-      v.nr_prims  = 0;
-      v.prim      = rmesa->store.prim;
-
-      drmCommandWrite( rmesa->dri.fd, DRM_RADEON_VERTEX2,
-                       &v, sizeof(drmRadeonVertex2));
-      UNLOCK_HARDWARE(rmesa);
+   if (rmesa->dma.flush)
+      rmesa->dma.flush( rmesa );
+
+   if (rmesa->dri.drmMinor >= 3) {
+      if (!is_empty_list(&rmesa->hw.dirty)) 
+	 radeonEmitState( rmesa );
+   
+      if (rmesa->store.cmd_used)
+	 radeonFlushCmdBuf( rmesa, __FUNCTION__ );
    }
+}
+
+/* Make sure all commands have been sent to the hardware and have
+ * completed processing.
+ */
+void radeonFinish( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   radeonFlush( ctx );
+   radeonWaitForIdle( rmesa );
+}
 
-   rmesa->dma.retained = 0;
+
+void radeonInitIoctlFuncs( GLcontext *ctx )
+{
+    ctx->Driver.Clear = radeonClear;
+    ctx->Driver.Finish = radeonFinish;
+    ctx->Driver.Flush = radeonFlush;
+    
+    if (RADEON_CONTEXT(ctx)->dri.drmMinor >= 3) {
+       radeonGetAllParams( RADEON_CONTEXT( ctx ) );
+    }
 }
+
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.h b/xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.h
index e77858f61..683795db5 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.h
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.h
@@ -39,90 +39,140 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #ifdef GLX_DIRECT_RENDERING
 
+#include "simple_list.h"
 #include "radeon_dri.h"
 #include "radeon_lock.h"
 
 #include "xf86drm.h"
 #include "radeon_common.h"
 
-#define RADEON_BUFFER_MAX_DWORDS	(RADEON_BUFFER_SIZE / sizeof(CARD32))
+extern void radeonEmitState( radeonContextPtr rmesa );
+extern void radeonEmitVertexAOS( radeonContextPtr rmesa,
+				 GLuint vertex_size,
+				 GLuint offset );
 
+extern void radeonEmitVbufPrim( radeonContextPtr rmesa,
+				GLuint vertex_format,
+				GLuint primitive,
+				GLuint vertex_nr );
+
+extern void radeonFlushElts( radeonContextPtr rmesa );
+
+extern GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa,
+					   GLuint vertex_format,
+					   GLuint primitive,
+					   GLuint min_nr );
+
+extern void radeonEmitAOS( radeonContextPtr rmesa,
+			   struct radeon_dma_region **regions,
+			   GLuint n,
+			   GLuint offset );
+
+
+
+extern void radeonFlushCmdBuf( radeonContextPtr rmesa, const char * );
+extern void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa );
+
+extern void radeonAllocDmaRegion( radeonContextPtr rmesa,
+				  struct radeon_dma_region *region,
+				  int bytes, 
+				  int alignment );
+
+extern void radeonAllocDmaRegionVerts( radeonContextPtr rmesa,
+				       struct radeon_dma_region *region,
+				       int numverts,
+				       int vertsize, 
+				       int alignment );
+
+extern void radeonReleaseDmaRegion( radeonContextPtr rmesa,
+				    struct radeon_dma_region *region,
+				    const char *caller );
 
-extern drmBufPtr radeonGetBufferLocked( radeonContextPtr rmesa );
-extern void radeonEmitPrim( radeonContextPtr rmesa );
-extern void radeonFlushPrims( radeonContextPtr rmesa );
-extern void radeonFlushPrimsLocked( radeonContextPtr rmesa );
-extern void radeonFlushPrimsGetBuffer( radeonContextPtr rmesa );
-extern void radeonFireBlitLocked( radeonContextPtr rmesa,
-				  drmBufPtr buffer,
-				  GLint offset, GLint pitch, GLint format,
-				  GLint x, GLint y,
-				  GLint width, GLint height );
 extern void radeonCopyBuffer( const __DRIdrawablePrivate *drawable );
 extern void radeonPageFlip( const __DRIdrawablePrivate *drawable );
+extern void radeonFlush( GLcontext *ctx );
+extern void radeonFinish( GLcontext *ctx );
 extern void radeonWaitForIdleLocked( radeonContextPtr rmesa );
-extern void radeonPerformanceCounters( radeonContextPtr rmesa );
-extern void radeonPerformanceBoxesLocked( radeonContextPtr rmesa );
 extern void radeonInitIoctlFuncs( GLcontext *ctx );
-extern void radeonReleaseRetainedBuffer( radeonContextPtr rmesa );
+extern void radeonGetAllParams( radeonContextPtr rmesa );
+
+/* radeon_compat.c:
+ */
+extern void radeonCompatEmitPrimitive( radeonContextPtr rmesa,
+				       GLuint vertex_format,
+				       GLuint hw_primitive,
+				       GLuint nrverts );
 
 
 /* ================================================================
  * Helper macros:
  */
 
+/* Close off the last primitive, if one is pending: call dma.flush when set.
+ * NOTE: the argument is evaluated more than once. */
+#define RADEON_NEWPRIM( rmesa )			\
+do {						\
+   if ( (rmesa)->dma.flush )			\
+      (rmesa)->dma.flush( rmesa );		\
+} while (0)
+
 /* Can accomodate several state changes and primitive changes without
  * actually firing the buffer.
  */
-#define RADEON_STATECHANGE( rmesa, flag )				\
-do {									\
-   if ( 0 ) radeonPrintDirty( __FUNCTION__, flag );			\
-   if ( rmesa->dma.low != rmesa->dma.last )				\
-      radeonEmitPrim( rmesa );						\
-   rmesa->state.hw.dirty |= flag;					\
+#define RADEON_STATECHANGE( rmesa, ATOM )			\
+do {								\
+   RADEON_NEWPRIM( rmesa );					\
+   move_to_head( &(rmesa->hw.dirty), &(rmesa->hw.ATOM));	\
 } while (0)
 
+#define RADEON_DB_STATE( ATOM )			        \
+   memcpy( rmesa->hw.ATOM.lastcmd, rmesa->hw.ATOM.cmd,	\
+	   rmesa->hw.ATOM.cmd_size * 4)
+
+static __inline int RADEON_DB_STATECHANGE( 
+   radeonContextPtr rmesa,
+   struct radeon_state_atom *atom )
+{
+   if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) {
+      int *tmp;
+      RADEON_NEWPRIM( rmesa );
+      move_to_head( &(rmesa->hw.dirty), atom );
+      tmp = atom->cmd; 
+      atom->cmd = atom->lastcmd;
+      atom->lastcmd = tmp;
+      return 1;
+   }
+   else
+      return 0;
+}
+
 
 /* Fire the buffered vertices no matter what.
  */
-#define RADEON_FIREVERTICES( rmesa )					\
-do {									\
-   if ( rmesa->store.primnr || rmesa->dma.low != rmesa->dma.last ) {	\
-      if ( 0 )								\
-	 fprintf( stderr, "RADEON_FIREVERTICES in "__FUNCTION__"\n" );	\
-      radeonFlushPrims( rmesa );					\
-   }									\
+#define RADEON_FIREVERTICES( rmesa )			\
+do {							\
+   if ( rmesa->store.cmd_used || rmesa->dma.flush ) {	\
+      radeonFlush( rmesa->glCtx );			\
+   }							\
 } while (0)
 
-
-static __inline void *radeonAllocDmaLow( radeonContextPtr rmesa,
+/* Alloc space in the command buffer
+ */
+static __inline char *radeonAllocCmdBuf( radeonContextPtr rmesa,
 					 int bytes, const char *where )
 {
-   if ( rmesa->dma.low + bytes > rmesa->dma.high ) {
-      if (0) fprintf( stderr, "%s flush for %d (%d/%d/%d)\n",
-		      where, bytes, rmesa->dma.last,
-		      rmesa->dma.low, rmesa->dma.high );
-      radeonFlushPrimsGetBuffer( rmesa );
-   }
+   if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ)
+      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
+   
+   assert(rmesa->dri.drmMinor >= 3);
 
    {
-      GLubyte *head = rmesa->dma.address + rmesa->dma.low;
-      if (0) fprintf( stderr, "%s: alloc %d (%d/%d/%d)\n",
-		      where, bytes, rmesa->dma.last,
-		      rmesa->dma.low, rmesa->dma.high );
-      rmesa->dma.low += bytes;
+      char *head = rmesa->store.cmd_buf + rmesa->store.cmd_used;
+      rmesa->store.cmd_used += bytes;
       return head;
    }
 }
 
-static __inline void *radeonAllocDmaHigh( radeonContextPtr rmesa, int bytes )
-{
-   if ( rmesa->dma.low + bytes > rmesa->dma.high )
-      radeonFlushPrimsGetBuffer( rmesa );
-
-   rmesa->dma.high -= bytes;
-   return (void *)(rmesa->dma.address + rmesa->dma.high);
-}
 
 
 
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_lock.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_lock.c
index 6fd9a1389..98db6b56c 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_lock.c
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_lock.c
@@ -38,12 +38,42 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_lock.h"
 #include "radeon_tex.h"
 #include "radeon_state.h"
+#include "radeon_ioctl.h"
 
 #if DEBUG_LOCKING
 char *prevLockFile = NULL;
 int prevLockLine = 0;
 #endif
 
+/* Turn on/off page flipping according to the flags in the sarea, then
+ * point the colour draw offset/pitch at the buffer selected below.
+ */
+static void
+radeonUpdatePageFlipping( radeonContextPtr rmesa )
+{
+   /* Page flipping is only enabled when the DRM minor version is >= 3. */
+   rmesa->doPageFlip = (rmesa->sarea->pfAllowPageFlip && 
+			rmesa->dri.drmMinor >= 3);
+
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s allow %d current %d\n", __FUNCTION__,
+	      rmesa->doPageFlip, rmesa->sarea->pfCurrentPage );
+
+   if ( rmesa->glCtx->Visual.doubleBufferMode && 
+	rmesa->sarea->pfCurrentPage == 0 ) {
+	 rmesa->state.color.drawOffset = rmesa->radeonScreen->backOffset;
+	 rmesa->state.color.drawPitch  = rmesa->radeonScreen->backPitch;
+   } else {
+	 rmesa->state.color.drawOffset = rmesa->radeonScreen->frontOffset;
+	 rmesa->state.color.drawPitch  = rmesa->radeonScreen->frontPitch;
+   }
+
+   RADEON_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = rmesa->state.color.drawOffset;
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH]  = rmesa->state.color.drawPitch;
+}
+
+
 
 /* Update the hardware state.  This is called if another context has
  * grabbed the hardware lock, which includes the X server.  This
@@ -73,6 +103,7 @@ void radeonGetLock( radeonContextPtr rmesa, GLuint flags )
    DRI_VALIDATE_DRAWABLE_INFO( rmesa->dri.display, sPriv, dPriv );
 
    if ( rmesa->lastStamp != dPriv->lastStamp ) {
+      radeonUpdatePageFlipping( rmesa );
       radeonSetCliprects( rmesa, rmesa->glCtx->Color.DriverDrawBuffer );
       radeonUpdateViewportOffset( rmesa->glCtx );
       rmesa->lastStamp = dPriv->lastStamp;
@@ -81,22 +112,6 @@ void radeonGetLock( radeonContextPtr rmesa, GLuint flags )
    if ( sarea->ctxOwner != rmesa->dri.hwContext ) {
       sarea->ctxOwner = rmesa->dri.hwContext;
 
-      rmesa->upload_cliprects = 1;
-      if ( rmesa->store.statenr ) {
-	 rmesa->store.state[0].dirty = RADEON_UPLOAD_CONTEXT_ALL;
-	 if ( rmesa->store.texture[0][0] )
-	    rmesa->store.state[0].dirty |= RADEON_UPLOAD_TEX0;
-	 if ( rmesa->store.texture[1][0] )
-	    rmesa->store.state[0].dirty |= RADEON_UPLOAD_TEX1;
-      }
-      else {
-	 rmesa->state.hw.dirty = RADEON_UPLOAD_CONTEXT_ALL;
-	 if ( rmesa->state.texture.unit[0].texobj )
-	    rmesa->state.hw.dirty |= RADEON_UPLOAD_TEX0;
-	 if ( rmesa->state.texture.unit[1].texobj )
-	    rmesa->state.hw.dirty |= RADEON_UPLOAD_TEX1;
-      }
-
       for ( i = 0 ; i < rmesa->texture.numHeaps ; i++ ) {
 	 if ( sarea->texAge[i] != rmesa->texture.age[i] ) {
 	    radeonAgeTextures( rmesa, i );
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_lock.h b/xc/lib/GL/mesa/src/drv/radeon/radeon_lock.h
index 229eb65e6..a3b033b33 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_lock.h
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_lock.h
@@ -85,17 +85,18 @@ extern int prevLockLine;
  * do not do any drawing !!!
  */
 
+
 /* Lock the hardware and validate our state.
  */
-#define LOCK_HARDWARE( rmesa )						\
-   do {									\
-      char __ret = 0;							\
-      DEBUG_CHECK_LOCK();						\
-      DRM_CAS( rmesa->dri.hwLock, rmesa->dri.hwContext,			\
-	       (DRM_LOCK_HELD | rmesa->dri.hwContext), __ret );		\
-      if ( __ret )							\
-	 radeonGetLock( rmesa, 0 );					\
-      DEBUG_LOCK();							\
+#define LOCK_HARDWARE( rmesa )					\
+   do {								\
+      char __ret = 0;						\
+      DEBUG_CHECK_LOCK();					\
+      DRM_CAS( rmesa->dri.hwLock, rmesa->dri.hwContext,		\
+	       (DRM_LOCK_HELD | rmesa->dri.hwContext), __ret );	\
+      if ( __ret )						\
+	 radeonGetLock( rmesa, 0 );				\
+      DEBUG_LOCK();						\
    } while (0)
 
 /* Unlock the hardware.
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_maos.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_maos.c
new file mode 100644
index 000000000..c62edd715
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_maos.c
@@ -0,0 +1,12 @@
+
+
+/* If using new packets, can choose either verts or arrays.
+ * Otherwise, must use verts.
+ */
+#include "radeon_context.h"
+#define RADEON_MAOS_VERTS 1
+#if (RADEON_MAOS_VERTS) || (RADEON_OLD_PACKETS)
+#include "radeon_maos_verts.c"
+#else
+#include "radeon_maos_arrays.c"
+#endif
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_tris.h b/xc/lib/GL/mesa/src/drv/radeon/radeon_maos.h
index b6193c92f..6f31969fd 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_tris.h
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_maos.h
@@ -2,7 +2,7 @@
 /**************************************************************************
 
 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
-                     VA Linux Systems Inc., Fremont, California.
+                     Tungsten Graphics Inc., Austin, Texas.
 
 All Rights Reserved.
 
@@ -20,7 +20,7 @@ Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
-ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -29,15 +29,19 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 /*
  * Authors:
- *   Keith Whitwell <keithw@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
  *
  */
 
-#ifndef __RADEON_TRIS_H__
-#define __RADEON_TRIS_H__
+#ifndef __RADEON_MAOS_H__
+#define __RADEON_MAOS_H__
 
-#include "mtypes.h"
+#ifdef GLX_DIRECT_RENDERING
 
-extern void radeonInitTriFuncs( GLcontext *ctx );
+#include "radeon_context.h"
 
+extern void radeonEmitArrays( GLcontext *ctx, GLuint inputs );
+extern void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs );
+
+#endif
 #endif
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_arrays.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_arrays.c
new file mode 100644
index 000000000..ffa2d9344
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_arrays.c
@@ -0,0 +1,592 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#include "glheader.h"
+#include "mtypes.h"
+#include "colormac.h"
+#include "mem.h"
+#include "mmath.h"
+#include "macros.h"
+
+#include "swrast_setup/swrast_setup.h"
+#include "math/m_translate.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_imm_debug.h"
+
+#include "radeon_context.h"
+#include "radeon_ioctl.h"
+#include "radeon_state.h"
+#include "radeon_swtcl.h"
+#include "radeon_maos.h"
+
+/* Usage:
+ *   - from radeon_tcl_render
+ *   - call radeonEmitArrays to ensure uptodate arrays in dma
+ *   - emit primitives (new type?) which reference the data
+ *       -- need to use elts for lineloop, quads, quadstrip/flat
+ *       -- other primitives are all well-formed (need tristrip-1,fake-poly)
+ *
+ */
+static void emit_ubyte_rgba3( GLcontext *ctx,
+		       struct radeon_dma_region *rvb,
+		       char *data,
+		       int stride,
+		       int count )
+{
+   int i;
+   char *out = (char *)(rvb->start + rvb->address);
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d out %p\n",
+	      __FUNCTION__, count, stride, out);
+
+   for (i = 0; i < count; i++) {
+      out[0] = *data;
+      out[1] = *(data+1);
+      out[2] = *(data+2);
+      out[3] = 0xFF;
+      out += 4;
+      data += stride;
+   }
+}
+
+
+/* Copy nr 32-bit words from src to dst.  dst must be an lvalue: both
+ * variants leave it pointing just past the copied words. */
+#if defined(USE_X86_ASM)
+#define COPY_DWORDS( dst, src, nr )					\
+do {									\
+	int __tmp;							\
+	__asm__ __volatile__( "rep ; movsl"				\
+			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
+			      : "0" (nr), "D" ((long)dst), "S" ((long)src) ); \
+} while (0)
+#else
+#define COPY_DWORDS( dst, src, nr )		\
+do {						\
+   int j;					\
+   for ( j = 0 ; j < (nr) ; j++ )		\
+      (dst)[j] = ((const int *)(src))[j];	\
+   (dst) += (nr);				\
+} while (0)
+#endif
+
+
+
+static void emit_ubyte_rgba4( GLcontext *ctx,
+			      struct radeon_dma_region *rvb,
+			      char *data,
+			      int stride,
+			      int count )
+{
+   int i;
+   int *out = (int *)(rvb->address + rvb->start);
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d\n",
+	      __FUNCTION__, count, stride);
+
+   if (stride == 4)
+       COPY_DWORDS( out, data, count );
+   else
+      for (i = 0; i < count; i++) {
+	 *out++ = *(int *)data;
+	 data += stride;
+      }
+}
+
+
+static void emit_ubyte_rgba( GLcontext *ctx,
+			     struct radeon_dma_region *rvb,
+			     char *data,
+			     int size,
+			     int stride,
+			     int count )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
+
+   assert (!rvb->buf);
+
+   if (stride == 0) {
+      radeonAllocDmaRegion( rmesa, rvb, 4, 4 );
+      count = 1;
+      rvb->aos_start = GET_START(rvb);
+      rvb->aos_stride = 0;
+      rvb->aos_size = 1;
+   }
+   else {
+      radeonAllocDmaRegion( rmesa, rvb, 4 * count, 4 );	/* alignment? */
+      rvb->aos_start = GET_START(rvb);
+      rvb->aos_stride = 1;
+      rvb->aos_size = 1;
+   }
+
+   /* Emit the data
+    */
+   switch (size) {
+   case 3:
+      emit_ubyte_rgba3( ctx, rvb, data, stride, count );
+      break;
+   case 4:
+      emit_ubyte_rgba4( ctx, rvb, data, stride, count );
+      break;
+   default:
+      assert(0);
+      exit(1);
+      break;
+   }
+}
+
+
+
+
+static void emit_vec8( GLcontext *ctx,
+		       struct radeon_dma_region *rvb,
+		       char *data,
+		       int stride,
+		       int count )
+{
+   int i;
+   int *out = (int *)(rvb->address + rvb->start);
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d\n",
+	      __FUNCTION__, count, stride);
+
+   if (stride == 8)
+      COPY_DWORDS( out, data, count*2 );
+   else
+      for (i = 0; i < count; i++) {
+	 out[0] = *(int *)data;
+	 out[1] = *(int *)(data+4);
+	 out += 2;
+	 data += stride;
+      }
+}
+
+/* Copy count 12-byte (three dword) elements, stride bytes apart in data,
+ * into the region at rvb->address + rvb->start, tightly packed. */
+static void emit_vec12( GLcontext *ctx,
+			struct radeon_dma_region *rvb,
+			char *data, int stride, int count )
+{
+   int i;
+   int *out = (int *)(rvb->address + rvb->start);
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+	      __FUNCTION__, count, stride, (void *)out, (void *)data);
+
+   if (stride == 12)
+      COPY_DWORDS( out, data, count*3 );
+   else
+      for (i = 0; i < count; i++) {
+	 out[0] = *(int *)data;
+	 out[1] = *(int *)(data+4);
+	 out[2] = *(int *)(data+8);
+	 out += 3;
+	 data += stride;
+      }
+}
+
+static void emit_vec16( GLcontext *ctx,
+			struct radeon_dma_region *rvb,
+			char *data,
+			int stride,
+			int count )
+{
+   int i;
+   int *out = (int *)(rvb->address + rvb->start);
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d\n",
+	      __FUNCTION__, count, stride);
+
+   if (stride == 16)
+      COPY_DWORDS( out, data, count*4 );
+   else
+      for (i = 0; i < count; i++) {
+	 out[0] = *(int *)data;
+	 out[1] = *(int *)(data+4);
+	 out[2] = *(int *)(data+8);
+	 out[3] = *(int *)(data+12);
+	 out += 4;
+	 data += stride;
+      }
+}
+
+
+static void emit_vector( GLcontext *ctx,
+			 struct radeon_dma_region *rvb,
+			 char *data,
+			 int size,
+			 int stride,
+			 int count )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
+
+   assert (!rvb->buf);
+
+   if (stride == 0) {
+      radeonAllocDmaRegion( rmesa, rvb, size * 4, 4 );
+      count = 1;
+      rvb->aos_start = GET_START(rvb);
+      rvb->aos_stride = 0;
+      rvb->aos_size = size;
+   }
+   else {
+      radeonAllocDmaRegion( rmesa, rvb, size * count * 4, 4 );	/* alignment? */
+      rvb->aos_start = GET_START(rvb);
+      rvb->aos_stride = size;
+      rvb->aos_size = size;
+   }
+
+   /* Emit the data
+    */
+   switch (size) {
+   case 2:
+      emit_vec8( ctx, rvb, data, stride, count );
+      break;
+   case 3:
+      emit_vec12( ctx, rvb, data, stride, count );
+      break;
+   case 4:
+      emit_vec16( ctx, rvb, data, stride, count );
+      break;
+   default:
+      assert(0);
+      exit(1);
+      break;
+   }
+
+}
+
+
+
+static void emit_s0_vec( GLcontext *ctx,
+			 struct radeon_dma_region *rvb,
+			 char *data,
+			 int stride,
+			 int count )
+{
+   int i;
+   int *out = (int *)(rvb->address + rvb->start);   /* write cursor into the region */
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d\n",
+	      __FUNCTION__, count, stride);
+
+   for (i = 0; i < count; i++) {
+      out[0] = *(int *)data;   /* first dword of the source element */
+      out[1] = 0;              /* second output dword is padded with zero bits */
+      out += 2;                /* two dwords are written per element */
+      data += stride;          /* stride is in bytes (data is char *) */
+   }
+}
+
+static void emit_stq_vec( GLcontext *ctx,
+			 struct radeon_dma_region *rvb,
+			 char *data,
+			 int stride,
+			 int count )
+{
+   int i;
+   int *out = (int *)(rvb->address + rvb->start);   /* write cursor into the region */
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d\n",
+	      __FUNCTION__, count, stride);
+
+   for (i = 0; i < count; i++) {
+      out[0] = *(int *)data;        /* dword at byte offset 0 */
+      out[1] = *(int *)(data+4);    /* dword at byte offset 4 */
+      out[2] = *(int *)(data+12);   /* offset 12; the dword at offset 8 is skipped (presumably r of s,t,r,q) */
+      out += 3;                     /* three dwords are written per element */
+      data += stride;               /* stride is in bytes (data is char *) */
+   }
+}
+
+
+
+
+static void emit_tex_vector( GLcontext *ctx,
+			     struct radeon_dma_region *rvb,
+			     char *data,
+			     int size,
+			     int stride,
+			     int count )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   int emitsize;
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
+
+   assert (!rvb->buf);
+
+   switch (size) {
+   case 4: emitsize = 3; break;
+   default: emitsize = 2; break;
+   }
+
+
+   if (stride == 0) {
+      radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize, 4 );
+      count = 1;
+      rvb->aos_start = GET_START(rvb);
+      rvb->aos_stride = 0;
+      rvb->aos_size = emitsize;
+   }
+   else {
+      radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize * count, 4 );
+      rvb->aos_start = GET_START(rvb);
+      rvb->aos_stride = emitsize;
+      rvb->aos_size = emitsize;
+   }
+
+
+   /* Emit the data
+    */
+   switch (size) {
+   case 1:
+      emit_s0_vec( ctx, rvb, data, stride, count ); 
+      break;
+   case 2:
+      emit_vec8( ctx, rvb, data, stride, count );
+      break;
+   case 3:
+      emit_vec8( ctx, rvb, data, stride, count );
+      break;
+   case 4:
+      emit_stq_vec( ctx, rvb, data, stride, count );
+      break;
+   default:
+      assert(0);
+      exit(1);
+      break;
+   }
+}
+
+
+
+
+/* Emit every required array whose DMA region is not yet allocated (.buf
+ * unset) and record the component list and vertex format in rmesa->tcl.
+ */
+void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
+   struct radeon_dma_region **component = rmesa->tcl.aos_components;
+   GLuint nr = 0;
+   GLuint vfmt = 0;
+   GLuint count = VB->Count;
+   GLuint vtx;
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      _tnl_print_vert_flags( __FUNCTION__, inputs );
+
+   if (1) {   /* object coordinates are emitted regardless of 'inputs' */
+      if (!rmesa->tcl.obj.buf)
+	 emit_vector( ctx,
+		      &rmesa->tcl.obj,
+		      (char *)VB->ObjPtr->data,
+		      VB->ObjPtr->size,
+		      VB->ObjPtr->stride,
+		      count);
+
+      switch( VB->ObjPtr->size ) {
+      case 4: vfmt |= RADEON_CP_VC_FRMT_W0;	/* fallthrough */
+      case 3: vfmt |= RADEON_CP_VC_FRMT_Z;	/* fallthrough */
+      case 2: vfmt |= RADEON_CP_VC_FRMT_XY;	/* fallthrough */
+      default: break;
+      }
+      component[nr++] = &rmesa->tcl.obj;
+   }
+
+   /* Normals are always emitted as three components. */
+   if (inputs & VERT_NORM) {
+      if (!rmesa->tcl.norm.buf)
+	 emit_vector( ctx,
+		      &(rmesa->tcl.norm),
+		      (char *)VB->NormalPtr->data,
+		      3,
+		      VB->NormalPtr->stride,
+		      count);
+
+      vfmt |= RADEON_CP_VC_FRMT_N0;
+      component[nr++] = &rmesa->tcl.norm;
+   }
+
+   if (inputs & VERT_RGBA) {
+      if (VB->ColorPtr[0]->Type == GL_UNSIGNED_BYTE) {
+	 if (!rmesa->tcl.rgba.buf)
+	    emit_ubyte_rgba( ctx,
+			     &rmesa->tcl.rgba,
+			     (char *)VB->ColorPtr[0]->Ptr,
+			     VB->ColorPtr[0]->Size,
+			     VB->ColorPtr[0]->StrideB,
+			     count);
+
+	 vfmt |= RADEON_CP_VC_FRMT_PKCOLOR;
+      }
+      else {
+	 int emitsize;
+	 /* Emit alpha only when it varies per vertex or is not 1.0. */
+	 if (VB->ColorPtr[0]->Size == 4 &&
+	     (VB->ColorPtr[0]->StrideB != 0 ||
+	      ((GLfloat *)VB->ColorPtr[0]->Ptr)[3] != 1.0)) {
+	    vfmt |= RADEON_CP_VC_FRMT_FPCOLOR | RADEON_CP_VC_FRMT_FPALPHA;
+	    emitsize = 4;
+	 }
+	 else {
+	    vfmt |= RADEON_CP_VC_FRMT_FPCOLOR;
+	    emitsize = 3;
+	 }
+
+
+	 if (!rmesa->tcl.rgba.buf)
+	    emit_vector( ctx,
+			 &(rmesa->tcl.rgba),
+			 (char *)VB->ColorPtr[0]->Ptr,
+			 emitsize,
+			 VB->ColorPtr[0]->StrideB,
+			 count);
+      }
+
+      component[nr++] = &rmesa->tcl.rgba;
+   }
+
+   /* Specular is emitted as packed ubytes; float colours are imported first. */
+   if (inputs & VERT_SPEC_RGB) {
+      if (!rmesa->tcl.spec.buf) {
+	 if (VB->SecondaryColorPtr[0]->Type != GL_UNSIGNED_BYTE)
+	    radeon_import_float_spec_colors( ctx );
+
+	 emit_ubyte_rgba( ctx,
+			  &rmesa->tcl.spec,
+			  (char *)VB->SecondaryColorPtr[0]->Ptr,
+			  3,
+			  VB->SecondaryColorPtr[0]->StrideB,
+			  count);
+      }
+
+      vfmt |= RADEON_CP_VC_FRMT_PKSPEC;
+      component[nr++] = &rmesa->tcl.spec;
+   }
+   /* Recompute the Q0/Q1 bits of the TCL output vertex format from scratch. */
+   vtx = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &
+	  ~(RADEON_TCL_VTX_Q0|RADEON_TCL_VTX_Q1));
+
+   if (inputs & VERT_TEX0) {
+      if (!rmesa->tcl.tex[0].buf)
+	 emit_tex_vector( ctx,
+			  &(rmesa->tcl.tex[0]),
+			  (char *)VB->TexCoordPtr[0]->data,
+			  VB->TexCoordPtr[0]->size,
+			  VB->TexCoordPtr[0]->stride,
+			  count );
+
+      switch( VB->TexCoordPtr[0]->size ) {
+      case 4:
+	 vtx |= RADEON_TCL_VTX_Q0;
+	 vfmt |= RADEON_CP_VC_FRMT_Q0;	/* fallthrough */
+      default:
+	 vfmt |= RADEON_CP_VC_FRMT_ST0;
+      }
+      component[nr++] = &rmesa->tcl.tex[0];
+   }
+
+   if (inputs & VERT_TEX1) {
+      if (!rmesa->tcl.tex[1].buf)
+	 emit_tex_vector( ctx,
+			  &(rmesa->tcl.tex[1]),
+			  (char *)VB->TexCoordPtr[1]->data,
+			  VB->TexCoordPtr[1]->size,
+			  VB->TexCoordPtr[1]->stride,
+			  count );
+
+      switch( VB->TexCoordPtr[1]->size ) {
+      case 4:
+	 vtx |= RADEON_TCL_VTX_Q1;
+	 vfmt |= RADEON_CP_VC_FRMT_Q1;	/* fallthrough */
+      default:
+	 vfmt |= RADEON_CP_VC_FRMT_ST1;
+      }
+      component[nr++] = &rmesa->tcl.tex[1];
+   }
+
+   if (vtx != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT]) {
+      RADEON_STATECHANGE( rmesa, tcl );
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = vtx;
+   }
+
+   rmesa->tcl.nr_aos_components = nr;
+   rmesa->tcl.vertex_format = vfmt;
+}
+
+
+void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+
+   if (RADEON_DEBUG & DEBUG_VERTS) 
+      _tnl_print_vert_flags( __FUNCTION__, newinputs );
+
+   if (newinputs & VERT_OBJ) 
+     radeonReleaseDmaRegion( rmesa, &rmesa->tcl.obj, __FUNCTION__ );
+
+   if (newinputs & VERT_NORM) 
+      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.norm, __FUNCTION__ );
+
+   if (newinputs & VERT_RGBA) 
+      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.rgba, __FUNCTION__ );
+
+   if (newinputs & VERT_SPEC_RGB) 
+      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.spec, __FUNCTION__ );
+
+   if (newinputs & VERT_TEX0)
+      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[0], __FUNCTION__ );
+
+   if (newinputs & VERT_TEX1)
+      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[1], __FUNCTION__ );
+}
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_verts.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_verts.c
new file mode 100644
index 000000000..da7ca56bd
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_verts.c
@@ -0,0 +1,336 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Austin, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#include "radeon_context.h"
+#include "radeon_state.h"
+#include "radeon_ioctl.h"
+#include "radeon_tex.h"
+#include "radeon_tcl.h"
+#include "radeon_swtcl.h"
+#include "radeon_maos.h"
+
+#include "mmath.h"
+#include "mtypes.h"
+#include "enums.h"
+#include "colormac.h"
+#include "light.h"
+
+#include "array_cache/acache.h"
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+#include "tnl/t_imm_debug.h"
+
+#define RADEON_TCL_MAX_SETUP 13
+/* Number of setup_tab entries; the instantiations below use IDX 0..12. */
+union emit_union { float f; GLuint ui; GLubyte ub[4]; };
+/* One entry per vertex layout; radeonEmitArrays() picks one by vertex_format. */
+static struct {
+   void   (*emit)( GLcontext *, GLuint, GLuint, void * );
+   GLuint vertex_size;		/* multiplied by 4 when the buffer is allocated -- presumably dwords */
+   GLuint vertex_format;	/* mask of RADEON_CP_VC_FRMT_* bits */
+} setup_tab[RADEON_TCL_MAX_SETUP];
+/* Feature tests on IND, the format mask defined before each template include. */
+#define DO_W    (IND & RADEON_CP_VC_FRMT_W0)
+#define DO_RGBA (IND & RADEON_CP_VC_FRMT_PKCOLOR)
+#define DO_SPEC (IND & RADEON_CP_VC_FRMT_PKSPEC)
+#define DO_FOG  (IND & RADEON_CP_VC_FRMT_PKSPEC)
+#define DO_TEX0 (IND & RADEON_CP_VC_FRMT_ST0)
+#define DO_TEX1 (IND & RADEON_CP_VC_FRMT_ST1)
+#define DO_PTEX (IND & RADEON_CP_VC_FRMT_Q0)
+#define DO_NORM (IND & RADEON_CP_VC_FRMT_N0)
+/* Note: DO_FOG tests the same PKSPEC bit as DO_SPEC.  DO_TEX2/DO_TEX3 are constant 0. */
+#define DO_TEX2 0
+#define DO_TEX3 0
+
+#define GET_TEXSOURCE(n)  n
+#define GET_UBYTE_COLOR_STORE() &RADEON_CONTEXT(ctx)->UbyteColor
+#define GET_UBYTE_SPEC_COLOR_STORE() &RADEON_CONTEXT(ctx)->UbyteSecondaryColor
+/* NOTE(review): the macros here are presumably consumed by radeon_tcl_vbtmp.h -- confirm there. */
+#define IMPORT_FLOAT_COLORS radeon_import_float_colors
+#define IMPORT_FLOAT_SPEC_COLORS radeon_import_float_spec_colors
+
+/***********************************************************************
+ *             Generate vertex emit functions               *
+ ***********************************************************************/
+
+
+/* Defined in order of increasing vertex size:
+ */
+#define IDX 0
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR)
+#define TAG(x) x##_rgba
+#include "radeon_tcl_vbtmp.h"
+
+#define IDX 1
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_n
+#include "radeon_tcl_vbtmp.h"
+
+#define IDX 2
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_ST0)
+#define TAG(x) x##_rgba_st
+#include "radeon_tcl_vbtmp.h"
+
+#define IDX 3
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_rgba_n
+#include "radeon_tcl_vbtmp.h"
+
+#define IDX 4
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_st_n
+#include "radeon_tcl_vbtmp.h"
+
+#define IDX 5
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_ST1)
+#define TAG(x) x##_rgba_st_st
+#include "radeon_tcl_vbtmp.h"
+
+#define IDX 6
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_rgba_st_n
+#include "radeon_tcl_vbtmp.h"
+
+#define IDX 7
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_PKSPEC|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_ST1)
+#define TAG(x) x##_rgba_spec_st_st
+#include "radeon_tcl_vbtmp.h"
+
+#define IDX 8
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_st_st_n
+#include "radeon_tcl_vbtmp.h"
+
+#define IDX 9
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_PKSPEC|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_rgpa_spec_st_st_n
+#include "radeon_tcl_vbtmp.h"
+
+#define IDX 10
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_Q0)
+#define TAG(x) x##_rgba_stq
+#include "radeon_tcl_vbtmp.h"
+
+#define IDX 11
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_Q1|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_Q0)
+#define TAG(x) x##_rgba_stq_stq
+#include "radeon_tcl_vbtmp.h"
+
+#define IDX 12
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_W0|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_PKSPEC|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_Q0|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_Q1|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_w_rgpa_spec_stq_stq_n
+#include "radeon_tcl_vbtmp.h"
+
+
+
+
+
+/***********************************************************************
+ *                         Initialization 
+ ***********************************************************************/
+
+
+static void init_tcl_verts( void )
+{  /* Calls one init_<tag>() per TAG() suffix defined above; presumably each fills its setup_tab[IDX] slot. */
+   init_rgba();
+   init_n();
+   init_rgba_n();
+   init_rgba_st();
+   init_st_n();
+   init_rgba_st_st();
+   init_rgba_st_n();
+   init_rgba_spec_st_st();
+   init_st_st_n();
+   init_rgpa_spec_st_st_n();	/* "rgpa" matches the spelling used in the TAG() definition */
+   init_rgba_stq();
+   init_rgba_stq_stq();
+   init_w_rgpa_spec_stq_stq_n();	/* "rgpa" again matches the TAG() definition */
+}
+
+
+/* Emit the vertex buffer as a single interleaved array, using the first
+ * setup_tab layout that contains every component requested by inputs.
+ */
+void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+{
+   static int firsttime = 1;
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLuint vtx = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &
+		 ~(RADEON_TCL_VTX_Q0|RADEON_TCL_VTX_Q1));
+   GLuint req = RADEON_CP_VC_FRMT_Z;	/* z is requested unconditionally */
+   int i = 0;
+
+   /* Fill in setup_tab on the first call. */
+   if (firsttime) {
+      init_tcl_verts();
+      firsttime = 0;
+   }
+
+   if (VB->ObjPtr->size == 4)
+      req |= RADEON_CP_VC_FRMT_W0;
+   if (inputs & VERT_NORM)
+      req |= RADEON_CP_VC_FRMT_N0;
+   if (inputs & VERT_RGBA)
+      req |= RADEON_CP_VC_FRMT_PKCOLOR;
+
+   if (inputs & VERT_SPEC_RGB)
+      req |= RADEON_CP_VC_FRMT_PKSPEC;
+
+   /* Four-component texcoords additionally set the matching Q bit in
+    * both the requested format and the TCL output vertex format.
+    */
+   if (inputs & VERT_TEX0) {
+      req |= RADEON_CP_VC_FRMT_ST0;
+      if (VB->TexCoordPtr[0]->size == 4) {
+	 vtx |= RADEON_TCL_VTX_Q0;
+	 req |= RADEON_CP_VC_FRMT_Q0;
+      }
+   }
+
+   if (inputs & VERT_TEX1) {
+      req |= RADEON_CP_VC_FRMT_ST1;
+      if (VB->TexCoordPtr[1]->size == 4) {
+	 vtx |= RADEON_TCL_VTX_Q1;
+	 req |= RADEON_CP_VC_FRMT_Q1;
+      }
+   }
+
+   /* Touch the hardware state only when the output format changes. */
+   if (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] != vtx) {
+      RADEON_STATECHANGE( rmesa, tcl );
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = vtx;
+   }
+
+   /* Advance to the first layout whose format is a superset of req. */
+   while (i < RADEON_TCL_MAX_SETUP &&
+	  (setup_tab[i].vertex_format & req) != req)
+      i++;
+
+   /* An existing buffer is reused if it already has this layout and
+    * released otherwise.
+    */
+   if (rmesa->tcl.indexed_verts.buf) {
+      if (rmesa->tcl.vertex_format == setup_tab[i].vertex_format)
+	 return;
+      radeonReleaseArrays( ctx, ~0 );
+   }
+
+   radeonAllocDmaRegionVerts( rmesa, 
+			      &rmesa->tcl.indexed_verts, 
+			      VB->Count,
+			      setup_tab[i].vertex_size * 4, 
+			      4);
+
+   setup_tab[i].emit( ctx, 0, VB->Count, 
+		      rmesa->tcl.indexed_verts.address + 
+		      rmesa->tcl.indexed_verts.start );
+
+   rmesa->tcl.vertex_format = setup_tab[i].vertex_format;
+   rmesa->tcl.indexed_verts.aos_start = GET_START( &rmesa->tcl.indexed_verts );
+   rmesa->tcl.indexed_verts.aos_size = setup_tab[i].vertex_size;
+   rmesa->tcl.indexed_verts.aos_stride = setup_tab[i].vertex_size;
+
+   rmesa->tcl.aos_components[0] = &rmesa->tcl.indexed_verts;
+   rmesa->tcl.nr_aos_components = 1;
+}
+
+
+
+/* Release the interleaved vertex region whenever any bit of newinputs is set. */
+void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   if (RADEON_DEBUG & DEBUG_VERTS) 
+      _tnl_print_vert_flags( __FUNCTION__, newinputs );
+   if (newinputs == 0)
+      return;
+   radeonReleaseDmaRegion( rmesa, &rmesa->tcl.indexed_verts, __FUNCTION__ );
+}
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_sanity.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_sanity.c
new file mode 100644
index 000000000..2d0ac3388
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_sanity.c
@@ -0,0 +1,891 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2002 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc, Cedar Park, TX.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keithw@tungstengraphics.com>
+ *
+ */
+
+#include "radeon_context.h"
+#include "radeon_ioctl.h"
+#include "radeon_sanity.h"
+
+/* Set this to '1' to get more verbiage.
+ */
+#define MORE_VERBOSE 0
+/* VERBOSE and NORMAL gate the two levels of stderr tracing used in this file. */
+#if MORE_VERBOSE
+#define VERBOSE (RADEON_DEBUG & DEBUG_VERBOSE)
+#define NORMAL  (1)
+#else
+#define VERBOSE 0
+#define NORMAL  (RADEON_DEBUG & DEBUG_VERBOSE)
+#endif
+
+
+/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
+ * 1.3 cmdbuffers allow all previous state to be updated as well as
+ * the tcl scalar and vector areas.  
+ */
+static struct { 
+   int start; 		/* first register written by the packet */
+   int len; 		/* number of dwords; register i is start + i*4 */
+   const char *name;	/* printable name of the first register */
+} packet[RADEON_MAX_STATE_PACKETS] = {	/* indexed by header.packet.packet_id */
+   { RADEON_PP_MISC,7,"RADEON_PP_MISC" },
+   { RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
+   { RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
+   { RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
+   { RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
+   { RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
+   { RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
+   { RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
+   { RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
+   { RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
+   { RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
+   { RADEON_RE_MISC,1,"RADEON_RE_MISC" },
+   { RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
+   { RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
+   { RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
+   { RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
+   { RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
+   { RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
+   { RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
+   { RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
+   { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
+};
+
+struct reg_names {	/* pairs an index with the name printed for it */
+   int idx;		/* value that struct reg.idx is compared against */
+   const char *name;	/* text returned by get_reg_name() */
+};
+
+static struct reg_names reg_names[] = {	/* register offset -> name shown in the trace */
+   { RADEON_PP_MISC, "RADEON_PP_MISC" },
+   { RADEON_PP_FOG_COLOR, "RADEON_PP_FOG_COLOR" },
+   { RADEON_RE_SOLID_COLOR, "RADEON_RE_SOLID_COLOR" },
+   { RADEON_RB3D_BLENDCNTL, "RADEON_RB3D_BLENDCNTL" },
+   { RADEON_RB3D_DEPTHOFFSET, "RADEON_RB3D_DEPTHOFFSET" },
+   { RADEON_RB3D_DEPTHPITCH, "RADEON_RB3D_DEPTHPITCH" },
+   { RADEON_RB3D_ZSTENCILCNTL, "RADEON_RB3D_ZSTENCILCNTL" },
+   { RADEON_PP_CNTL, "RADEON_PP_CNTL" },
+   { RADEON_RB3D_CNTL, "RADEON_RB3D_CNTL" },
+   { RADEON_RB3D_COLOROFFSET, "RADEON_RB3D_COLOROFFSET" },
+   { RADEON_RB3D_COLORPITCH, "RADEON_RB3D_COLORPITCH" },
+   { RADEON_SE_CNTL, "RADEON_SE_CNTL" },
+   { RADEON_SE_COORD_FMT, "RADEON_SE_COORDFMT" },
+   { RADEON_SE_CNTL_STATUS, "RADEON_SE_CNTL_STATUS" },
+   { RADEON_RE_LINE_PATTERN, "RADEON_RE_LINE_PATTERN" },
+   { RADEON_RE_LINE_STATE, "RADEON_RE_LINE_STATE" },
+   { RADEON_SE_LINE_WIDTH, "RADEON_SE_LINE_WIDTH" },
+   { RADEON_RB3D_STENCILREFMASK, "RADEON_RB3D_STENCILREFMASK" },
+   { RADEON_RB3D_ROPCNTL, "RADEON_RB3D_ROPCNTL" },
+   { RADEON_RB3D_PLANEMASK, "RADEON_RB3D_PLANEMASK" },
+   { RADEON_SE_VPORT_XSCALE, "RADEON_SE_VPORT_XSCALE" },
+   { RADEON_SE_VPORT_XOFFSET, "RADEON_SE_VPORT_XOFFSET" },
+   { RADEON_SE_VPORT_YSCALE, "RADEON_SE_VPORT_YSCALE" },
+   { RADEON_SE_VPORT_YOFFSET, "RADEON_SE_VPORT_YOFFSET" },
+   { RADEON_SE_VPORT_ZSCALE, "RADEON_SE_VPORT_ZSCALE" },
+   { RADEON_SE_VPORT_ZOFFSET, "RADEON_SE_VPORT_ZOFFSET" },
+   { RADEON_RE_MISC, "RADEON_RE_MISC" },
+   { RADEON_PP_TXFILTER_0, "RADEON_PP_TXFILTER_0" },
+   { RADEON_PP_TXFILTER_1, "RADEON_PP_TXFILTER_1" },
+   { RADEON_PP_TXFILTER_2, "RADEON_PP_TXFILTER_2" },
+   { RADEON_PP_TXFORMAT_0, "RADEON_PP_TXFORMAT_0" },
+   { RADEON_PP_TXFORMAT_1, "RADEON_PP_TXFORMAT_1" },
+   { RADEON_PP_TXFORMAT_2, "RADEON_PP_TXFORMAT_2" },
+   { RADEON_PP_TXOFFSET_0, "RADEON_PP_TXOFFSET_0" },
+   { RADEON_PP_TXOFFSET_1, "RADEON_PP_TXOFFSET_1" },
+   { RADEON_PP_TXOFFSET_2, "RADEON_PP_TXOFFSET_2" },
+   { RADEON_PP_TXCBLEND_0, "RADEON_PP_TXCBLEND_0" },
+   { RADEON_PP_TXCBLEND_1, "RADEON_PP_TXCBLEND_1" },
+   { RADEON_PP_TXCBLEND_2, "RADEON_PP_TXCBLEND_2" },
+   { RADEON_PP_TXABLEND_0, "RADEON_PP_TXABLEND_0" },
+   { RADEON_PP_TXABLEND_1, "RADEON_PP_TXABLEND_1" },
+   { RADEON_PP_TXABLEND_2, "RADEON_PP_TXABLEND_2" },
+   { RADEON_PP_TFACTOR_0, "RADEON_PP_TFACTOR_0" },
+   { RADEON_PP_TFACTOR_1, "RADEON_PP_TFACTOR_1" },
+   { RADEON_PP_TFACTOR_2, "RADEON_PP_TFACTOR_2" },
+   { RADEON_PP_BORDER_COLOR_0, "RADEON_PP_BORDER_COLOR_0" },
+   { RADEON_PP_BORDER_COLOR_1, "RADEON_PP_BORDER_COLOR_1" },
+   { RADEON_PP_BORDER_COLOR_2, "RADEON_PP_BORDER_COLOR_2" },
+   { RADEON_SE_ZBIAS_FACTOR, "RADEON_SE_ZBIAS_FACTOR" },
+   { RADEON_SE_ZBIAS_CONSTANT, "RADEON_SE_ZBIAS_CONSTANT" },
+   { RADEON_SE_TCL_OUTPUT_VTX_FMT, "RADEON_SE_TCL_OUTPUT_VTXFMT" },
+   { RADEON_SE_TCL_OUTPUT_VTX_SEL, "RADEON_SE_TCL_OUTPUT_VTXSEL" },
+   { RADEON_SE_TCL_MATRIX_SELECT_0, "RADEON_SE_TCL_MATRIX_SELECT_0" },
+   { RADEON_SE_TCL_MATRIX_SELECT_1, "RADEON_SE_TCL_MATRIX_SELECT_1" },
+   { RADEON_SE_TCL_UCP_VERT_BLEND_CTL, "RADEON_SE_TCL_UCP_VERT_BLEND_CTL" },
+   { RADEON_SE_TCL_TEXTURE_PROC_CTL, "RADEON_SE_TCL_TEXTURE_PROC_CTL" },
+   { RADEON_SE_TCL_LIGHT_MODEL_CTL, "RADEON_SE_TCL_LIGHT_MODEL_CTL" },
+   { RADEON_SE_TCL_PER_LIGHT_CTL_0, "RADEON_SE_TCL_PER_LIGHT_CTL_0" },
+   { RADEON_SE_TCL_PER_LIGHT_CTL_1, "RADEON_SE_TCL_PER_LIGHT_CTL_1" },
+   { RADEON_SE_TCL_PER_LIGHT_CTL_2, "RADEON_SE_TCL_PER_LIGHT_CTL_2" },
+   { RADEON_SE_TCL_PER_LIGHT_CTL_3, "RADEON_SE_TCL_PER_LIGHT_CTL_3" },
+   { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, "RADEON_SE_TCL_EMMISSIVE_RED" },
+   { RADEON_SE_TCL_MATERIAL_EMMISSIVE_GREEN, "RADEON_SE_TCL_EMMISSIVE_GREEN" },
+   { RADEON_SE_TCL_MATERIAL_EMMISSIVE_BLUE, "RADEON_SE_TCL_EMMISSIVE_BLUE" },
+   { RADEON_SE_TCL_MATERIAL_EMMISSIVE_ALPHA, "RADEON_SE_TCL_EMMISSIVE_ALPHA" },
+   { RADEON_SE_TCL_MATERIAL_AMBIENT_RED, "RADEON_SE_TCL_AMBIENT_RED" },
+   { RADEON_SE_TCL_MATERIAL_AMBIENT_GREEN, "RADEON_SE_TCL_AMBIENT_GREEN" },
+   { RADEON_SE_TCL_MATERIAL_AMBIENT_BLUE, "RADEON_SE_TCL_AMBIENT_BLUE" },
+   { RADEON_SE_TCL_MATERIAL_AMBIENT_ALPHA, "RADEON_SE_TCL_AMBIENT_ALPHA" },
+   { RADEON_SE_TCL_MATERIAL_DIFFUSE_RED, "RADEON_SE_TCL_DIFFUSE_RED" },
+   { RADEON_SE_TCL_MATERIAL_DIFFUSE_GREEN, "RADEON_SE_TCL_DIFFUSE_GREEN" },
+   { RADEON_SE_TCL_MATERIAL_DIFFUSE_BLUE, "RADEON_SE_TCL_DIFFUSE_BLUE" },
+   { RADEON_SE_TCL_MATERIAL_DIFFUSE_ALPHA, "RADEON_SE_TCL_DIFFUSE_ALPHA" },
+   { RADEON_SE_TCL_MATERIAL_SPECULAR_RED, "RADEON_SE_TCL_SPECULAR_RED" },
+   { RADEON_SE_TCL_MATERIAL_SPECULAR_GREEN, "RADEON_SE_TCL_SPECULAR_GREEN" },
+   { RADEON_SE_TCL_MATERIAL_SPECULAR_BLUE, "RADEON_SE_TCL_SPECULAR_BLUE" },
+   { RADEON_SE_TCL_MATERIAL_SPECULAR_ALPHA, "RADEON_SE_TCL_SPECULAR_ALPHA" },
+   { RADEON_SE_TCL_SHININESS, "RADEON_SE_TCL_SHININESS" },
+   { RADEON_SE_COORD_FMT, "RADEON_SE_COORD_FMT" }	/* repeats an idx listed earlier; lookup_reg() returns the first match */
+};
+
+static struct reg_names scalar_names[] = {	/* names for the scalar slots; walked in array order by init_regs() */
+   { RADEON_SS_LIGHT_DCD_ADDR, "LIGHT_DCD" },
+   { RADEON_SS_LIGHT_SPOT_EXPONENT_ADDR, "LIGHT_SPOT_EXPONENT" },
+   { RADEON_SS_LIGHT_SPOT_CUTOFF_ADDR, "LIGHT_SPOT_CUTOFF" },
+   { RADEON_SS_LIGHT_SPECULAR_THRESH_ADDR, "LIGHT_SPECULAR_THRESH" },
+   { RADEON_SS_LIGHT_RANGE_CUTOFF_ADDR, "LIGHT_RANGE_CUTOFF" },
+   { RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR, "VERT_GUARD_CLIP" },
+   { RADEON_SS_VERT_GUARD_DISCARD_ADJ_ADDR, "VERT_GUARD_DISCARD" },
+   { RADEON_SS_HORZ_GUARD_CLIP_ADJ_ADDR, "HORZ_GUARD_CLIP" },
+   { RADEON_SS_HORZ_GUARD_DISCARD_ADJ_ADDR, "HORZ_GUARD_DISCARD" },
+   { RADEON_SS_SHININESS, "SHININESS" },
+   { 1000, "" },	/* end marker: never equals any of the 64 slot numbers init_regs() scans */
+};
+
+/* Puff these out to make them look like normal (dword) registers.
+ */
+static struct reg_names vector_names[] = {	/* idx is the vector address times 4, i.e. one slot per dword */
+   { RADEON_VS_MATRIX_0_ADDR * 4, "MATRIX_0" },
+   { RADEON_VS_MATRIX_1_ADDR * 4, "MATRIX_1" },
+   { RADEON_VS_MATRIX_2_ADDR * 4, "MATRIX_2" },
+   { RADEON_VS_MATRIX_3_ADDR * 4, "MATRIX_3" },
+   { RADEON_VS_MATRIX_4_ADDR * 4, "MATRIX_4" },
+   { RADEON_VS_MATRIX_5_ADDR * 4, "MATRIX_5" },
+   { RADEON_VS_MATRIX_6_ADDR * 4, "MATRIX_6" },
+   { RADEON_VS_MATRIX_7_ADDR * 4, "MATRIX_7" },
+   { RADEON_VS_MATRIX_8_ADDR * 4, "MATRIX_8" },
+   { RADEON_VS_MATRIX_9_ADDR * 4, "MATRIX_9" },
+   { RADEON_VS_MATRIX_10_ADDR * 4, "MATRIX_10" },
+   { RADEON_VS_MATRIX_11_ADDR * 4, "MATRIX_11" },
+   { RADEON_VS_MATRIX_12_ADDR * 4, "MATRIX_12" },
+   { RADEON_VS_MATRIX_13_ADDR * 4, "MATRIX_13" },
+   { RADEON_VS_MATRIX_14_ADDR * 4, "MATRIX_14" },
+   { RADEON_VS_MATRIX_15_ADDR * 4, "MATRIX_15" },
+   { RADEON_VS_LIGHT_AMBIENT_ADDR * 4, "LIGHT_AMBIENT" },
+   { RADEON_VS_LIGHT_DIFFUSE_ADDR * 4, "LIGHT_DIFFUSE" },
+   { RADEON_VS_LIGHT_SPECULAR_ADDR * 4, "LIGHT_SPECULAR" },
+   { RADEON_VS_LIGHT_DIRPOS_ADDR * 4, "LIGHT_DIRPOS" },
+   { RADEON_VS_LIGHT_HWVSPOT_ADDR * 4, "LIGHT_HWVSPOT" },
+   { RADEON_VS_LIGHT_ATTENUATION_ADDR * 4, "LIGHT_ATTENUATION" },
+   { RADEON_VS_MATRIX_EYE2CLIP_ADDR * 4, "MATRIX_EYE2CLIP" },
+   { RADEON_VS_UCP_ADDR * 4, "UCP" },
+   { RADEON_VS_GLOBAL_AMBIENT_ADDR * 4, "GLOBAL_AMBIENT" },
+   { RADEON_VS_FOG_PARAM_ADDR * 4, "FOG_PARAM" },
+   { RADEON_VS_EYE_VECTOR_ADDR * 4, "EYE_VECTOR" },
+   { 1000, "" },	/* end marker: never equals any of the 128*4 slot numbers init_regs() scans */
+};
+
+union fi { float f; int i; };
+/* Tracking state for one register, scalar slot or vector dword. */
+struct reg {
+   int idx; 			/* slot/register number; -1 ends a table (see lookup_reg) */
+   struct reg_names *closest;	/* name entry that get_reg_name() prints, plus an offset if idx differs */
+   int is_float;		/* nonzero: values are logged as floats */
+   union fi current;		/* last value logged */
+   union fi *values;		/* values recorded by find_or_add_value() */
+   int nvalues;			/* entries used in values[] */
+   int nalloc;			/* entries allocated in values[] */
+   float vmin, vmax;		/* extremes updated by print_float_reg_assignment() */
+};
+
+
+static struct reg regs[Elements(reg_names)+1];	/* +1: slot for the idx == -1 terminator set by init_regs() */
+static struct reg scalars[64+1];
+static struct reg vectors[128*4+1];
+/* V_VTXFMT indexes others[]; V_MAX is not referenced in the code visible here. */
+#define V_VTXFMT 0
+#define V_MAX 1
+/* Trackers for values that are not registers; the { -1 } entry has idx == -1. */
+static struct reg others[] = {
+   { V_VTXFMT, 0, 0 },
+   { -1 }
+};
+/* total and total_changed are incremented by the radeon_emit_*() decoders. */
+static int total, total_changed, bufs;
+
+/* Fill the regs, scalars and vectors tables and mark the end of each with idx -1. */
+static void init_regs( void )
+{
+   struct reg_names *tmp;
+   int i;
+   /* Bound by reg_names[]: regs[] has one extra (terminator) slot with no name behind it. */
+   for (i = 0 ; i < Elements(reg_names) ; i++) {
+      regs[i].closest = &reg_names[i];
+      regs[i].idx = reg_names[i].idx;
+   }
+   /* tmp moves on to the next name once i reaches that name's idx. */
+   for (i = 0, tmp = scalar_names ; i < 64 ; i++) {
+      if (tmp[1].idx == i) tmp++;
+      scalars[i].idx = i;
+      scalars[i].closest = tmp;
+      scalars[i].is_float = 1;
+   }
+   for (i = 0, tmp = vector_names ; i < 128*4 ; i++) {
+      if (tmp[1].idx == i) tmp++;
+      vectors[i].closest = tmp;
+      vectors[i].is_float = 1;
+      vectors[i].idx = i;
+   }
+
+   regs[Elements(regs)-1].idx = -1;
+   scalars[Elements(scalars)-1].idx = -1;
+   vectors[Elements(vectors)-1].idx = -1;
+}
+
+/* Record val in reg->values[]; returns 1 if it was already present, 0 if it is new. */
+static int find_or_add_value( struct reg *reg, int val )
+{
+   int j;
+   for ( j = 0 ; j < reg->nvalues ; j++)
+      if ( val == reg->values[j].i )
+	 return 1;
+   if (j == reg->nalloc) {	/* full: grow through a temporary so a failed realloc cannot leak or crash */
+      int nalloc = (reg->nalloc + 5) * 2;
+      union fi *grown = (union fi *) realloc( reg->values, nalloc * sizeof(union fi) );
+      if (!grown)
+	 return 0;		/* value is new but could not be stored */
+      reg->values = grown;
+      reg->nalloc = nalloc;
+   }
+   reg->values[reg->nvalues++].i = val;
+   return 0;
+}
+
+/* Scan a table that ends at idx == -1; returns the entry for reg, or 0 after reporting a miss. */
+static struct reg *lookup_reg( struct reg *tab, int reg )
+{
+   struct reg *r;
+
+   for (r = tab ; r->idx != -1 ; r++)
+      if (r->idx == reg)
+	 return r;
+
+   fprintf(stderr, "*** unknown reg %d\n", reg);
+   return 0;
+}
+
+
+/* Name for reg: the closest entry's name, with "+offset" appended when idx differs from it. */
+static const char *get_reg_name( struct reg *reg )
+{
+   static char tmp[80];	/* static buffer: overwritten by the next call that formats an offset */
+   int offset = reg->idx - reg->closest->idx;
+
+   if (offset == 0) 
+      return reg->closest->name;
+
+   sprintf(tmp, "%s+%d", reg->closest->name, offset);
+
+   return tmp;
+}
+
+/* Log an integer register write and remember it; returns nonzero if the value changed. */
+static int print_int_reg_assignment( struct reg *reg, int data )
+{
+   int changed = (reg->current.i != data);
+   int ever_seen = find_or_add_value( reg, data );
+   /* One decision covers both the "name <-- value" text and its newline. */
+   int show = VERBOSE || (NORMAL && (changed || !ever_seen));
+
+   if (show)
+       fprintf(stderr, "   %s <-- 0x%x", get_reg_name(reg), data);
+
+   if (NORMAL && !ever_seen)
+      fprintf(stderr, " *** BRAND NEW VALUE");
+   else if (NORMAL && changed)
+      fprintf(stderr, " *** CHANGED"); 
+
+   reg->current.i = data;
+   if (show)
+      fprintf(stderr, "\n");
+
+   return changed;
+}
+
+
+/* Log a float register write and track the extremes seen; returns nonzero if it changed. */
+static int print_float_reg_assignment( struct reg *reg, float data )
+{
+   int changed = (reg->current.f != data);
+   int newmin = (data < reg->vmin);
+   int newmax = (data > reg->vmax);
+   int show = VERBOSE || (NORMAL && (newmin || newmax || changed));
+
+   if (show)
+      fprintf(stderr, "   %s <-- %.3f", get_reg_name(reg), data);
+
+   /* vmin/vmax are only updated (and annotated) while NORMAL output is on. */
+   if (NORMAL && newmin) {
+      fprintf(stderr, " *** NEW MIN (prev %.3f)", reg->vmin);
+      reg->vmin = data;
+   }
+   else if (NORMAL && newmax) {
+      fprintf(stderr, " *** NEW MAX (prev %.3f)", reg->vmax);
+      reg->vmax = data;
+   }
+   else if (NORMAL && changed)
+      fprintf(stderr, " *** CHANGED");
+
+   reg->current.f = data;
+
+   if (show)
+      fprintf(stderr, "\n");
+
+   return changed;
+}
+
+static int print_reg_assignment( struct reg *reg, int data )
+{  /* Route a raw dword to the float or int logger according to reg->is_float. */
+   union fi bits;	/* reinterpret through the union: *(float *)&data breaks strict aliasing */
+   bits.i = data;
+   if (reg->is_float) return print_float_reg_assignment( reg, bits.f );
+   return print_int_reg_assignment( reg, data );
+}
+
+/* Decode one state packet: log each register it writes, then consume its dwords from cmdbuf. */
+static int radeon_emit_packets( drmRadeonCmdHeader header, drmRadeonCmdBuffer *cmdbuf )
+{
+   int id = (int)header.packet.packet_id;
+   int *data = (int *)cmdbuf->buf;
+   int sz, i;
+   if (id < 0 || id >= RADEON_MAX_STATE_PACKETS) {	/* id indexes packet[]: range-check it first */
+      fprintf(stderr, "Bad packet id %d\n", id);
+      return -EINVAL;
+   }
+   sz = packet[id].len;
+   if (sz * sizeof(int) > cmdbuf->bufsz) {
+      fprintf(stderr, "Packet overflows cmdbuf\n");      
+      return -EINVAL;
+   }
+   if (VERBOSE) 
+      fprintf(stderr, "Packet 0 reg %s nr %d\n", packet[id].name, sz );
+   for ( i = 0 ; i < sz ; i++) {
+      /* lookup_reg() returns 0 (after complaining) for registers it has no entry for. */
+      struct reg *reg = lookup_reg( regs, packet[id].start + i*4 );
+      if (reg && print_reg_assignment( reg, data[i] ))
+	 total_changed++;
+      total++;
+   }
+   cmdbuf->buf += sz * sizeof(int);
+   cmdbuf->bufsz -= sz * sizeof(int);
+   return 0;
+}
+
+
+/* Decode a scalar-state command: log count dwords written at offset, offset+stride, ... */
+static int radeon_emit_scalars( drmRadeonCmdHeader header, drmRadeonCmdBuffer *cmdbuf )
+{
+   int sz = header.scalars.count;
+   int *data = (int *)cmdbuf->buf;
+   int start = header.scalars.offset;
+   int stride = header.scalars.stride;
+   int i;
+
+   if (VERBOSE)
+      fprintf(stderr, "emit scalars, start %d stride %d nr %d (end %d)\n",
+	      start, stride, sz, start + stride * sz);
+   if (start + stride * sz > 64) {
+      fprintf(stderr, "emit scalars OVERFLOW %d/%d/%d\n", start, stride, sz);
+      return -1;
+   }
+   if (sz * sizeof(int) > cmdbuf->bufsz) {	/* same payload check as radeon_emit_packets() */
+      fprintf(stderr, "Scalars overflow cmdbuf\n");
+      return -1;
+   }
+   for (i = 0 ; i < sz ; i++, start += stride) {
+      struct reg *reg = lookup_reg( scalars, start );
+      if (reg && print_reg_assignment( reg, data[i] ))	/* reg is 0 when the slot is unknown */
+	 total_changed++;
+      total++;
+   }
+   cmdbuf->buf += sz * sizeof(int);
+   cmdbuf->bufsz -= sz * sizeof(int);
+   return 0;
+}
+
+
+/* Check: inf/nan/extreme-size?
+ * Check: table start, end, nr, etc.
+ */
+/* Decode a vector-state command: log count dwords, four per vector, at offset, offset+stride, ... */
+static int radeon_emit_vectors( drmRadeonCmdHeader header, drmRadeonCmdBuffer *cmdbuf )
+{
+   int sz = header.vectors.count;
+   int *data = (int *)cmdbuf->buf;
+   int start = header.vectors.offset;
+   int stride = header.vectors.stride;
+   int i,j;
+
+   if (VERBOSE)
+      fprintf(stderr, "emit vectors, start %d stride %d nr %d (end %d)\n",
+	      start, stride, sz, start + stride * sz);
+   if (start + stride * (sz/4) > 128) {
+      fprintf(stderr, "emit vectors OVERFLOW %d/%d/%d\n", start, stride, sz);
+      return -1;
+   }
+   if (sz * sizeof(int) > cmdbuf->bufsz) {	/* same payload check as radeon_emit_packets() */
+      fprintf(stderr, "Vectors overflow cmdbuf\n");
+      return -1;
+   }
+   for (i = 0 ; i < sz ;  start += stride) {
+      int changed = 0;
+      /* Also stop at sz, so a count that is not a multiple of 4 cannot read past the payload. */
+      for (j = 0 ; j < 4 && i < sz ; i++,j++) {
+	 struct reg *reg = lookup_reg( vectors, start*4+j );
+	 if (reg && print_reg_assignment( reg, data[i] ))
+	    changed = 1;
+      }
+      if (changed)
+	 total_changed += 4;
+      total += 4;
+   }
+   cmdbuf->buf += sz * sizeof(int);
+   cmdbuf->bufsz -= sz * sizeof(int);
+   return 0;
+}
+
+
+/* When NORMAL is set, print the component names selected by vfmt; always returns 0. */
+static int print_vertex_format( int vfmt )
+{
+   if (NORMAL) {
+      fprintf(stderr, "   %s(%x): %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+	      "vertex format",
+	      vfmt,
+	      "xy,",
+	      (vfmt & RADEON_CP_VC_FRMT_Z) ? "z," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_W0) ? "w0," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_FPCOLOR) ? "fpcolor," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_FPALPHA) ? "fpalpha," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_PKCOLOR) ? "pkcolor," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_FPSPEC) ? "fpspec," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_FPFOG) ? "fpfog," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_PKSPEC) ? "pkspec," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_ST0) ? "st0," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_ST1) ? "st1," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_Q1) ? "q1," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_ST2) ? "st2," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_Q2) ? "q2," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_ST3) ? "st3," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_Q3) ? "q3," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_Q0) ? "q0," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_N0) ? "n0," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_XY1) ? "xy1," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_Z1) ? "z1," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_W1) ? "w1," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_N1) ? "n1," : "");
+      /* find_or_add_value() returns 0 the first time a given format is recorded. */
+      if (!find_or_add_value( &others[V_VTXFMT], vfmt ))
+	 fprintf(stderr, " *** NEW VALUE");
+
+      fprintf(stderr, "\n");
+   }
+
+   return 0;
+}
+
+static char *primname[0xf] = {	/* indexed by (prim & 0xf); slots past the 11 initialisers are null */
+   "NONE",
+   "POINT",
+   "LINE",
+   "LINE_STRIP",
+   "TRI_LIST",
+   "TRI_FAN",
+   "TRI_STRIP",
+   "TRI_TYPE_2",
+   "RECT_LIST",
+   "3VRT_POINT_LIST",
+   "3VRT_LINE_LIST",
+};
+
+/* Log the flag bits of a prim dword and check its vertex count against the
+ * primitive type; returns 0 if the count is acceptable, -1 after a message if not.
+ */
+static int print_prim_and_flags( int prim )
+{
+   int numverts;
+
+   if (NORMAL)
+      fprintf(stderr, "   %s(%x): %s%s%s%s%s%s%s\n",
+	      "prim flags",
+	      prim,
+	      ((prim & 0x30) == RADEON_CP_VC_CNTL_PRIM_WALK_IND) ? "IND," : "",
+	      ((prim & 0x30) == RADEON_CP_VC_CNTL_PRIM_WALK_LIST) ? "LIST," : "",
+	      ((prim & 0x30) == RADEON_CP_VC_CNTL_PRIM_WALK_RING) ? "RING," : "",
+	      (prim & RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA) ? "RGBA," : "BGRA, ",
+	      (prim & RADEON_CP_VC_CNTL_MAOS_ENABLE) ? "MAOS," : "",
+	      (prim & RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE) ? "RADEON," : "",
+	      (prim & RADEON_CP_VC_CNTL_TCL_ENABLE) ? "TCL," : "");
+   if ((prim & 0xf) > RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_LINE_LIST) {
+      fprintf(stderr, "   *** Bad primitive: %x\n", prim & 0xf);
+      return -1;
+   }
+   /* The count is the upper 16 bits; shift as unsigned so bit 31 cannot make it negative. */
+   numverts = (int)(((unsigned int)prim >> 16) & 0xffff);
+   if (NORMAL)
+      fprintf(stderr, "   prim: %s numverts %d\n", primname[prim&0xf], numverts);
+   switch (prim & 0xf) {
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_NONE:
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_POINT:
+      if (numverts < 1) {
+	 fprintf(stderr, "Bad nr verts for point %d\n", numverts);
+	 return -1;
+      }
+      break;
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_LINE:
+      if ((numverts & 1) || numverts == 0) {
+	 fprintf(stderr, "Bad nr verts for line %d\n", numverts);
+	 return -1;
+      }
+      break;
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP:
+      if (numverts < 2) {
+	 fprintf(stderr, "Bad nr verts for line_strip %d\n", numverts);
+	 return -1;
+      }
+      break;
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST:
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_POINT_LIST:
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_LINE_LIST:
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST:
+      if (numverts % 3 || numverts == 0) {
+	 fprintf(stderr, "Bad nr verts for tri %d\n", numverts);
+	 return -1;
+      }
+      break;
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN:
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP:
+      if (numverts < 3) {
+	 fprintf(stderr, "Bad nr verts for strip/fan %d\n", numverts);
+	 return -1;
+      }
+      break;
+   default:
+      fprintf(stderr, "Bad primitive\n");
+      return -1;
+   }
+   return 0;
+}
+
+/* Decode the PACKET3 at cmdbuf->buf, log/check what is known about it and
+ * step cmdbuf past it.  Returns 0, or -EINVAL for a malformed packet. */
+static int radeon_emit_packet3( drmRadeonCmdBuffer *cmdbuf )
+{
+   int cmdsz;
+   int *cmd = (int *)cmdbuf->buf;
+   int *tmp;
+   int i, stride, size, start;
+
+   cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
+
+   if ((cmd[0] & RADEON_CP_PACKET_MASK) != RADEON_CP_PACKET3 ||
+       cmdsz * 4 > cmdbuf->bufsz ||
+       cmdsz > RADEON_CP_PACKET_MAX_DWORDS) {
+      fprintf(stderr, "Bad packet\n");
+      return -EINVAL;
+   }
+
+   switch( cmd[0] & ~RADEON_CP_PACKET_COUNT_MASK ) {
+   case RADEON_CP_PACKET3_NOP:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_NOP, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_NEXT_CHAR:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_NEXT_CHAR, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_PLY_NEXTSCAN:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_PLY_NEXTSCAN, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_SET_SCISSORS:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_SET_SCISSORS, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_3D_RNDR_GEN_INDX_PRIM, %d dwords\n",
+	      cmdsz);
+      break;
+   case RADEON_CP_PACKET3_LOAD_MICROCODE:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_LOAD_MICROCODE, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_WAIT_FOR_IDLE:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_WAIT_FOR_IDLE, %d dwords\n", cmdsz);
+      break;
+
+   case RADEON_CP_PACKET3_3D_DRAW_VBUF:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_3D_DRAW_VBUF, %d dwords\n", cmdsz);
+      print_vertex_format(cmd[1]);
+      print_prim_and_flags(cmd[2]);
+      break;
+
+   case RADEON_CP_PACKET3_3D_DRAW_IMMD:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_3D_DRAW_IMMD, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_3D_DRAW_INDX: {
+      int neltdwords;
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_3D_DRAW_INDX, %d dwords\n", cmdsz);
+      print_vertex_format(cmd[1]);
+      print_prim_and_flags(cmd[2]);
+      neltdwords = (unsigned int)cmd[2] >> 16;	/* unsigned: bit 31 may be set */
+      neltdwords += neltdwords & 1;
+      neltdwords /= 2;
+      if (neltdwords + 3 != cmdsz)
+	 fprintf(stderr, "Mismatch in DRAW_INDX, %d vs cmdsz %d\n",
+		 neltdwords, cmdsz);
+      break;
+   }
+   case RADEON_CP_PACKET3_LOAD_PALETTE:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_LOAD_PALETTE, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_3D_LOAD_VBPNTR:
+      if (NORMAL) {
+	 fprintf(stderr, "PACKET3_3D_LOAD_VBPNTR, %d dwords\n", cmdsz);
+	 fprintf(stderr, "   nr arrays: %d\n", cmd[1]);
+      }
+      /* As decoded below: 3 dwords per array pair, 2 for a trailing odd one. */
+      if ((cmd[1]/2)*3 + (cmd[1]%2)*2 != cmdsz - 2) {
+	 fprintf(stderr, "  ****** MISMATCH %d/%d *******\n",
+		 (cmd[1]/2)*3 + (cmd[1]%2)*2 + 2, cmdsz);
+	 return -EINVAL;
+      }
+
+      if (NORMAL) {
+	 tmp = cmd+2;
+	 for (i = 0 ; i < cmd[1] ; i++) {
+	    if (i & 1) {
+	       stride = (tmp[0]>>24) & 0xff;
+	       size = (tmp[0]>>16) & 0xff;
+	       start = tmp[2];
+	       tmp += 3;
+	    }
+	    else {
+	       stride = (tmp[0]>>8) & 0xff;
+	       size = (tmp[0]) & 0xff;
+	       start = tmp[1];
+	    }
+	    fprintf(stderr, "   array %d: start 0x%x vsize %d vstride %d\n",
+		    i, start, size, stride );
+	 }
+      }
+      break;
+   case RADEON_CP_PACKET3_CNTL_PAINT:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_PAINT, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_BITBLT:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_BITBLT, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_SMALLTEXT:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_SMALLTEXT, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_HOSTDATA_BLT, %d dwords\n",
+	      cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_POLYLINE:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_POLYLINE, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_POLYSCANLINES:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_POLYSCANLINES, %d dwords\n",
+	      cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_PAINT_MULTI:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_PAINT_MULTI, %d dwords\n",
+	      cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_BITBLT_MULTI:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_BITBLT_MULTI, %d dwords\n",
+	      cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_TRANS_BITBLT:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_TRANS_BITBLT, %d dwords\n",
+	      cmdsz);
+      break;
+   default:
+      fprintf(stderr, "UNKNOWN PACKET, %d dwords\n", cmdsz);
+      break;
+   }
+
+   cmdbuf->buf += cmdsz * 4;
+   cmdbuf->bufsz -= cmdsz * 4;
+   return 0;
+}
+
+
+/* Log each cliprect that accompanies the packet, then hand the packet
+ * itself to radeon_emit_packet3(). */
+static int radeon_emit_packet3_cliprect( drmRadeonCmdBuffer *cmdbuf )
+{
+   const XF86DRIClipRectRec *rects = (XF86DRIClipRectRec *)cmdbuf->boxes;
+   const int count = cmdbuf->nbox;
+   int n;
+
+   if (NORMAL) {
+      for ( n = 0 ; n < count ; n++ ) {
+	 const XF86DRIClipRectRec *r = &rects[n];
+	 fprintf(stderr, "Emit box %d/%d %d,%d %d,%d\n",
+		 n, count, r->x1, r->y1, r->x2, r->y2);
+      }
+   }
+
+   /* nbox is reset to 0 when exactly one cliprect was supplied. */
+   if (count == 1)
+      cmdbuf->nbox = 0;
+
+   return radeon_emit_packet3( cmdbuf );
+}
+
+/* Check every command queued in rmesa->store; returns 0 or -EINVAL. */
+int radeonSanityCmdBuffer( radeonContextPtr rmesa,
+			   int nbox,
+			   XF86DRIClipRectRec *boxes )
+{
+   int idx;
+   drmRadeonCmdBuffer cmdbuf;
+   drmRadeonCmdHeader header;
+   static int inited = 0;
+
+   if (!inited) {
+      init_regs();
+      inited = 1;
+   }
+
+   cmdbuf.buf = rmesa->store.cmd_buf;
+   cmdbuf.bufsz = rmesa->store.cmd_used;
+   cmdbuf.boxes = (drmClipRect *)boxes;
+   cmdbuf.nbox = nbox;
+   /* Compare as int so a negative remaining size ends the loop. */
+   while ( cmdbuf.bufsz >= (int)sizeof(header) ) {
+      /* Peel off the command header, then dispatch on its type. */
+      header.i = *(int *)cmdbuf.buf;
+      cmdbuf.buf += sizeof(header);
+      cmdbuf.bufsz -= sizeof(header);
+
+      switch (header.header.cmd_type) {
+      case RADEON_CMD_PACKET:
+	 if (radeon_emit_packets( header, &cmdbuf )) {
+	    fprintf(stderr,"radeon_emit_packets failed\n");
+	    return -EINVAL;
+	 }
+	 break;
+
+      case RADEON_CMD_SCALARS:
+	 if (radeon_emit_scalars( header, &cmdbuf )) {
+	    fprintf(stderr,"radeon_emit_scalars failed\n");
+	    return -EINVAL;
+	 }
+	 break;
+
+      case RADEON_CMD_VECTORS:
+	 if (radeon_emit_vectors( header, &cmdbuf )) {
+	    fprintf(stderr,"radeon_emit_vectors failed\n");
+	    return -EINVAL;
+	 }
+	 break;
+
+      case RADEON_CMD_DMA_DISCARD:
+	 idx = header.dma.buf_idx;
+	 if (NORMAL)
+	    fprintf(stderr, "RADEON_CMD_DMA_DISCARD buf %d\n", idx);
+	 bufs++;
+	 break;
+
+      case RADEON_CMD_PACKET3:
+	 if (radeon_emit_packet3( &cmdbuf )) {
+	    fprintf(stderr,"radeon_emit_packet3 failed\n");
+	    return -EINVAL;
+	 }
+	 break;
+
+      case RADEON_CMD_PACKET3_CLIP:
+	 if (radeon_emit_packet3_cliprect( &cmdbuf )) {
+	    fprintf(stderr,"radeon_emit_packet3_clip failed\n");
+	    return -EINVAL;
+	 }
+	 break;
+
+      default:
+	 fprintf(stderr,"bad cmd_type %d at %p\n",
+		   header.header.cmd_type,
+		   (void *)(cmdbuf.buf - sizeof(header)));
+	 return -EINVAL;
+      }
+   }
+
+   if (NORMAL)
+   {
+      static int n = 0;
+      n++;
+      if (n == 10) {
+	 fprintf(stderr, "Bufs %d Total emitted %d real changes %d (%.2f%%)\n",
+		 bufs,
+		 total, total_changed,
+		 ((float)total_changed/(float)total*100.0));
+	 fprintf(stderr, "Total emitted per buf: %.2f\n",
+		 (float)total/(float)bufs);
+	 fprintf(stderr, "Real changes per buf: %.2f\n",
+		 (float)total_changed/(float)bufs);
+
+	 bufs = n = total = total_changed = 0;
+      }
+   }
+
+   return 0;
+}
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_sanity.h b/xc/lib/GL/mesa/src/drv/radeon/radeon_sanity.h
new file mode 100644
index 000000000..58e8335dd
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_sanity.h
@@ -0,0 +1,8 @@
+#ifndef RADEON_SANITY_H
+#define RADEON_SANITY_H
+/* Checks the command buffer held in rmesa; returns 0 or -EINVAL. */
+extern int radeonSanityCmdBuffer( radeonContextPtr rmesa,
+				  int nbox,
+				  XF86DRIClipRectRec *boxes );
+
+#endif /* RADEON_SANITY_H */
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_screen.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_screen.c
index 0ea25d772..414414f13 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_screen.c
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_screen.c
@@ -47,6 +47,16 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define PCI_CHIP_RADEON_QE	0x5145
 #define PCI_CHIP_RADEON_QF	0x5146
 #define PCI_CHIP_RADEON_QG	0x5147
+
+#define PCI_CHIP_RADEON_QY	0x5159
+#define PCI_CHIP_RADEON_QZ	0x515A
+
+#define PCI_CHIP_RADEON_LW	0x4C57 /* mobility 7 - has tcl */
+
+#define PCI_CHIP_RADEON_LY	0x4C59
+#define PCI_CHIP_RADEON_LZ	0x4C5A
+
+#define PCI_CHIP_RV200_QW	0x5157 /* a confusing name for a radeon */
 #endif
 
 
@@ -62,7 +72,9 @@ radeonScreenPtr radeonCreateScreen( __DRIscreenPrivate *sPriv )
       int major, minor, patch;
       if ( XF86DRIQueryVersion( sPriv->display, &major, &minor, &patch ) ) {
          if ( major != 4 || minor < 0 ) {
-            __driUtilMessage( "Radeon DRI driver expected DRI version 4.0.x but got version %d.%d.%d", major, minor, patch );
+            __driUtilMessage( "Radeon DRI driver expected DRI version 4.0.x "
+			      "but got version %d.%d.%d",
+			      major, minor, patch );
             return NULL;
          }
       }
@@ -71,24 +83,39 @@ radeonScreenPtr radeonCreateScreen( __DRIscreenPrivate *sPriv )
    /* Check that the DDX driver version is compatible */
    if ( sPriv->ddxMajor != 4 ||
 	sPriv->ddxMinor < 0 ) {
-      __driUtilMessage( "Radeon DRI driver expected DDX driver version 4.0.x but got version %d.%d.%d", sPriv->ddxMajor, sPriv->ddxMinor, sPriv->ddxPatch );
+      __driUtilMessage( "Radeon DRI driver expected DDX driver version 4.0.x "
+			"but got version %d.%d.%d", 
+			sPriv->ddxMajor, sPriv->ddxMinor, sPriv->ddxPatch );
       return NULL;
    }
 
    /* Check that the DRM driver version is compatible */
-   if ( sPriv->drmMajor != 1 ||
-	sPriv->drmMinor < 2 ) {
-      __driUtilMessage( "Radeon DRI driver expected DRM driver version 1.2.x but got version %d.%d.%d", sPriv->drmMajor, sPriv->drmMinor, sPriv->drmPatch );
+   if ( sPriv->drmMajor != 1 ) {
+      __driUtilMessage( "Radeon DRI driver expected DRM driver version 1.x.x "
+			"but got version %d.%d.%d", 
+			sPriv->drmMajor, sPriv->drmMinor, sPriv->drmPatch );
       return NULL;
    }
 
+
    /* Allocate the private area */
    radeonScreen = (radeonScreenPtr) CALLOC( sizeof(*radeonScreen) );
    if ( !radeonScreen ) {
-      __driUtilMessage("radeonCreateScreen(): CALLOC radeonScreen struct failed");
+      __driUtilMessage("%s: CALLOC radeonScreen struct failed",
+		       __FUNCTION__);
       return NULL;
    }
 
+   if ( sPriv->drmMinor < RADEON_DRM_CURRENT ||
+        getenv("RADEON_COMPAT")) {
+	   fprintf( stderr, "Radeon DRI driver:\n\t"
+		    "Compatibility mode for DRM driver version %d.%d.%d\n\t"
+		    "TCL will be disabled, expect reduced performance\n\t"
+		    "(prefer DRM radeon.o 1.3.x or newer)\n\t", 
+		    sPriv->drmMajor, sPriv->drmMinor, sPriv->drmPatch ); 
+   }
+
+
    /* This is first since which regions we map depends on whether or
     * not we are using a PCI card.
     */
@@ -150,10 +177,19 @@ radeonScreenPtr radeonCreateScreen( __DRIscreenPrivate *sPriv )
    case PCI_CHIP_RADEON_QE:
    case PCI_CHIP_RADEON_QF:
    case PCI_CHIP_RADEON_QG:
-      radeonScreen->chipset = RADEON_CARD_TYPE_RADEON;
+   case PCI_CHIP_RADEON_QY:
+   case PCI_CHIP_RADEON_QZ:
+   case PCI_CHIP_RV200_QW:
+   case PCI_CHIP_RADEON_LW:
+      radeonScreen->chipset = RADEON_CHIPSET_RADEON;
+      break;
+   case PCI_CHIP_RADEON_LY:
+   case PCI_CHIP_RADEON_LZ:
+      radeonScreen->chipset = RADEON_CHIPSET_MOBILITY;
       break;
    default:
-      radeonScreen->chipset = RADEON_CARD_TYPE_RADEON;
+      fprintf(stderr, "unknown chip id, assuming full radeon support\n");
+      radeonScreen->chipset = RADEON_CHIPSET_RADEON;
       break;
    }
 
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_screen.h b/xc/lib/GL/mesa/src/drv/radeon/radeon_screen.h
index 96ab00879..e20cc2d3f 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_screen.h
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_screen.h
@@ -51,6 +51,11 @@ typedef struct {
    drmAddress map;			/* Mapping of the DRM region */
 } radeonRegionRec, *radeonRegionPtr;
 
+#define RADEON_CHIPSET_RADEON   1
+#define RADEON_CHIPSET_MOBILITY 2
+
+#define RADEON_DRM_CURRENT 3	       /* Current drm minor version */
+
 typedef struct {
 
    int chipset;
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_span.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_span.c
index 7f7de7ee3..c2ea5858e 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_span.c
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_span.c
@@ -31,7 +31,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
  * Authors:
  *   Kevin E. Martin <martin@valinux.com>
  *   Gareth Hughes <gareth@valinux.com>
- *   Keith Whitwell <keithw@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
  *
  */
 
@@ -292,12 +292,22 @@ static void radeonSetReadBuffer( GLcontext *ctx,
 
    switch ( mode ) {
    case GL_FRONT_LEFT:
-      rmesa->state.pixel.readOffset = rmesa->radeonScreen->frontOffset;
-      rmesa->state.pixel.readPitch  = rmesa->radeonScreen->frontPitch;
+      if ( rmesa->doPageFlip && rmesa->sarea->pfCurrentPage == 1 ) {
+        rmesa->state.pixel.readOffset = rmesa->radeonScreen->backOffset;
+        rmesa->state.pixel.readPitch  = rmesa->radeonScreen->backPitch;
+      } else {
+      	rmesa->state.pixel.readOffset = rmesa->radeonScreen->frontOffset;
+      	rmesa->state.pixel.readPitch  = rmesa->radeonScreen->frontPitch;
+      }
       break;
    case GL_BACK_LEFT:
-      rmesa->state.pixel.readOffset = rmesa->radeonScreen->backOffset;
-      rmesa->state.pixel.readPitch  = rmesa->radeonScreen->backPitch;
+      if ( rmesa->doPageFlip && rmesa->sarea->pfCurrentPage == 1 ) {
+      	rmesa->state.pixel.readOffset = rmesa->radeonScreen->frontOffset;
+      	rmesa->state.pixel.readPitch  = rmesa->radeonScreen->frontPitch;
+      } else {
+        rmesa->state.pixel.readOffset = rmesa->radeonScreen->backOffset;
+        rmesa->state.pixel.readPitch  = rmesa->radeonScreen->backPitch;
+      }
       break;
    default:
       assert(0);
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_state.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_state.c
index e790188f5..023815760 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_state.c
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_state.c
@@ -25,19 +25,23 @@
  *
  * Authors:
  *    Gareth Hughes <gareth@valinux.com>
- *    Keith Whitwell <keithw@valinux.com>
+ *    Keith Whitwell <keith@tungstengraphics.com>
  */
 
 #include "radeon_context.h"
-#include "radeon_state.h"
 #include "radeon_ioctl.h"
-#include "radeon_tris.h"
-#include "radeon_vb.h"
+#include "radeon_state.h"
+#include "radeon_tcl.h"
 #include "radeon_tex.h"
+#include "radeon_swtcl.h"
+#include "radeon_vtxfmt.h"
 
+#include "mem.h"
 #include "mmath.h"
 #include "enums.h"
 #include "colormac.h"
+#include "light.h"
+#include "api_arrayelt.h"
 
 #include "swrast/swrast.h"
 #include "array_cache/acache.h"
@@ -46,6 +50,14 @@
 #include "swrast_setup/swrast_setup.h"
 
 
+#define MODEL_PROJ 0
+#define MODEL      1
+#define MODEL_IT   2
+#define TEXMAT_0   3
+#define TEXMAT_1   4
+#define TEXMAT_2   5
+
+
 /* =============================================================
  * Alpha blending
  */
@@ -53,46 +65,47 @@
 static void radeonAlphaFunc( GLcontext *ctx, GLenum func, GLchan ref )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   int pp_misc = rmesa->hw.ctx.cmd[CTX_PP_MISC];
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
+   RADEON_STATECHANGE( rmesa, ctx );
 
-   rmesa->state.hw.context.pp_misc &= ~(RADEON_ALPHA_TEST_OP_MASK |
-					RADEON_REF_ALPHA_MASK);
+   pp_misc &= ~(RADEON_ALPHA_TEST_OP_MASK | RADEON_REF_ALPHA_MASK);
+   pp_misc |= (ref & RADEON_REF_ALPHA_MASK);
 
    switch ( func ) {
    case GL_NEVER:
-      rmesa->state.hw.context.pp_misc |= RADEON_ALPHA_TEST_FAIL;
+      pp_misc |= RADEON_ALPHA_TEST_FAIL;
       break;
    case GL_LESS:
-      rmesa->state.hw.context.pp_misc |= RADEON_ALPHA_TEST_LESS;
+      pp_misc |= RADEON_ALPHA_TEST_LESS;
       break;
    case GL_EQUAL:
-      rmesa->state.hw.context.pp_misc |= RADEON_ALPHA_TEST_EQUAL;
+      pp_misc |= RADEON_ALPHA_TEST_EQUAL;
       break;
    case GL_LEQUAL:
-      rmesa->state.hw.context.pp_misc |= RADEON_ALPHA_TEST_LEQUAL;
+      pp_misc |= RADEON_ALPHA_TEST_LEQUAL;
       break;
    case GL_GREATER:
-      rmesa->state.hw.context.pp_misc |= RADEON_ALPHA_TEST_GREATER;
+      pp_misc |= RADEON_ALPHA_TEST_GREATER;
       break;
    case GL_NOTEQUAL:
-      rmesa->state.hw.context.pp_misc |= RADEON_ALPHA_TEST_NEQUAL;
+      pp_misc |= RADEON_ALPHA_TEST_NEQUAL;
       break;
    case GL_GEQUAL:
-      rmesa->state.hw.context.pp_misc |= RADEON_ALPHA_TEST_GEQUAL;
+      pp_misc |= RADEON_ALPHA_TEST_GEQUAL;
       break;
    case GL_ALWAYS:
-      rmesa->state.hw.context.pp_misc |= RADEON_ALPHA_TEST_PASS;
+      pp_misc |= RADEON_ALPHA_TEST_PASS;
       break;
    }
 
-   rmesa->state.hw.context.pp_misc |= (ref & RADEON_REF_ALPHA_MASK);
+   rmesa->hw.ctx.cmd[CTX_PP_MISC] = pp_misc;
 }
 
 static void radeonBlendEquation( GLcontext *ctx, GLenum mode )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLuint b = rmesa->state.hw.context.rb3d_blendcntl & ~RADEON_COMB_FCN_MASK;
+   GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & ~RADEON_COMB_FCN_MASK;
    GLboolean fallback = GL_FALSE;
 
    switch ( mode ) {
@@ -112,12 +125,12 @@ static void radeonBlendEquation( GLcontext *ctx, GLenum mode )
 
    FALLBACK( rmesa, RADEON_FALLBACK_BLEND_EQ, fallback );
    if ( !fallback ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
-      rmesa->state.hw.context.rb3d_blendcntl = b;
+      RADEON_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = b;
       if ( ctx->Color.ColorLogicOpEnabled ) {
-	 rmesa->state.hw.context.rb3d_cntl |=  RADEON_ROP_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_ROP_ENABLE;
       } else {
-	 rmesa->state.hw.context.rb3d_cntl &= ~RADEON_ROP_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE;
       }
    }
 }
@@ -125,8 +138,8 @@ static void radeonBlendEquation( GLcontext *ctx, GLenum mode )
 static void radeonBlendFunc( GLcontext *ctx, GLenum sfactor, GLenum dfactor )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLuint b = rmesa->state.hw.context.rb3d_blendcntl & ~(RADEON_SRC_BLEND_MASK |
-							 RADEON_DST_BLEND_MASK);
+   GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & 
+      ~(RADEON_SRC_BLEND_MASK | RADEON_DST_BLEND_MASK);
    GLboolean fallback = GL_FALSE;
 
    switch ( ctx->Color.BlendSrcRGB ) {
@@ -200,8 +213,8 @@ static void radeonBlendFunc( GLcontext *ctx, GLenum sfactor, GLenum dfactor )
 
    FALLBACK( rmesa, RADEON_FALLBACK_BLEND_FUNC, fallback );
    if ( !fallback ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
-      rmesa->state.hw.context.rb3d_blendcntl = b;
+      RADEON_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = b;
    }
 }
 
@@ -221,33 +234,33 @@ static void radeonDepthFunc( GLcontext *ctx, GLenum func )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
-   rmesa->state.hw.context.rb3d_zstencilcntl &= ~RADEON_Z_TEST_MASK;
+   RADEON_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_Z_TEST_MASK;
 
    switch ( ctx->Depth.Func ) {
    case GL_NEVER:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_Z_TEST_NEVER;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_NEVER;
       break;
    case GL_LESS:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_Z_TEST_LESS;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_LESS;
       break;
    case GL_EQUAL:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_Z_TEST_EQUAL;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_EQUAL;
       break;
    case GL_LEQUAL:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_Z_TEST_LEQUAL;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_LEQUAL;
       break;
    case GL_GREATER:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_Z_TEST_GREATER;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_GREATER;
       break;
    case GL_NOTEQUAL:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_Z_TEST_NEQUAL;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_NEQUAL;
       break;
    case GL_GEQUAL:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_Z_TEST_GEQUAL;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_GEQUAL;
       break;
    case GL_ALWAYS:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_Z_TEST_ALWAYS;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_ALWAYS;
       break;
    }
 }
@@ -256,19 +269,19 @@ static void radeonDepthFunc( GLcontext *ctx, GLenum func )
 static void radeonDepthMask( GLcontext *ctx, GLboolean flag )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
+   RADEON_STATECHANGE( rmesa, ctx );
 
    if ( ctx->Depth.Mask ) {
-      rmesa->state.hw.context.rb3d_zstencilcntl |=  RADEON_Z_WRITE_ENABLE;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |=  RADEON_Z_WRITE_ENABLE;
    } else {
-      rmesa->state.hw.context.rb3d_zstencilcntl &= ~RADEON_Z_WRITE_ENABLE;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_Z_WRITE_ENABLE;
    }
 }
 
 static void radeonClearDepth( GLcontext *ctx, GLclampd d )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLuint format = (rmesa->state.hw.context.rb3d_zstencilcntl &
+   GLuint format = (rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &
 		    RADEON_DEPTH_FORMAT_MASK);
 
    switch ( format ) {
@@ -286,22 +299,157 @@ static void radeonClearDepth( GLcontext *ctx, GLclampd d )
  * Fog
  */
 
+
 static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLchan c[4];
+   union { int i; float f; } c, d;
+   GLchan col[4];
+
+   c.i = rmesa->hw.fog.cmd[FOG_C];
+   d.i = rmesa->hw.fog.cmd[FOG_D];
+
+   switch (pname) {
+   case GL_FOG_MODE:
+      if (!ctx->Fog.Enabled)
+	 return;
+      RADEON_STATECHANGE(rmesa, tcl);
+      rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_TCL_FOG_MASK;
+      switch (ctx->Fog.Mode) {
+      case GL_LINEAR:
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_LINEAR;
+	 if (ctx->Fog.Start == ctx->Fog.End) {
+	    c.f = 1.0F;
+	    d.f = 1.0F;
+	 }
+	 else {
+	    c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start);
+	    d.f = 1.0/(ctx->Fog.End-ctx->Fog.Start);
+	 }
+	 break;
+      case GL_EXP:
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_EXP;
+	 c.f = 0.0;
+	 d.f = ctx->Fog.Density;
+	 break;
+      case GL_EXP2:
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_EXP2;
+	 c.f = 0.0;
+	 d.f = -(ctx->Fog.Density * ctx->Fog.Density);
+	 break;
+      default:
+	 return;
+      }
+      break;
+   case GL_FOG_DENSITY:
+      switch (ctx->Fog.Mode) {
+      case GL_EXP:
+	 c.f = 0.0;
+	 d.f = ctx->Fog.Density;
+	 break;
+      case GL_EXP2:
+	 c.f = 0.0;
+	 d.f = -(ctx->Fog.Density * ctx->Fog.Density);
+	 break;
+      default:
+	 break;
+      }
+      break;
+   case GL_FOG_START:
+   case GL_FOG_END:
+      if (ctx->Fog.Mode == GL_LINEAR) {
+	 if (ctx->Fog.Start == ctx->Fog.End) {
+	    c.f = 1.0F;
+	    d.f = 1.0F;
+	 } else {
+	    c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start);
+	    d.f = 1.0/(ctx->Fog.End-ctx->Fog.Start);
+	 }
+      }
+      break;
+   case GL_FOG_COLOR: 
+      RADEON_STATECHANGE( rmesa, ctx );
+      UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color );
+      rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] =
+	 radeonPackColor( 4, col[0], col[1], col[2], 0 );
+      break;
+   case GL_FOG_COORDINATE_SOURCE_EXT: 
+      /* What to do?
+       */
+      break;
+   default:
+      return;
+   }
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
-   UNCLAMPED_FLOAT_TO_RGB_CHAN( c, ctx->Fog.Color );
-   rmesa->state.hw.context.pp_fog_color =
-      radeonPackColor( 4, c[0], c[1], c[2], 0 );
+   if (c.i != rmesa->hw.fog.cmd[FOG_C] || d.i != rmesa->hw.fog.cmd[FOG_D]) {
+      RADEON_STATECHANGE( rmesa, fog );
+      rmesa->hw.fog.cmd[FOG_C] = c.i;
+      rmesa->hw.fog.cmd[FOG_D] = d.i;
+   }
 }
 
 
 /* =============================================================
- * Clipping
+ * Scissoring
  */
 
+
+static GLboolean intersect_rect( XF86DRIClipRectPtr out,
+				 XF86DRIClipRectPtr a,
+				 XF86DRIClipRectPtr b )
+{
+   /* Start from a, then pull each edge in to b's wherever b is tighter. */
+   *out = *a;
+   if ( out->x1 < b->x1 ) out->x1 = b->x1;
+   if ( out->y1 < b->y1 ) out->y1 = b->y1;
+   if ( out->x2 > b->x2 ) out->x2 = b->x2;
+   if ( out->y2 > b->y2 ) out->y2 = b->y2;
+   /* GL_TRUE only when the overlap has a positive width and height. */
+   return ( out->x1 < out->x2 && out->y1 < out->y2 ) ? GL_TRUE : GL_FALSE;
+}
+
+/* Rebuild state.scissor.pClipRects: each cliprect clipped to the scissor. */
+void radeonRecalcScissorRects( radeonContextPtr rmesa )
+{
+   XF86DRIClipRectPtr out;
+   int i;
+
+   /* Grow the store (doubling) until it can hold one rect per cliprect. */
+   if (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
+      while (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
+	 rmesa->state.scissor.numAllocedClipRects += 1;	/* zero case */
+	 rmesa->state.scissor.numAllocedClipRects *= 2;
+      }
+
+      if (rmesa->state.scissor.pClipRects)
+	 FREE(rmesa->state.scissor.pClipRects);
+
+      rmesa->state.scissor.pClipRects =
+	 MALLOC( rmesa->state.scissor.numAllocedClipRects *
+		 sizeof(XF86DRIClipRectRec) );
+      /* Test the pointer: the count above is never zero at this point. */
+      if (!rmesa->state.scissor.pClipRects) {
+/*  	 FALLBACK( rmesa, RADEON_FALLBACK_MEMORY, GL_TRUE ); */
+	 rmesa->state.scissor.numAllocedClipRects = 0;
+	 rmesa->state.scissor.numClipRects = 0;
+	 return;
+      }
+   }
+
+   out = rmesa->state.scissor.pClipRects;
+   rmesa->state.scissor.numClipRects = 0;
+
+   for ( i = 0 ; i < rmesa->numClipRects ;  i++ ) {
+      if ( intersect_rect( out,
+			   &rmesa->pClipRects[i],
+			   &rmesa->state.scissor.rect ) ) {
+	 rmesa->state.scissor.numClipRects++;
+	 out++;
+      }
+   }
+}
+
+
 static void radeonUpdateScissor( GLcontext *ctx )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
@@ -319,8 +467,7 @@ static void radeonUpdateScissor( GLcontext *ctx )
       rmesa->state.scissor.rect.x2 = w + dPriv->x + 1;
       rmesa->state.scissor.rect.y2 = h + dPriv->y + 1;
 
-      if ( ctx->Scissor.Enabled )
-	 rmesa->upload_cliprects = 1;
+      radeonRecalcScissorRects( rmesa );
    }
 }
 
@@ -330,10 +477,11 @@ static void radeonScissor( GLcontext *ctx,
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-   if ( ctx->Scissor.Enabled )
+   if ( ctx->Scissor.Enabled ) {
       RADEON_FIREVERTICES( rmesa );	/* don't pipeline cliprect changes */
+      radeonUpdateScissor( ctx );
+   }
 
-   radeonUpdateScissor( ctx );
 }
 
 
@@ -344,27 +492,37 @@ static void radeonScissor( GLcontext *ctx,
 static void radeonCullFace( GLcontext *ctx, GLenum unused )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLuint s = rmesa->state.hw.setup1.se_cntl;
+   GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL];
+   GLuint t = rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL];
 
    s |= RADEON_FFACE_SOLID | RADEON_BFACE_SOLID;
+   t &= ~(RADEON_CULL_FRONT | RADEON_CULL_BACK);
 
    if ( ctx->Polygon.CullFlag ) {
       switch ( ctx->Polygon.CullFaceMode ) {
       case GL_FRONT:
 	 s &= ~RADEON_FFACE_SOLID;
+	 t |= RADEON_CULL_FRONT;
 	 break;
       case GL_BACK:
 	 s &= ~RADEON_BFACE_SOLID;
+	 t |= RADEON_CULL_BACK;
 	 break;
       case GL_FRONT_AND_BACK:
 	 s &= ~(RADEON_FFACE_SOLID | RADEON_BFACE_SOLID);
+	 t |= (RADEON_CULL_FRONT | RADEON_CULL_BACK);
 	 break;
       }
    }
 
-   if ( rmesa->state.hw.setup1.se_cntl != s ) {
-      RADEON_STATECHANGE(rmesa, RADEON_UPLOAD_SETUP);
-      rmesa->state.hw.setup1.se_cntl = s;
+   if ( rmesa->hw.set.cmd[SET_SE_CNTL] != s ) {
+      RADEON_STATECHANGE(rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_CNTL] = s;
+   }
+
+   if ( rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] != t ) {
+      RADEON_STATECHANGE(rmesa, tcl );
+      rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] = t;
    }
 }
 
@@ -372,15 +530,19 @@ static void radeonFrontFace( GLcontext *ctx, GLenum mode )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_SETUP );
-   rmesa->state.hw.setup1.se_cntl &= ~RADEON_FFACE_CULL_DIR_MASK;
+   RADEON_STATECHANGE( rmesa, set );
+   rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_FFACE_CULL_DIR_MASK;
+
+   RADEON_STATECHANGE( rmesa, tcl );
+   rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_CULL_FRONT_IS_CCW;
 
    switch ( mode ) {
    case GL_CW:
-      rmesa->state.hw.setup1.se_cntl |= RADEON_FFACE_CULL_CW;
+      rmesa->hw.set.cmd[SET_SE_CNTL] |= RADEON_FFACE_CULL_CW;
       break;
    case GL_CCW:
-      rmesa->state.hw.setup1.se_cntl |= RADEON_FFACE_CULL_CCW;
+      rmesa->hw.set.cmd[SET_SE_CNTL] |= RADEON_FFACE_CULL_CCW;
+      rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_CULL_FRONT_IS_CCW;
       break;
    }
 }
@@ -393,15 +555,16 @@ static void radeonLineWidth( GLcontext *ctx, GLfloat widthf )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_LINE | RADEON_UPLOAD_SETUP );
+   RADEON_STATECHANGE( rmesa, lin );
+   RADEON_STATECHANGE( rmesa, set );
 
    /* Line width is stored in U6.4 format.
     */
-   rmesa->state.hw.line.se_line_width = (GLuint)(widthf * 16.0);
+   rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] = (GLuint)(widthf * 16.0);
    if ( widthf > 1.0 ) {
-      rmesa->state.hw.setup1.se_cntl |=  RADEON_WIDELINE_ENABLE;
+      rmesa->hw.set.cmd[SET_SE_CNTL] |=  RADEON_WIDELINE_ENABLE;
    } else {
-      rmesa->state.hw.setup1.se_cntl &= ~RADEON_WIDELINE_ENABLE;
+      rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_WIDELINE_ENABLE;
    }
 }
 
@@ -409,10 +572,9 @@ static void radeonLineStipple( GLcontext *ctx, GLint factor, GLushort pattern )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_LINE );
-
-   rmesa->state.hw.line.re_line_pattern = ((((GLuint)factor & 0xff) << 16) |
-					((GLuint)pattern));
+   RADEON_STATECHANGE( rmesa, lin );
+   rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = 
+      ((((GLuint)factor & 0xff) << 16) | ((GLuint)pattern));
 }
 
 
@@ -430,9 +592,9 @@ static void radeonColorMask( GLcontext *ctx,
 				  ctx->Color.ColorMask[BCOMP],
 				  ctx->Color.ColorMask[ACOMP] );
 
-   if ( rmesa->state.hw.mask.rb3d_planemask != mask ) {
-      RADEON_STATECHANGE( rmesa,  RADEON_UPLOAD_MASKS );
-      rmesa->state.hw.mask.rb3d_planemask = mask;
+   if ( rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] != mask ) {
+      RADEON_STATECHANGE( rmesa, msk );
+      rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] = mask;
    }
 }
 
@@ -447,9 +609,9 @@ static void radeonPolygonOffset( GLcontext *ctx,
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    GLfloat constant = units * rmesa->state.depth.scale;
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_ZBIAS );
-   rmesa->state.hw.zbias.se_zbias_factor   = *(GLuint *)&factor;
-   rmesa->state.hw.zbias.se_zbias_constant = *(GLuint *)&constant;
+   RADEON_STATECHANGE( rmesa, zbs );
+   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_FACTOR]   = *(GLuint *)&factor;
+   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = *(GLuint *)&constant;
 }
 
 static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask )
@@ -464,6 +626,8 @@ static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask )
       rmesa->state.stipple.mask[31 - i] = ((GLuint *) mask)[i];
    }
 
+   /* TODO: push this into cmd mechanism
+    */
    RADEON_FIREVERTICES( rmesa );
    LOCK_HARDWARE( rmesa );
 
@@ -475,6 +639,21 @@ static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask )
    UNLOCK_HARDWARE( rmesa );
 }
 
+static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLboolean unfilled = (ctx->_TriangleCaps & DD_TRI_UNFILLED) ? GL_TRUE : GL_FALSE;
+
+   /* Unfilled polygons generally can't be done via tcl (some special cases
+    * do work), so the unfilled tcl fallback bit follows DD_TRI_UNFILLED.
+    * With any tcl fallback active, render and vertex state are re-chosen. */
+   TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_UNFILLED, unfilled );
+   if (!rmesa->TclFallback)
+      return;
+   radeonChooseRenderState( ctx );
+   radeonChooseVertexState( ctx );
+}
+
 
 /* =============================================================
  * Rendering attributes
@@ -490,34 +669,477 @@ static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask )
 static void radeonUpdateSpecular( GLcontext *ctx )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   CARD32 p = rmesa->state.hw.context.pp_cntl;
+   CARD32 p = rmesa->hw.ctx.cmd[CTX_PP_CNTL];
 
-   if ( ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR &&
-        ctx->Light.Enabled) {
+   if ( ctx->_TriangleCaps & DD_SEPARATE_SPECULAR ) {
       p |=  RADEON_SPECULAR_ENABLE;
    } else {
       p &= ~RADEON_SPECULAR_ENABLE;
    }
 
-   if ( rmesa->state.hw.context.pp_cntl != p ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
-      rmesa->state.hw.context.pp_cntl = p;
+   if ( rmesa->hw.ctx.cmd[CTX_PP_CNTL] != p ) {
+      RADEON_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] = p;
+   }
+
+   /* Bizarre: have to leave lighting enabled to get fog.
+    */
+   RADEON_STATECHANGE( rmesa, tcl );
+   if ((ctx->Light.Enabled &&
+	ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR)) {
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
+   }
+   else if (ctx->Fog.Enabled) {
+      if (ctx->Light.Enabled) {
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR;
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE;
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC;
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
+      } else {
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR;
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_DIFFUSE;
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC;
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
+      }
+   }
+   else if (ctx->Light.Enabled) {
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_SPECULAR;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~RADEON_TCL_VTX_PK_SPEC;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
+   } else if (ctx->Fog.ColorSumEnabled ) {
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_SPECULAR;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LIGHTING_ENABLE;
+   } else {
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_SPECULAR;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~RADEON_TCL_VTX_PK_SPEC;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LIGHTING_ENABLE;
+   }
+
+   /* Update vertex/render formats
+    */
+   if (rmesa->TclFallback) { 
+      radeonChooseRenderState( ctx );
+      radeonChooseVertexState( ctx );
+   }
+}
+
+
+/* =============================================================
+ * Materials
+ */
+
+
+/* Update on colormaterial, material emissive/ambient,
+ * lightmodel.globalambient
+ */
+static void update_global_ambient( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   float *fcmd = (float *)RADEON_DB_STATE( glt );
+
+   /* Need to do more if both emissive & ambient are PREMULT:
+    */
+   if ((rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &
+       ((3 << RADEON_EMISSIVE_SOURCE_SHIFT) |
+	(3 << RADEON_AMBIENT_SOURCE_SHIFT))) == 0) 
+   {
+      COPY_3V( &fcmd[GLT_RED], 
+	       ctx->Light.Material[0].Emission);
+      ACC_SCALE_3V( &fcmd[GLT_RED],
+		   ctx->Light.Model.Ambient,
+		   ctx->Light.Material[0].Ambient);
+   } 
+   else
+   {
+      COPY_3V( &fcmd[GLT_RED], ctx->Light.Model.Ambient );
+   }
+   
+   RADEON_DB_STATECHANGE(rmesa, &rmesa->hw.glt);
+}
+
+/* Update on change to 
+ *    - light[p].colors
+ *    - light[p].enabled
+ *    - material,
+ *    - colormaterial enabled
+ *    - colormaterial bitmask
+ */
+static void update_light_colors( GLcontext *ctx, GLuint p )
+{
+   struct gl_light *l = &ctx->Light.Light[p];
+
+/*     fprintf(stderr, "%s\n", __FUNCTION__); */
+
+   if (l->Enabled) {
+      radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+      float *fcmd = (float *)RADEON_DB_STATE( lit[p] );
+      GLuint bitmask = ctx->Light.ColorMaterialBitmask;
+      struct gl_material *mat = &ctx->Light.Material[0];
+
+      COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient );	 
+      COPY_4V( &fcmd[LIT_DIFFUSE_RED], l->Diffuse );
+      COPY_4V( &fcmd[LIT_SPECULAR_RED], l->Specular );
+      
+      if (!ctx->Light.ColorMaterialEnabled)
+	 bitmask = 0;
+
+      if ((bitmask & FRONT_AMBIENT_BIT) == 0) 
+	 SELF_SCALE_3V( &fcmd[LIT_AMBIENT_RED], mat->Ambient );
+
+      if ((bitmask & FRONT_DIFFUSE_BIT) == 0) 
+	 SELF_SCALE_3V( &fcmd[LIT_DIFFUSE_RED], mat->Diffuse );
+      
+      if ((bitmask & FRONT_SPECULAR_BIT) == 0) 
+	 SELF_SCALE_3V( &fcmd[LIT_SPECULAR_RED], mat->Specular );
+
+      RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] );
+   }
+}
+
+/* Also fallback for asym colormaterial mode in twoside lighting...
+ */
+static void check_twoside_fallback( GLcontext *ctx )
+{
+   GLboolean fallback = GL_FALSE;
+
+   if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) {
+      if (memcmp( &ctx->Light.Material[0],
+		  &ctx->Light.Material[1],
+		  sizeof(struct gl_material)) != 0)
+	 fallback = GL_TRUE;  
+      else if (ctx->Light.ColorMaterialEnabled &&
+	       (ctx->Light.ColorMaterialBitmask & BACK_MATERIAL_BITS) != 
+	       ((ctx->Light.ColorMaterialBitmask & FRONT_MATERIAL_BITS)<<1))
+	 fallback = GL_TRUE;
+   }
+
+   TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_LIGHT_TWOSIDE, fallback );
+}
+
+static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
+{
+   if (ctx->Light.ColorMaterialEnabled) {
+      radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+      GLuint light_model_ctl = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL];
+      GLuint mask = ctx->Light.ColorMaterialBitmask;
+
+      /* Default to PREMULT:
+       */
+      light_model_ctl &= ~((3 << RADEON_EMISSIVE_SOURCE_SHIFT) |
+			   (3 << RADEON_AMBIENT_SOURCE_SHIFT) |
+			   (3 << RADEON_DIFFUSE_SOURCE_SHIFT) |
+			   (3 << RADEON_SPECULAR_SOURCE_SHIFT)); 
+   
+      if (mask & FRONT_EMISSION_BIT) {
+	 light_model_ctl |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE <<
+			     RADEON_EMISSIVE_SOURCE_SHIFT);
+      }
+
+      if (mask & FRONT_AMBIENT_BIT) {
+	 light_model_ctl |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE <<
+			     RADEON_AMBIENT_SOURCE_SHIFT);
+      }
+	 
+      if (mask & FRONT_DIFFUSE_BIT) {
+	 light_model_ctl |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE <<
+			     RADEON_DIFFUSE_SOURCE_SHIFT);
+      }
+   
+      if (mask & FRONT_SPECULAR_BIT) {
+	 light_model_ctl |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE <<
+			     RADEON_SPECULAR_SOURCE_SHIFT);
+      }
+   
+      if (light_model_ctl != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) {
+	 GLuint p;
+
+	 RADEON_STATECHANGE( rmesa, tcl );
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = light_model_ctl;      
+
+	 for (p = 0 ; p < MAX_LIGHTS; p++) 
+	    update_light_colors( ctx, p );
+      }
+   }
+   
+   check_twoside_fallback( ctx );
+}
+
+void radeonUpdateMaterial( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( mtl );
+   GLuint p;
+   GLuint mask = ~0;
+   
+   if (ctx->Light.ColorMaterialEnabled)
+      mask &= ~ctx->Light.ColorMaterialBitmask;
+
+   if (RADEON_DEBUG & DEBUG_STATE)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+      
+   if (mask & FRONT_EMISSION_BIT) {
+      fcmd[MTL_EMMISSIVE_RED]   = ctx->Light.Material[0].Emission[0];
+      fcmd[MTL_EMMISSIVE_GREEN] = ctx->Light.Material[0].Emission[1];
+      fcmd[MTL_EMMISSIVE_BLUE]  = ctx->Light.Material[0].Emission[2];
+      fcmd[MTL_EMMISSIVE_ALPHA] = ctx->Light.Material[0].Emission[3];
+   }
+   if (mask & FRONT_AMBIENT_BIT) {
+      fcmd[MTL_AMBIENT_RED]     = ctx->Light.Material[0].Ambient[0];
+      fcmd[MTL_AMBIENT_GREEN]   = ctx->Light.Material[0].Ambient[1];
+      fcmd[MTL_AMBIENT_BLUE]    = ctx->Light.Material[0].Ambient[2];
+      fcmd[MTL_AMBIENT_ALPHA]   = ctx->Light.Material[0].Ambient[3];
+   }
+   if (mask & FRONT_DIFFUSE_BIT) {
+      fcmd[MTL_DIFFUSE_RED]     = ctx->Light.Material[0].Diffuse[0];
+      fcmd[MTL_DIFFUSE_GREEN]   = ctx->Light.Material[0].Diffuse[1];
+      fcmd[MTL_DIFFUSE_BLUE]    = ctx->Light.Material[0].Diffuse[2];
+      fcmd[MTL_DIFFUSE_ALPHA]   = ctx->Light.Material[0].Diffuse[3];
+   }
+   if (mask & FRONT_SPECULAR_BIT) {
+      fcmd[MTL_SPECULAR_RED]    = ctx->Light.Material[0].Specular[0];
+      fcmd[MTL_SPECULAR_GREEN]  = ctx->Light.Material[0].Specular[1];
+      fcmd[MTL_SPECULAR_BLUE]   = ctx->Light.Material[0].Specular[2];
+      fcmd[MTL_SPECULAR_ALPHA]  = ctx->Light.Material[0].Specular[3];
+   }
+   if (mask & FRONT_SHININESS_BIT) {
+      fcmd[MTL_SHININESS]       = ctx->Light.Material[0].Shininess;
+   }
+
+   if (RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mtl )) {
+      for (p = 0 ; p < MAX_LIGHTS; p++) 
+	 update_light_colors( ctx, p );
+
+      check_twoside_fallback( ctx );
+      update_global_ambient( ctx );
+   }
+   else if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_STATE))
+      fprintf(stderr, "%s: Elided noop material call\n", __FUNCTION__);
+}
+
+/* _NEW_LIGHT
+ * _NEW_MODELVIEW
+ * _MESA_NEW_NEED_EYE_COORDS
+ *
+ * Uses derived state from mesa:
+ *       _VP_inf_norm
+ *       _h_inf_norm
+ *       _Position
+ *       _NormDirection
+ *       _ModelViewInvScale
+ *       _NeedEyeCoords
+ *       _EyeZDir
+ *
+ * which are calculated in light.c and are correct for the current
+ * lighting space (model or eye), hence dependencies on _NEW_MODELVIEW
+ * and _MESA_NEW_NEED_EYE_COORDS.  
+ */
+static void update_light( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   /* Have to check these, or have an automatic shortcircuit mechanism
+    * to remove noop statechanges. (Or just do a better job on the
+    * front end).
+    */
+   {
+      GLuint tmp = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL];
+
+      if (ctx->_NeedEyeCoords)
+	 tmp &= ~RADEON_LIGHT_IN_MODELSPACE;
+      else
+	 tmp |= RADEON_LIGHT_IN_MODELSPACE;
+      
+
+      /* NOTE(review): the note here read "Leave this test disabled:
+         (unexplained q3 lockup) (even with new packets)", yet the test
+         below is active -- confirm which is intended. */
+      if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) 
+      {
+	 RADEON_STATECHANGE( rmesa, tcl );
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = tmp;
+      }
+   }
+
+   {
+      GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( eye );
+      fcmd[EYE_X] = ctx->_EyeZDir[0];
+      fcmd[EYE_Y] = ctx->_EyeZDir[1];
+      fcmd[EYE_Z] = - ctx->_EyeZDir[2];
+      fcmd[EYE_RESCALE_FACTOR] = ctx->_ModelViewInvScale;
+      RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.eye );
+   }
+
+
+/*     RADEON_STATECHANGE( rmesa, glt ); */
+
+   if (ctx->Light.Enabled) {
+      GLint p;
+      for (p = 0 ; p < MAX_LIGHTS; p++) {
+	 if (ctx->Light.Light[p].Enabled) {
+	    struct gl_light *l = &ctx->Light.Light[p];
+	    GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( lit[p] );
+	    
+	    if (l->EyePosition[3] == 0.0) {
+	       COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm ); 
+	       COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm ); 
+	       fcmd[LIT_POSITION_W] = 0;
+	       fcmd[LIT_DIRECTION_W] = 0;
+	    } else {
+	       COPY_4V( &fcmd[LIT_POSITION_X], l->_Position );
+	       fcmd[LIT_DIRECTION_X] = -l->_NormDirection[0];
+	       fcmd[LIT_DIRECTION_Y] = -l->_NormDirection[1];
+	       fcmd[LIT_DIRECTION_Z] = -l->_NormDirection[2];
+	       fcmd[LIT_DIRECTION_W] = 0;
+	    }
+
+	    RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] );
+	 }
+      }
+   }
+}
+
+static void radeonLightfv( GLcontext *ctx, GLenum light,
+			   GLenum pname, const GLfloat *params )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLint p = light - GL_LIGHT0;
+   struct gl_light *l = &ctx->Light.Light[p];
+   GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd;
+   
+
+   switch (pname) {
+   case GL_AMBIENT:		
+   case GL_DIFFUSE:
+   case GL_SPECULAR:
+      update_light_colors( ctx, p );
+      break;
+
+   case GL_SPOT_DIRECTION: 
+      /* picked up in update_light */	
+      break;
+
+   case GL_POSITION: {
+      /* positions picked up in update_light, but can do flag here */	
+      GLuint flag = (p&1)? RADEON_LIGHT_1_IS_LOCAL : RADEON_LIGHT_0_IS_LOCAL;
+      GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2;
+
+      RADEON_STATECHANGE(rmesa, tcl);
+      if (l->EyePosition[3] != 0.0F)
+	 rmesa->hw.tcl.cmd[idx] |= flag;
+      else
+	 rmesa->hw.tcl.cmd[idx] &= ~flag;
+      break;
+   }
+
+   case GL_SPOT_EXPONENT:
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_SPOT_EXPONENT] = params[0];
+      break;
+
+   case GL_SPOT_CUTOFF: {
+      GLuint flag = (p&1) ? RADEON_LIGHT_1_IS_SPOT : RADEON_LIGHT_0_IS_SPOT;
+      GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2;
+
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_SPOT_CUTOFF] = l->_CosCutoff;
+
+      RADEON_STATECHANGE(rmesa, tcl);
+      if (l->SpotCutoff != 180.0F)
+	 rmesa->hw.tcl.cmd[idx] |= flag;
+      else
+	 rmesa->hw.tcl.cmd[idx] &= ~flag;
+      break;
+   }
+
+   case GL_CONSTANT_ATTENUATION:
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_ATTEN_CONST] = params[0];
+      break;
+   case GL_LINEAR_ATTENUATION:
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_ATTEN_LINEAR] = params[0];
+      break;
+   case GL_QUADRATIC_ATTENUATION:
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_ATTEN_QUADRATIC] = params[0];
+      break;
+   default:
+      return;
    }
+
 }
 
+		  
+
 
 static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
 				const GLfloat *param )
 {
-   if ( pname == GL_LIGHT_MODEL_COLOR_CONTROL ) {
-      radeonUpdateSpecular(ctx);
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   switch (pname) {
+      case GL_LIGHT_MODEL_AMBIENT: 
+	 update_global_ambient( ctx );
+	 break;
+
+      case GL_LIGHT_MODEL_LOCAL_VIEWER:
+	 RADEON_STATECHANGE( rmesa, tcl );
+	 if (ctx->Light.Model.LocalViewer)
+	    rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LOCAL_VIEWER;
+	 else
+	    rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LOCAL_VIEWER;
+         break;
+
+      case GL_LIGHT_MODEL_TWO_SIDE:
+	 RADEON_STATECHANGE( rmesa, tcl );
+	 if (ctx->Light.Model.TwoSide)
+	    rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_LIGHT_TWOSIDE;
+	 else
+	    rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_LIGHT_TWOSIDE;
+
+	 check_twoside_fallback( ctx );
+
+	 if (rmesa->TclFallback) {
+	    radeonChooseRenderState( ctx );
+	    radeonChooseVertexState( ctx );
+	 }
+         break;
+
+      case GL_LIGHT_MODEL_COLOR_CONTROL:
+	 radeonUpdateSpecular(ctx);
+
+	 RADEON_STATECHANGE( rmesa, tcl );
+	 if (ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR) 
+	    rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= 
+	       ~RADEON_DIFFUSE_SPECULAR_COMBINE;
+	 else
+	    rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= 
+	       RADEON_DIFFUSE_SPECULAR_COMBINE;
+         break;
+
+      default:
+         break;
    }
 }
 
 static void radeonShadeModel( GLcontext *ctx, GLenum mode )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLuint s = rmesa->state.hw.setup1.se_cntl;
+   GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL];
 
    s &= ~(RADEON_DIFFUSE_SHADE_MASK |
 	  RADEON_ALPHA_SHADE_MASK |
@@ -541,9 +1163,45 @@ static void radeonShadeModel( GLcontext *ctx, GLenum mode )
       return;
    }
 
-   if ( rmesa->state.hw.setup1.se_cntl != s ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_SETUP );
-      rmesa->state.hw.setup1.se_cntl = s;
+   if ( rmesa->hw.set.cmd[SET_SE_CNTL] != s ) {
+      RADEON_STATECHANGE( rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_CNTL] = s;
+   }
+}
+
+
+/* =============================================================
+ * User clip planes
+ */
+
+static void radeonClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
+{
+   GLint p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p];
+
+   RADEON_STATECHANGE( rmesa, ucp[p] );
+   rmesa->hw.ucp[p].cmd[UCP_X] = ip[0];
+   rmesa->hw.ucp[p].cmd[UCP_Y] = ip[1];
+   rmesa->hw.ucp[p].cmd[UCP_Z] = ip[2];
+   rmesa->hw.ucp[p].cmd[UCP_W] = ip[3];
+}
+
+static void radeonUpdateClipPlanes( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint p;
+
+   for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
+      if (ctx->Transform.ClipEnabled[p]) {
+	 GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p];
+
+	 RADEON_STATECHANGE( rmesa, ucp[p] );
+	 rmesa->hw.ucp[p].cmd[UCP_X] = ip[0];
+	 rmesa->hw.ucp[p].cmd[UCP_Y] = ip[1];
+	 rmesa->hw.ucp[p].cmd[UCP_Z] = ip[2];
+	 rmesa->hw.ucp[p].cmd[UCP_W] = ip[3];
+      }
    }
 }
 
@@ -559,50 +1217,50 @@ static void radeonStencilFunc( GLcontext *ctx, GLenum func,
    GLuint refmask = ((ctx->Stencil.Ref << RADEON_STENCIL_REF_SHIFT) |
 		     (ctx->Stencil.ValueMask << RADEON_STENCIL_MASK_SHIFT));
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT | RADEON_UPLOAD_MASKS );
+   RADEON_STATECHANGE( rmesa, ctx );
+   RADEON_STATECHANGE( rmesa, msk );
 
-   rmesa->state.hw.context.rb3d_zstencilcntl &= ~RADEON_STENCIL_TEST_MASK;
-   rmesa->state.hw.mask.rb3d_stencilrefmask &= ~(RADEON_STENCIL_REF_MASK|
-					      RADEON_STENCIL_VALUE_MASK);
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_STENCIL_TEST_MASK;
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~(RADEON_STENCIL_REF_MASK|
+						   RADEON_STENCIL_VALUE_MASK);
 
    switch ( ctx->Stencil.Function ) {
    case GL_NEVER:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_TEST_NEVER;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_NEVER;
       break;
    case GL_LESS:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_TEST_LESS;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_LESS;
       break;
    case GL_EQUAL:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_TEST_EQUAL;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_EQUAL;
       break;
    case GL_LEQUAL:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_TEST_LEQUAL;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_LEQUAL;
       break;
    case GL_GREATER:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_TEST_GREATER;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_GREATER;
       break;
    case GL_NOTEQUAL:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_TEST_NEQUAL;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_NEQUAL;
       break;
    case GL_GEQUAL:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_TEST_GEQUAL;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_GEQUAL;
       break;
    case GL_ALWAYS:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_TEST_ALWAYS;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_ALWAYS;
       break;
    }
 
-   rmesa->state.hw.mask.rb3d_stencilrefmask |= refmask;
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] |= refmask;
 }
 
 static void radeonStencilMask( GLcontext *ctx, GLuint mask )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_MASKS );
-   rmesa->state.hw.mask.rb3d_stencilrefmask &= ~RADEON_STENCIL_WRITE_MASK;
-
-   rmesa->state.hw.mask.rb3d_stencilrefmask |=
+   RADEON_STATECHANGE( rmesa, msk );
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~RADEON_STENCIL_WRITE_MASK;
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] |=
       (ctx->Stencil.WriteMask << RADEON_STENCIL_WRITEMASK_SHIFT);
 }
 
@@ -611,71 +1269,71 @@ static void radeonStencilOp( GLcontext *ctx, GLenum fail,
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
-   rmesa->state.hw.context.rb3d_zstencilcntl &= ~(RADEON_STENCIL_FAIL_MASK |
+   RADEON_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~(RADEON_STENCIL_FAIL_MASK |
 					       RADEON_STENCIL_ZFAIL_MASK |
 					       RADEON_STENCIL_ZPASS_MASK);
 
    switch ( ctx->Stencil.FailFunc ) {
    case GL_KEEP:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_FAIL_KEEP;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_KEEP;
       break;
    case GL_ZERO:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_FAIL_ZERO;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_ZERO;
       break;
    case GL_REPLACE:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_FAIL_REPLACE;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_REPLACE;
       break;
    case GL_INCR:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_FAIL_INC;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_INC;
       break;
    case GL_DECR:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_FAIL_DEC;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_DEC;
       break;
    case GL_INVERT:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_FAIL_INVERT;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_INVERT;
       break;
    }
 
    switch ( ctx->Stencil.ZFailFunc ) {
    case GL_KEEP:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_ZFAIL_KEEP;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_KEEP;
       break;
    case GL_ZERO:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_ZFAIL_ZERO;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_ZERO;
       break;
    case GL_REPLACE:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_ZFAIL_REPLACE;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_REPLACE;
       break;
    case GL_INCR:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_ZFAIL_INC;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_INC;
       break;
    case GL_DECR:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_ZFAIL_DEC;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_DEC;
       break;
    case GL_INVERT:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_ZFAIL_INVERT;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_INVERT;
       break;
    }
 
    switch ( ctx->Stencil.ZPassFunc ) {
    case GL_KEEP:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_ZPASS_KEEP;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_KEEP;
       break;
    case GL_ZERO:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_ZPASS_ZERO;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_ZERO;
       break;
    case GL_REPLACE:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_ZPASS_REPLACE;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_REPLACE;
       break;
    case GL_INCR:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_ZPASS_INC;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_INC;
       break;
    case GL_DECR:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_ZPASS_DEC;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_DEC;
       break;
    case GL_INVERT:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_ZPASS_INVERT;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_INVERT;
       break;
    }
 }
@@ -684,9 +1342,10 @@ static void radeonClearStencil( GLcontext *ctx, GLint s )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-   rmesa->state.stencil.clear = ((GLuint) ctx->Stencil.Clear |
-				 (0xff << RADEON_STENCIL_MASK_SHIFT) |
-				 (ctx->Stencil.WriteMask << RADEON_STENCIL_WRITEMASK_SHIFT));
+   rmesa->state.stencil.clear = 
+      ((GLuint) ctx->Stencil.Clear |
+       (0xff << RADEON_STENCIL_MASK_SHIFT) |
+       (ctx->Stencil.WriteMask << RADEON_STENCIL_WRITEMASK_SHIFT));
 }
 
 
@@ -714,16 +1373,15 @@ void radeonUpdateWindow( GLcontext *ctx )
    GLfloat sz = v[MAT_SZ] * rmesa->state.depth.scale;
    GLfloat tz = v[MAT_TZ] * rmesa->state.depth.scale;
 
-/*     fprintf(stderr, "radeonUpdateWindow %d,%d %dx%d\n", */
-/*  	   dPriv->x, dPriv->y, dPriv->w, dPriv->h); */
-
-   RADEON_STATECHANGE(rmesa, RADEON_UPLOAD_VIEWPORT);
-   rmesa->state.hw.viewport.se_vport_xscale  = *(GLuint *)&sx;
-   rmesa->state.hw.viewport.se_vport_xoffset = *(GLuint *)&tx;
-   rmesa->state.hw.viewport.se_vport_yscale  = *(GLuint *)&sy;
-   rmesa->state.hw.viewport.se_vport_yoffset = *(GLuint *)&ty;
-   rmesa->state.hw.viewport.se_vport_zscale  = *(GLuint *)&sz;
-   rmesa->state.hw.viewport.se_vport_zoffset = *(GLuint *)&tz;
+   RADEON_FIREVERTICES( rmesa );
+   RADEON_STATECHANGE( rmesa, vpt );
+
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = *(GLuint *)&sx;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = *(GLuint *)&tx;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE]  = *(GLuint *)&sy;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = *(GLuint *)&ty;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE]  = *(GLuint *)&sz;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = *(GLuint *)&tz;
 }
 
 
@@ -756,29 +1414,19 @@ void radeonUpdateViewportOffset( GLcontext *ctx )
    GLfloat tx = v[MAT_TX] + xoffset;
    GLfloat ty = (- v[MAT_TY]) + yoffset;
 
-   if ( rmesa->state.hw.viewport.se_vport_xoffset != tx ||
-	rmesa->state.hw.viewport.se_vport_yoffset != ty )
+   if ( rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] != *(GLuint *)&tx ||
+	rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] != *(GLuint *)&ty )
    {
-      rmesa->state.hw.viewport.se_vport_xoffset = *(GLuint *)&tx;
-      rmesa->state.hw.viewport.se_vport_yoffset = *(GLuint *)&ty;
-
-      if (rmesa->store.statenr) {
-	 int i;
-	 rmesa->store.state[0].dirty |= RADEON_UPLOAD_VIEWPORT;
-	 /* Note: assume vport x/yoffset are constant over the buffer:
-	  */
-	 for (i = 0 ; i < rmesa->store.statenr ; i++) {
-	    rmesa->store.state[i].viewport.se_vport_xoffset = *(GLuint *)&tx;
-	    rmesa->store.state[i].viewport.se_vport_yoffset = *(GLuint *)&ty;
-	 }
-      } else {
-	 rmesa->state.hw.dirty |= RADEON_UPLOAD_VIEWPORT;
-      }
-
+      /* cmd[] holds the raw float bits, so the test above compares bit
+       * patterns.  Note: this should also modify whatever data the
+       * context reset code uses... */
+      rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = *(GLuint *)&tx;
+      rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = *(GLuint *)&ty;
+      
       /* update polygon stipple x/y screen offset */
       {
          GLuint stx, sty;
-         GLuint m = rmesa->state.hw.misc.re_misc;
+         GLuint m = rmesa->hw.msc.cmd[MSC_RE_MISC];
 
          m &= ~(RADEON_STIPPLE_X_OFFSET_MASK |
                 RADEON_STIPPLE_Y_OFFSET_MASK);
@@ -791,9 +1439,9 @@ void radeonUpdateViewportOffset( GLcontext *ctx )
          m |= ((stx << RADEON_STIPPLE_X_OFFSET_SHIFT) |
                (sty << RADEON_STIPPLE_Y_OFFSET_SHIFT));
 
-         if ( rmesa->state.hw.misc.re_misc != m ) {
-            rmesa->state.hw.misc.re_misc = m;
-            RADEON_STATECHANGE(rmesa, RADEON_UPLOAD_MISC);
+         if ( rmesa->hw.msc.cmd[MSC_RE_MISC] != m ) {
+            RADEON_STATECHANGE( rmesa, msc );
+	    rmesa->hw.msc.cmd[MSC_RE_MISC] = m;
          }
       }
    }
@@ -848,8 +1496,8 @@ static void radeonLogicOpCode( GLcontext *ctx, GLenum opcode )
 
    ASSERT( rop < 16 );
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_MASKS );
-   rmesa->state.hw.mask.rb3d_ropcntl = radeon_rop_tab[rop];
+   RADEON_STATECHANGE( rmesa, msk );
+   rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = radeon_rop_tab[rop];
 }
 
 
@@ -863,7 +1511,9 @@ void radeonSetCliprects( radeonContextPtr rmesa, GLenum mode )
       rmesa->pClipRects = (XF86DRIClipRectPtr)dPriv->pClipRects;
       break;
    case GL_BACK_LEFT:
-      if ( dPriv->numBackClipRects == 0 ) {
+      /* Can't ignore 2d windows if we are page flipping.
+       */
+      if ( dPriv->numBackClipRects == 0 || rmesa->doPageFlip ) {
 	 rmesa->numClipRects = dPriv->numClipRects;
 	 rmesa->pClipRects = (XF86DRIClipRectPtr)dPriv->pClipRects;
       }
@@ -873,43 +1523,57 @@ void radeonSetCliprects( radeonContextPtr rmesa, GLenum mode )
       }
       break;
    default:
+      fprintf(stderr, "bad mode in radeonSetCliprects\n");
       return;
    }
 
-   rmesa->upload_cliprects = 1;
+   if (rmesa->state.scissor.enabled)
+      radeonRecalcScissorRects( rmesa );
 }
 
 
-static GLboolean radeonSetDrawBuffer( GLcontext *ctx, GLenum mode )
+static void radeonSetDrawBuffer( GLcontext *ctx, GLenum mode )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
+   if (RADEON_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "%s %s\n", __FUNCTION__,
+	      _mesa_lookup_enum_by_nr( mode ));
+
    RADEON_FIREVERTICES(rmesa);	/* don't pipeline cliprect changes */
 
    switch ( mode ) {
    case GL_FRONT_LEFT:
       FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_FALSE );
-      rmesa->state.color.drawOffset = rmesa->radeonScreen->frontOffset;
-      rmesa->state.color.drawPitch  = rmesa->radeonScreen->frontPitch;
+      if ( rmesa->doPageFlip && rmesa->sarea->pfCurrentPage == 1 ) {
+        rmesa->state.color.drawOffset = rmesa->radeonScreen->backOffset;
+        rmesa->state.color.drawPitch  = rmesa->radeonScreen->backPitch;
+      } else {
+      	rmesa->state.color.drawOffset = rmesa->radeonScreen->frontOffset;
+      	rmesa->state.color.drawPitch  = rmesa->radeonScreen->frontPitch;
+      }
       radeonSetCliprects( rmesa, GL_FRONT_LEFT );
       break;
    case GL_BACK_LEFT:
       FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_FALSE );
-      rmesa->state.color.drawOffset = rmesa->radeonScreen->backOffset;
-      rmesa->state.color.drawPitch  = rmesa->radeonScreen->backPitch;
+      if ( rmesa->doPageFlip && rmesa->sarea->pfCurrentPage == 1 ) {
+      	rmesa->state.color.drawOffset = rmesa->radeonScreen->frontOffset;
+      	rmesa->state.color.drawPitch  = rmesa->radeonScreen->frontPitch;
+      } else {
+        rmesa->state.color.drawOffset = rmesa->radeonScreen->backOffset;
+        rmesa->state.color.drawPitch  = rmesa->radeonScreen->backPitch;
+      }
       radeonSetCliprects( rmesa, GL_BACK_LEFT );
       break;
    default:
       FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE );
-      return GL_FALSE;
+      return;
    }
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
-   rmesa->state.hw.context.rb3d_coloroffset = (rmesa->state.color.drawOffset &
+   RADEON_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = (rmesa->state.color.drawOffset &
 					    RADEON_COLOROFFSET_MASK);
-   rmesa->state.hw.context.rb3d_colorpitch = rmesa->state.color.drawPitch;
-
-   return GL_TRUE;
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = rmesa->state.color.drawPitch;
 }
 
 
@@ -920,8 +1584,9 @@ static GLboolean radeonSetDrawBuffer( GLcontext *ctx, GLenum mode )
 static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint p, flag;
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_API )
+   if ( RADEON_DEBUG & DEBUG_STATE )
       fprintf( stderr, __FUNCTION__"( %s = %s )\n",
 	       _mesa_lookup_enum_by_nr( cap ),
 	       state ? "GL_TRUE" : "GL_FALSE" );
@@ -935,373 +1600,487 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
       break;
 
    case GL_ALPHA_TEST:
-      RADEON_STATECHANGE(rmesa, RADEON_UPLOAD_CONTEXT);
+      RADEON_STATECHANGE( rmesa, ctx );
       if (state) {
-	 rmesa->state.hw.context.pp_cntl |= RADEON_ALPHA_TEST_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= RADEON_ALPHA_TEST_ENABLE;
       } else {
-	 rmesa->state.hw.context.pp_cntl &= ~RADEON_ALPHA_TEST_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_ALPHA_TEST_ENABLE;
       }
       break;
 
    case GL_BLEND:
-      RADEON_STATECHANGE(rmesa, RADEON_UPLOAD_CONTEXT);
+      RADEON_STATECHANGE( rmesa, ctx );
       if (state) {
-	 rmesa->state.hw.context.rb3d_cntl |=  RADEON_ALPHA_BLEND_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_ALPHA_BLEND_ENABLE;
       } else {
-	 rmesa->state.hw.context.rb3d_cntl &= ~RADEON_ALPHA_BLEND_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ALPHA_BLEND_ENABLE;
+      }
+      if ( ctx->Color.ColorLogicOpEnabled ) {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_ROP_ENABLE;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE;
+      }
+      break;
+
+   case GL_CLIP_PLANE0:
+   case GL_CLIP_PLANE1:
+   case GL_CLIP_PLANE2:
+   case GL_CLIP_PLANE3:
+   case GL_CLIP_PLANE4:
+   case GL_CLIP_PLANE5: 
+      p = cap-GL_CLIP_PLANE0;
+      RADEON_STATECHANGE( rmesa, tcl );
+      if (state) {
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (RADEON_UCP_ENABLE_0<<p);
+	 radeonClipPlane( ctx, cap, NULL );
+      }
+      else {
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(RADEON_UCP_ENABLE_0<<p);
       }
       break;
 
+   case GL_COLOR_MATERIAL:
+      radeonColorMaterial( ctx, 0, 0 );
+      if (!state) 
+	 radeonUpdateMaterial( ctx );
+      break;
+
    case GL_CULL_FACE:
       radeonCullFace( ctx, 0 );
       break;
 
    case GL_DEPTH_TEST:
-      RADEON_STATECHANGE(rmesa, RADEON_UPLOAD_CONTEXT);
+      RADEON_STATECHANGE(rmesa, ctx );
       if ( state ) {
-	 rmesa->state.hw.context.rb3d_cntl |=  RADEON_Z_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_Z_ENABLE;
       } else {
-	 rmesa->state.hw.context.rb3d_cntl &= ~RADEON_Z_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_Z_ENABLE;
       }
       break;
 
    case GL_DITHER:
-      RADEON_STATECHANGE(rmesa, RADEON_UPLOAD_CONTEXT);
+      RADEON_STATECHANGE(rmesa, ctx );
       if ( state ) {
-	 rmesa->state.hw.context.rb3d_cntl |=  RADEON_DITHER_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_DITHER_ENABLE;
       } else {
-	 rmesa->state.hw.context.rb3d_cntl &= ~RADEON_DITHER_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_DITHER_ENABLE;
       }
       break;
 
    case GL_FOG:
-      RADEON_STATECHANGE(rmesa, RADEON_UPLOAD_CONTEXT);
+      RADEON_STATECHANGE(rmesa, ctx );
       if ( state ) {
-	 rmesa->state.hw.context.pp_cntl |=  RADEON_FOG_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= RADEON_FOG_ENABLE;
+	 radeonFogfv( ctx, GL_FOG_MODE, 0 );
       } else {
-	 rmesa->state.hw.context.pp_cntl &= ~RADEON_FOG_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_FOG_ENABLE;
+	 RADEON_STATECHANGE(rmesa, tcl); /* fog mask below is in the tcl atom */
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_TCL_FOG_MASK;
       }
+      radeonUpdateSpecular( ctx ); /* for PK_SPEC */
+      if (rmesa->TclFallback) 
+	 radeonChooseVertexState( ctx );
+      break;
+
+   case GL_LIGHT0:
+   case GL_LIGHT1:
+   case GL_LIGHT2:
+   case GL_LIGHT3:
+   case GL_LIGHT4:
+   case GL_LIGHT5:
+   case GL_LIGHT6:
+   case GL_LIGHT7:
+      RADEON_STATECHANGE(rmesa, tcl);
+      p = cap - GL_LIGHT0; /* light index, 0..7 */
+      if (p&1) /* odd lights use the LIGHT_1 bits, even lights the LIGHT_0 bits */
+	 flag = (RADEON_LIGHT_1_ENABLE |
+		 RADEON_LIGHT_1_ENABLE_AMBIENT | 
+		 RADEON_LIGHT_1_ENABLE_SPECULAR);
+      else
+	 flag = (RADEON_LIGHT_0_ENABLE |
+		 RADEON_LIGHT_0_ENABLE_AMBIENT | 
+		 RADEON_LIGHT_0_ENABLE_SPECULAR);
+
+      if (state) /* each PER_LIGHT_CTL word holds two lights, hence p/2 */
+	 rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] |= flag;
+      else
+	 rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] &= ~flag;
+
+      /* Re-run update_light_colors() for light p after toggling its
+       * enable bits. */
+      update_light_colors( ctx, p );
       break;
 
    case GL_LIGHTING:
+      RADEON_STATECHANGE(rmesa, tcl);
+      if (state) {
+/*  	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE; */
+/*  	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE; */
+      }
+      else {
+/*  	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LIGHTING_ENABLE; */
+/*  	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_DIFFUSE; */
+      }
       radeonUpdateSpecular(ctx);
+      check_twoside_fallback( ctx );
       break;
 
    case GL_LINE_SMOOTH:
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
+      RADEON_STATECHANGE( rmesa, ctx );
       if ( state ) {
-	 rmesa->state.hw.context.pp_cntl |=  RADEON_ANTI_ALIAS_LINE;
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=  RADEON_ANTI_ALIAS_LINE;
       } else {
-	 rmesa->state.hw.context.pp_cntl &= ~RADEON_ANTI_ALIAS_LINE;
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_ANTI_ALIAS_LINE;
       }
       break;
 
    case GL_LINE_STIPPLE:
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
+      RADEON_STATECHANGE( rmesa, ctx );
       if ( state ) {
-	 rmesa->state.hw.context.pp_cntl |=  RADEON_PATTERN_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=  RADEON_PATTERN_ENABLE;
       } else {
-	 rmesa->state.hw.context.pp_cntl &= ~RADEON_PATTERN_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_PATTERN_ENABLE;
       }
       break;
 
    case GL_COLOR_LOGIC_OP:
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
+      RADEON_STATECHANGE( rmesa, ctx );
       if ( state ) {
-	 rmesa->state.hw.context.rb3d_cntl |=  RADEON_ROP_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_ROP_ENABLE;
       } else {
-	 rmesa->state.hw.context.rb3d_cntl &= ~RADEON_ROP_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE;
       }
       break;
-
-   case GL_POLYGON_OFFSET_POINT:
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_SETUP );
+      
+   case GL_NORMALIZE:
+      RADEON_STATECHANGE( rmesa, tcl );
       if ( state ) {
-	 rmesa->state.hw.setup1.se_cntl |=  RADEON_ZBIAS_ENABLE_POINT;
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |=  RADEON_NORMALIZE_NORMALS;
       } else {
-	 rmesa->state.hw.setup1.se_cntl &= ~RADEON_ZBIAS_ENABLE_POINT;
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_NORMALIZE_NORMALS;
+      }
+      break;
+
+   case GL_POLYGON_OFFSET_POINT:
+      if (rmesa->dri.drmMinor == 1) {
+	 radeonChooseRenderState( ctx );
+      } 
+      else {
+	 RADEON_STATECHANGE( rmesa, set );
+	 if ( state ) {
+	    rmesa->hw.set.cmd[SET_SE_CNTL] |=  RADEON_ZBIAS_ENABLE_POINT;
+	 } else {
+	    rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_ZBIAS_ENABLE_POINT;
+	 }
       }
       break;
 
    case GL_POLYGON_OFFSET_LINE:
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_SETUP );
-      if ( state ) {
-	 rmesa->state.hw.setup1.se_cntl |=  RADEON_ZBIAS_ENABLE_LINE;
-      } else {
-	 rmesa->state.hw.setup1.se_cntl &= ~RADEON_ZBIAS_ENABLE_LINE;
+      if (rmesa->dri.drmMinor == 1) {
+	 radeonChooseRenderState( ctx );
+      } 
+      else {
+	 RADEON_STATECHANGE( rmesa, set );
+	 if ( state ) {
+	    rmesa->hw.set.cmd[SET_SE_CNTL] |=  RADEON_ZBIAS_ENABLE_LINE;
+	 } else {
+	    rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_ZBIAS_ENABLE_LINE;
+	 }
       }
       break;
 
    case GL_POLYGON_OFFSET_FILL:
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_SETUP );
-      if ( state ) {
-	 rmesa->state.hw.setup1.se_cntl |=  RADEON_ZBIAS_ENABLE_TRI;
-      } else {
-	 rmesa->state.hw.setup1.se_cntl &= ~RADEON_ZBIAS_ENABLE_TRI;
+      if (rmesa->dri.drmMinor == 1) {
+	 radeonChooseRenderState( ctx );
+      } 
+      else {
+	 RADEON_STATECHANGE( rmesa, set );
+	 if ( state ) {
+	    rmesa->hw.set.cmd[SET_SE_CNTL] |=  RADEON_ZBIAS_ENABLE_TRI;
+	 } else {
+	    rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_ZBIAS_ENABLE_TRI;
+	 }
       }
       break;
 
    case GL_POLYGON_SMOOTH:
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
+      RADEON_STATECHANGE( rmesa, ctx );
       if ( state ) {
-	 rmesa->state.hw.context.pp_cntl |=  RADEON_ANTI_ALIAS_POLY;
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=  RADEON_ANTI_ALIAS_POLY;
       } else {
-	 rmesa->state.hw.context.pp_cntl &= ~RADEON_ANTI_ALIAS_POLY;
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_ANTI_ALIAS_POLY;
       }
       break;
 
    case GL_POLYGON_STIPPLE:
-      RADEON_STATECHANGE(rmesa, RADEON_UPLOAD_CONTEXT);
+      RADEON_STATECHANGE(rmesa, ctx );
       if ( state ) {
-	 rmesa->state.hw.context.pp_cntl |=  RADEON_STIPPLE_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=  RADEON_STIPPLE_ENABLE;
       } else {
-	 rmesa->state.hw.context.pp_cntl &= ~RADEON_STIPPLE_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_STIPPLE_ENABLE;
       }
       break;
 
+   case GL_RESCALE_NORMAL_EXT: {
+      GLboolean tmp = ctx->_NeedEyeCoords ? state : !state;
+      RADEON_STATECHANGE( rmesa, tcl );
+      if ( tmp ) {
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |=  RADEON_RESCALE_NORMALS;
+      } else {
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_RESCALE_NORMALS;
+      }
+      break;
+   }
+
    case GL_SCISSOR_TEST:
       RADEON_FIREVERTICES( rmesa );
       rmesa->state.scissor.enabled = state;
-      rmesa->upload_cliprects = 1;
+      radeonUpdateScissor( ctx );
       break;
 
    case GL_STENCIL_TEST:
       if ( rmesa->state.stencil.hwBuffer ) {
-	 RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
+	 RADEON_STATECHANGE( rmesa, ctx );
 	 if ( state ) {
-	    rmesa->state.hw.context.rb3d_cntl |=  RADEON_STENCIL_ENABLE;
+	    rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_STENCIL_ENABLE;
 	 } else {
-	    rmesa->state.hw.context.rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
+	    rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_STENCIL_ENABLE;
 	 }
       } else {
 	 FALLBACK( rmesa, RADEON_FALLBACK_STENCIL, state );
       }
       break;
 
+   case GL_TEXTURE_GEN_Q:
+   case GL_TEXTURE_GEN_R:
+   case GL_TEXTURE_GEN_S:
+   case GL_TEXTURE_GEN_T:
+      /* Picked up in radeonUpdateTextureState.
+       */
+      rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE; 
+      break;
+
+   case GL_COLOR_SUM_EXT:
+      radeonUpdateSpecular ( ctx );
+      break;
+
    default:
       return;
    }
 }
 
 
+static void radeonLightingSpaceChange( GLcontext *ctx ) /* ctx->Driver.LightingSpaceChange hook */
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLboolean tmp; /* whether RADEON_RESCALE_NORMALS should end up set */
+   RADEON_STATECHANGE( rmesa, tcl ); /* TCL_LIGHT_MODEL_CTL is modified below */
+
+   if (RADEON_DEBUG & DEBUG_STATE)
+      fprintf(stderr, "%s %d\n", __FUNCTION__, ctx->_NeedEyeCoords);
+
+   if (ctx->_NeedEyeCoords) /* same polarity rule as GL_RESCALE_NORMAL_EXT in radeonEnable */
+      tmp = ctx->Transform.RescaleNormals;
+   else
+      tmp = !ctx->Transform.RescaleNormals; /* inverted when eye coords are not needed */
+
+   if ( tmp ) {
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |=  RADEON_RESCALE_NORMALS;
+   } else {
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_RESCALE_NORMALS;
+   }
+}
+
 /* =============================================================
- * State initialization, management
+ * Deferred state management - matrices, textures, other?
  */
 
-void radeonPrintDirty( const char *msg, GLuint state )
-{
-   fprintf( stderr,
-	    "%s: (0x%x) %s%s%s%s%s%s%s%s%s%s%s\n",
-	    msg,
-	    state,
-	    (state & RADEON_UPLOAD_CONTEXT)     ? "context, " : "",
-	    (state & RADEON_UPLOAD_LINE)        ? "line, " : "",
-	    (state & RADEON_UPLOAD_BUMPMAP)     ? "bumpmap, " : "",
-	    (state & RADEON_UPLOAD_MASKS)       ? "masks, " : "",
-	    (state & RADEON_UPLOAD_VIEWPORT)    ? "viewport, " : "",
-	    (state & RADEON_UPLOAD_SETUP)       ? "setup, " : "",
-	    (state & RADEON_UPLOAD_TCL)         ? "tcl, " : "",
-	    (state & RADEON_UPLOAD_MISC)        ? "misc, " : "",
-	    (state & RADEON_UPLOAD_TEX0)        ? "tex0, " : "",
-	    (state & RADEON_UPLOAD_TEX1)        ? "tex1, " : "",
-	    (state & RADEON_UPLOAD_TEX2)        ? "tex2, " : "");
-}
 
 
 
+static void upload_matrix( radeonContextPtr rmesa, GLfloat *src, int idx )
+{ /* Copy the 16 floats of src into matrix slot idx, transposing on the way. */
+   float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0; /* payload starts at MAT_ELT_0 */
+   int i;
 
 
-static void radeonInvalidateState( GLcontext *ctx, GLuint new_state )
+   for (i = 0 ; i < 4 ; i++) { /* dest[4*i+j] = src[4*j+i] */
+      *dest++ = src[i];
+      *dest++ = src[i+4];
+      *dest++ = src[i+8];
+      *dest++ = src[i+12];
+   }
+
+   RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] ); /* NOTE(review): presumably marks mat[idx] for re-emit -- confirm in macro */
+}
+
+static void upload_matrix_t( radeonContextPtr rmesa, GLfloat *src, int idx ) /* like upload_matrix, but no transpose */
 {
-   _swrast_InvalidateState( ctx, new_state );
-   _swsetup_InvalidateState( ctx, new_state );
-   _ac_InvalidateState( ctx, new_state );
-   _tnl_InvalidateState( ctx, new_state );
-   RADEON_CONTEXT(ctx)->NewGLState |= new_state;
+   float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0; /* payload starts at MAT_ELT_0 */
+   memcpy(dest, src, 16*sizeof(float)); /* 16 floats copied in source order */
+   RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] ); /* NOTE(review): presumably marks mat[idx] for re-emit -- confirm in macro */
 }
 
 
+static void update_texturematrix( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   GLuint tpc = rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL];
+   GLuint vs = rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL];
+   int unit;
 
+   rmesa->TexMatEnabled = 0;
 
-/* Initialize the context's hardware state.
- */
-void radeonInitState( radeonContextPtr rmesa )
-{
-   GLcontext *ctx = rmesa->glCtx;
-   GLuint color_fmt, depth_fmt;
+   for (unit = 0 ; unit < 2; unit++) {
+      if (!ctx->Texture.Unit[unit]._ReallyEnabled) {
+      }
+      else if (ctx->TextureMatrix[unit].type != MATRIX_IDENTITY) {
+	 GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
+	 
+	 rmesa->TexMatEnabled |= (RADEON_TEXGEN_TEXMAT_0_ENABLE|
+				  RADEON_TEXMAT_0_ENABLE) << unit;
+
+	 if (rmesa->TexGenEnabled & (RADEON_TEXMAT_0_ENABLE << unit)) {
+	    /* Need to preconcatenate any active texgen 
+	     * obj/eyeplane matrices:
+	     */
+	    _math_matrix_mul_matrix( &rmesa->tmpmat, 
+				     &rmesa->TexGenMatrix[unit],
+				     &ctx->TextureMatrix[unit] );
+	    upload_matrix( rmesa, rmesa->tmpmat.m, TEXMAT_0+unit );
+	 } 
+	 else {
+	    rmesa->TexMatEnabled |= 
+	       (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
+	    upload_matrix( rmesa, ctx->TextureMatrix[unit].m, 
+			   TEXMAT_0+unit );
+	 }
+      }
+      else if (rmesa->TexGenEnabled & (RADEON_TEXMAT_0_ENABLE << unit)) {
+	 upload_matrix( rmesa, rmesa->TexGenMatrix[unit].m, 
+			TEXMAT_0+unit );
+      }
+   }
 
-   switch ( rmesa->radeonScreen->cpp ) {
-   case 2:
-      color_fmt = RADEON_COLOR_FORMAT_RGB565;
-      break;
-   case 4:
-      color_fmt = RADEON_COLOR_FORMAT_ARGB8888;
-      break;
-   default:
-      fprintf( stderr, "Error: Unsupported pixel depth... exiting\n" );
-      exit( -1 );
+
+   tpc = (rmesa->TexMatEnabled | rmesa->TexGenEnabled);
+
+   vs &= ~((0xf << RADEON_TCL_TEX_0_OUTPUT_SHIFT) |
+	   (0xf << RADEON_TCL_TEX_1_OUTPUT_SHIFT));
+
+   if (tpc & RADEON_TEXGEN_TEXMAT_0_ENABLE)
+      vs |= RADEON_TCL_TEX_COMPUTED_TEX_0 << RADEON_TCL_TEX_0_OUTPUT_SHIFT;
+   else
+      vs |= RADEON_TCL_TEX_INPUT_TEX_0 << RADEON_TCL_TEX_0_OUTPUT_SHIFT;
+
+   if (tpc & RADEON_TEXGEN_TEXMAT_1_ENABLE)
+      vs |= RADEON_TCL_TEX_COMPUTED_TEX_1 << RADEON_TCL_TEX_1_OUTPUT_SHIFT;
+   else
+      vs |= RADEON_TCL_TEX_INPUT_TEX_1 << RADEON_TCL_TEX_1_OUTPUT_SHIFT;
+
+   if (tpc != rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL] ||
+       vs != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL]) {
+      
+      RADEON_STATECHANGE(rmesa, tcl);
+      rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL] = tpc;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] = vs;
    }
+}
 
-   rmesa->state.color.clear = 0x00000000;
 
-   switch ( ctx->Visual.depthBits ) {
-   case 16:
-      rmesa->state.depth.clear = 0x0000ffff;
-      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffff;
-      depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
-      rmesa->state.stencil.clear = 0x00000000;
-      break;
-   case 24:
-      rmesa->state.depth.clear = 0x00ffffff;
-      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff;
-      depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
-      rmesa->state.stencil.clear = 0xffff0000;
-      break;
-   default:
-      fprintf( stderr, "Error: Unsupported depth %d... exiting\n",
-	       ctx->Visual.depthBits );
-      exit( -1 );
+
+void radeonValidateState( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint new_state = rmesa->NewGLState;
+
+   if (new_state & _NEW_TEXTURE) {
+      radeonUpdateTextureState( ctx );
+      new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
    }
 
-   /* Only have hw stencil when depth buffer is 24 bits deep */
-   rmesa->state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 &&
-				     ctx->Visual.depthBits == 24 );
+   /* Need an event driven matrix update?
+    */
+   if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION)) 
+      upload_matrix( rmesa, ctx->_ModelProjectMatrix.m, MODEL_PROJ );
 
-   rmesa->RenderIndex = ~0;
-   rmesa->Fallback = 0;
-   rmesa->render_primitive = GL_TRIANGLES;
-   rmesa->hw_primitive = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST;
+   /* Need these for lighting (shouldn't upload otherwise)
+    */
+   if (new_state & (_NEW_MODELVIEW)) {
+      upload_matrix( rmesa, ctx->ModelView.m, MODEL );
+      upload_matrix_t( rmesa, ctx->ModelView.inv, MODEL_IT );
+   }
 
-   if ( ctx->Visual.doubleBufferMode ) {
-      rmesa->state.color.drawOffset = rmesa->radeonScreen->backOffset;
-      rmesa->state.color.drawPitch  = rmesa->radeonScreen->backPitch;
-   } else {
-      rmesa->state.color.drawOffset = rmesa->radeonScreen->frontOffset;
-      rmesa->state.color.drawPitch  = rmesa->radeonScreen->frontPitch;
+   /* Does this need to be triggered on eg. modelview for
+    * texgen-derived objplane/eyeplane matrices?
+    */
+   if (new_state & _NEW_TEXTURE_MATRIX) {
+      update_texturematrix( ctx );
+   }      
+
+   if (new_state & (_NEW_LIGHT|_NEW_MODELVIEW|_MESA_NEW_NEED_EYE_COORDS)) {
+      update_light( ctx );
    }
-   rmesa->state.pixel.readOffset = rmesa->state.color.drawOffset;
-   rmesa->state.pixel.readPitch  = rmesa->state.color.drawPitch;
 
-   /* Harware state:
+   /* emit all active clip planes if projection matrix changes.
     */
-   rmesa->state.hw.context.pp_misc = (RADEON_ALPHA_TEST_PASS |
-				   RADEON_CHROMA_FUNC_FAIL |
-				   RADEON_CHROMA_KEY_NEAREST |
-				   RADEON_SHADOW_FUNC_EQUAL |
-				   RADEON_SHADOW_PASS_1 |
-				   RADEON_RIGHT_HAND_CUBE_OGL);
+   if (new_state & (_NEW_PROJECTION)) {
+      if (ctx->Transform._AnyClip) 
+	 radeonUpdateClipPlanes( ctx );
+   }
 
-   rmesa->state.hw.context.pp_fog_color = ((0x00000000 & RADEON_FOG_COLOR_MASK) |
-					RADEON_FOG_VERTEX |
-					RADEON_FOG_USE_DEPTH);
 
-   rmesa->state.hw.context.re_solid_color = 0x00000000;
+   rmesa->NewGLState = 0;
+}
 
-   rmesa->state.hw.context.rb3d_blendcntl = (RADEON_COMB_FCN_ADD_CLAMP |
-					  RADEON_SRC_BLEND_GL_ONE |
-					  RADEON_DST_BLEND_GL_ZERO );
 
-   rmesa->state.hw.context.rb3d_depthoffset = rmesa->radeonScreen->depthOffset;
+static void radeonInvalidateState( GLcontext *ctx, GLuint new_state ) /* ctx->Driver.UpdateState hook */
+{
+   _swrast_InvalidateState( ctx, new_state ); /* pass the dirty bits on to each helper module */
+   _swsetup_InvalidateState( ctx, new_state );
+   _ac_InvalidateState( ctx, new_state );
+   _tnl_InvalidateState( ctx, new_state );
+   _ae_invalidate_state( ctx, new_state );
+   RADEON_CONTEXT(ctx)->NewGLState |= new_state; /* accumulated until radeonValidateState() resets it to 0 */
+   radeonVtxfmtInvalidate( ctx );
+}
 
-   rmesa->state.hw.context.rb3d_depthpitch = ((rmesa->radeonScreen->depthPitch &
-					    RADEON_DEPTHPITCH_MASK) |
-					   RADEON_DEPTH_ENDIAN_NO_SWAP);
+static void radeonWrapRunPipeline( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
 
-   rmesa->state.hw.context.rb3d_zstencilcntl = (depth_fmt |
-					     RADEON_Z_TEST_LESS |
-					     RADEON_STENCIL_TEST_ALWAYS |
-					     RADEON_STENCIL_FAIL_KEEP |
-					     RADEON_STENCIL_ZPASS_KEEP |
-					     RADEON_STENCIL_ZFAIL_KEEP |
-					     RADEON_Z_WRITE_ENABLE);
+   if (0)
+      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState);
 
-   rmesa->state.hw.context.pp_cntl = (RADEON_SCISSOR_ENABLE |
-				   RADEON_ANTI_ALIAS_NONE);
+   /* Validate state:
+    */
+   if (rmesa->NewGLState)
+      radeonValidateState( ctx );
 
-   rmesa->state.hw.context.rb3d_cntl = (RADEON_PLANE_MASK_ENABLE |
-				     color_fmt |
-				     RADEON_ZBLOCK16);
+   if (tnl->vb.Material) {
+      TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_MATERIAL, GL_TRUE );
+   }
 
-   rmesa->state.hw.context.rb3d_coloroffset = (rmesa->state.color.drawOffset &
-					    RADEON_COLOROFFSET_MASK);
+   /* Run the pipeline.
+    */ 
+   _tnl_run_pipeline( ctx );
 
-   rmesa->state.hw.context.re_width_height = ((0x7ff << RADEON_RE_WIDTH_SHIFT) |
-					   (0x7ff << RADEON_RE_HEIGHT_SHIFT));
-
-   rmesa->state.hw.context.rb3d_colorpitch = ((rmesa->state.color.drawPitch &
-					    RADEON_COLORPITCH_MASK) |
-					   RADEON_COLOR_ENDIAN_NO_SWAP);
-
-   rmesa->state.hw.setup1.se_cntl = (RADEON_FFACE_CULL_CCW |
-				 RADEON_BFACE_SOLID |
-				 RADEON_FFACE_SOLID |
-				 RADEON_FLAT_SHADE_VTX_LAST |
-				 RADEON_DIFFUSE_SHADE_GOURAUD |
-				 RADEON_ALPHA_SHADE_GOURAUD |
-				 RADEON_SPECULAR_SHADE_GOURAUD |
-				 RADEON_FOG_SHADE_GOURAUD |
-				 RADEON_VPORT_XY_XFORM_ENABLE |
-				 RADEON_VPORT_Z_XFORM_ENABLE |
-				 RADEON_VTX_PIX_CENTER_OGL |
-				 RADEON_ROUND_MODE_TRUNC |
-				 RADEON_ROUND_PREC_8TH_PIX);
-
-   rmesa->state.hw.vertex.se_coord_fmt = (
-#if 1
-      RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
-      RADEON_VTX_Z_PRE_MULT_1_OVER_W0 |
-#else
-      RADEON_VTX_W0_IS_NOT_1_OVER_W0 |
-#endif
-      RADEON_TEX1_W_ROUTING_USE_Q1);
-
-   rmesa->state.hw.setup2.se_cntl_status = (RADEON_TCL_BYPASS |
-#if BYTE_ORDER == BIG_ENDIAN
-					    RADEON_VC_32BIT_SWAP);
-#else
-					    RADEON_VC_NO_SWAP);
-#endif
-
-   rmesa->state.hw.line.re_line_pattern = ((0x0000 & RADEON_LINE_PATTERN_MASK) |
-					(0 << RADEON_LINE_REPEAT_COUNT_SHIFT) |
-					(0 << RADEON_LINE_PATTERN_START_SHIFT) |
-					RADEON_LINE_PATTERN_LITTLE_BIT_ORDER);
-
-   rmesa->state.hw.line.re_line_state = ((0 << RADEON_LINE_CURRENT_PTR_SHIFT) |
-				      (1 << RADEON_LINE_CURRENT_COUNT_SHIFT));
-
-   rmesa->state.hw.line.se_line_width = (1 << 4);
-
-   rmesa->state.hw.bumpmap.pp_lum_matrix = 0x00000000;
-   rmesa->state.hw.bumpmap.pp_rot_matrix_0 = 0x00000000;
-   rmesa->state.hw.bumpmap.pp_rot_matrix_1 = 0x00000000;
-
-   rmesa->state.hw.mask.rb3d_stencilrefmask = ((0x00 << RADEON_STENCIL_REF_SHIFT) |
-					       (0xff << RADEON_STENCIL_MASK_SHIFT) |
-					       (0xff << RADEON_STENCIL_WRITEMASK_SHIFT));
-
-   rmesa->state.hw.mask.rb3d_ropcntl = RADEON_ROP_COPY;
-   rmesa->state.hw.mask.rb3d_planemask = 0xffffffff;
-
-   rmesa->state.hw.viewport.se_vport_xscale  = 0x00000000;
-   rmesa->state.hw.viewport.se_vport_xoffset = 0x00000000;
-   rmesa->state.hw.viewport.se_vport_yscale  = 0x00000000;
-   rmesa->state.hw.viewport.se_vport_yoffset = 0x00000000;
-   rmesa->state.hw.viewport.se_vport_zscale  = 0x00000000;
-   rmesa->state.hw.viewport.se_vport_zoffset = 0x00000000;
-
-   rmesa->state.hw.misc.re_misc = ((0 << RADEON_STIPPLE_X_OFFSET_SHIFT) |
-				   (0 << RADEON_STIPPLE_Y_OFFSET_SHIFT) |
-				   RADEON_STIPPLE_BIG_BIT_ORDER);
-
-   rmesa->state.hw.dirty = RADEON_UPLOAD_CONTEXT_ALL;
+   if (tnl->vb.Material) {
+      TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_MATERIAL, GL_FALSE );
+      radeonUpdateMaterial( ctx ); /* not needed any more? */
+   }
 }
 
+
+
+
 /* Initialize the driver's state functions.
  */
 void radeonInitStateFuncs( GLcontext *ctx )
 {
    ctx->Driver.UpdateState		= radeonInvalidateState;
+   ctx->Driver.LightingSpaceChange      = radeonLightingSpaceChange;
 
    ctx->Driver.SetDrawBuffer		= radeonSetDrawBuffer;
 
@@ -1313,6 +2092,7 @@ void radeonInitStateFuncs( GLcontext *ctx )
    ctx->Driver.ClearDepth		= radeonClearDepth;
    ctx->Driver.ClearIndex		= NULL;
    ctx->Driver.ClearStencil		= radeonClearStencil;
+   ctx->Driver.ClipPlane		= radeonClipPlane;
    ctx->Driver.ColorMask		= radeonColorMask;
    ctx->Driver.CullFace			= radeonCullFace;
    ctx->Driver.DepthFunc		= radeonDepthFunc;
@@ -1324,12 +2104,15 @@ void radeonInitStateFuncs( GLcontext *ctx )
    ctx->Driver.Hint			= NULL;
    ctx->Driver.IndexMask		= NULL;
    ctx->Driver.LightModelfv		= radeonLightModelfv;
-   ctx->Driver.Lightfv			= NULL;
+   ctx->Driver.Lightfv			= radeonLightfv;
    ctx->Driver.LineStipple              = radeonLineStipple;
    ctx->Driver.LineWidth                = radeonLineWidth;
    ctx->Driver.LogicOpcode		= radeonLogicOpCode;
-   ctx->Driver.PolygonMode		= NULL;
-   ctx->Driver.PolygonOffset		= radeonPolygonOffset;
+   ctx->Driver.PolygonMode		= radeonPolygonMode;
+
+   if (RADEON_CONTEXT(ctx)->dri.drmMinor > 1)
+      ctx->Driver.PolygonOffset		= radeonPolygonOffset;
+
    ctx->Driver.PolygonStipple		= radeonPolygonStipple;
    ctx->Driver.RenderMode		= radeonRenderMode;
    ctx->Driver.Scissor			= radeonScissor;
@@ -1353,4 +2136,7 @@ void radeonInitStateFuncs( GLcontext *ctx )
    ctx->Driver.CopyColorSubTable	= _swrast_CopyColorSubTable;
    ctx->Driver.CopyConvolutionFilter1D	= _swrast_CopyConvolutionFilter1D;
    ctx->Driver.CopyConvolutionFilter2D	= _swrast_CopyConvolutionFilter2D;
+
+   TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange = radeonUpdateMaterial;
+   TNL_CONTEXT(ctx)->Driver.RunPipeline = radeonWrapRunPipeline;
 }
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_state.h b/xc/lib/GL/mesa/src/drv/radeon/radeon_state.h
index 47406fcfc..e428e4740 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_state.h
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_state.h
@@ -44,11 +44,17 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 extern void radeonInitState( radeonContextPtr rmesa );
 extern void radeonInitStateFuncs( GLcontext *ctx );
 
-extern void radeonUpdateWindow( GLcontext *ctx );
+extern void radeonUpdateMaterial( GLcontext *ctx );
+
 extern void radeonSetCliprects( radeonContextPtr rmesa, GLenum mode );
+extern void radeonRecalcScissorRects( radeonContextPtr rmesa );
 extern void radeonUpdateViewportOffset( GLcontext *ctx );
+extern void radeonUpdateWindow( GLcontext *ctx );
 
-extern void radeonPrintDirty( const char *msg, GLuint state );
+extern void radeonValidateState( GLcontext *ctx );
+
+extern void radeonPrintDirty( radeonContextPtr rmesa,
+			      const char *msg );
 
 
 extern void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode );
@@ -59,5 +65,12 @@ extern void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode );
 } while (0)
 
 
+#define MODEL_PROJ 0
+#define MODEL      1
+#define MODEL_IT   2
+#define TEXMAT_0   3
+#define TEXMAT_1   4
+#define TEXMAT_2   5
+
 #endif
 #endif
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_state_init.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_state_init.c
new file mode 100644
index 000000000..d7693cbde
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_state_init.c
@@ -0,0 +1,550 @@
+/* $XFree86$ */
+/*
+ * Copyright 2000, 2001 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Gareth Hughes <gareth@valinux.com>
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "radeon_context.h"
+#include "radeon_ioctl.h"
+#include "radeon_state.h"
+#include "radeon_tcl.h"
+#include "radeon_tex.h"
+#include "radeon_swtcl.h"
+#include "radeon_vtxfmt.h"
+
+#include "mem.h"
+#include "mmath.h"
+#include "enums.h"
+#include "colormac.h"
+#include "light.h"
+#include "api_arrayelt.h"
+
+#include "swrast/swrast.h"
+#include "array_cache/acache.h"
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+#include "swrast_setup/swrast_setup.h"
+
+/* =============================================================
+ * State initialization
+ */
+
+/* Debug aid: write msg, then the name of each atom on the hw.dirty list. */
+void radeonPrintDirty( radeonContextPtr rmesa, const char *msg )
+{
+   struct radeon_state_atom *l;
+
+   /* msg is data, not a format string: a '%' in it must print verbatim. */
+   fprintf(stderr, "%s: ", msg);
+
+   foreach(l, &(rmesa->hw.dirty)) {
+      fprintf(stderr, "%s, ", l->name);
+   }
+   fprintf(stderr, "\n");
+}
+
+static int cmdpkt( int id )
+{
+   drmRadeonCmdHeader hdr;	/* RADEON_CMD_PACKET header, returned as int */
+   hdr.i = 0;
+   hdr.packet.packet_id = id;
+   hdr.packet.cmd_type = RADEON_CMD_PACKET;
+   return hdr.i;
+}
+
+static int cmdvec( int offset, int stride, int count )
+{
+   drmRadeonCmdHeader hdr;	/* RADEON_CMD_VECTORS header, returned as int */
+   hdr.i = 0;
+   hdr.vectors.count = count;
+   hdr.vectors.stride = stride;
+   hdr.vectors.offset = offset;
+   hdr.vectors.cmd_type = RADEON_CMD_VECTORS;
+   return hdr.i;
+}
+
+static int cmdscl( int offset, int stride, int count )
+{
+   drmRadeonCmdHeader hdr;	/* RADEON_CMD_SCALARS header, returned as int */
+   hdr.i = 0;
+   hdr.scalars.count = count;
+   hdr.scalars.stride = stride;
+   hdr.scalars.offset = offset;
+   hdr.scalars.cmd_type = RADEON_CMD_SCALARS;
+   return hdr.i;
+}
+
+#define CHECK( NM, FLAG )			\
+static GLboolean check_##NM( GLcontext *ctx )	\
+{						\
+   return FLAG;					\
+}
+/* CHECK yields check_NM() returning FLAG; TCL_CHECK also needs !TclFallback. */
+#define TCL_CHECK( NM, FLAG )				\
+static GLboolean check_##NM( GLcontext *ctx )		\
+{							\
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);	\
+   return !rmesa->TclFallback && (FLAG);		\
+}
+
+/* Predicates referenced by name from ALLOC_STATE() in radeonInitState(). */
+CHECK( always, GL_TRUE )
+CHECK( tex0, ctx->Texture.Unit[0]._ReallyEnabled )
+CHECK( tex1, ctx->Texture.Unit[1]._ReallyEnabled )
+CHECK( fog, ctx->Fog.Enabled )
+TCL_CHECK( tcl, GL_TRUE )
+TCL_CHECK( tcl_tex0, ctx->Texture.Unit[0]._ReallyEnabled )
+TCL_CHECK( tcl_tex1, ctx->Texture.Unit[1]._ReallyEnabled )
+TCL_CHECK( tcl_lighting, ctx->Light.Enabled )
+TCL_CHECK( tcl_eyespace_or_lighting, ctx->_NeedEyeCoords || ctx->Light.Enabled )
+TCL_CHECK( tcl_lit0, ctx->Light.Enabled && ctx->Light.Light[0].Enabled )
+TCL_CHECK( tcl_lit1, ctx->Light.Enabled && ctx->Light.Light[1].Enabled )
+TCL_CHECK( tcl_lit2, ctx->Light.Enabled && ctx->Light.Light[2].Enabled )
+TCL_CHECK( tcl_lit3, ctx->Light.Enabled && ctx->Light.Light[3].Enabled )
+TCL_CHECK( tcl_lit4, ctx->Light.Enabled && ctx->Light.Light[4].Enabled )
+TCL_CHECK( tcl_lit5, ctx->Light.Enabled && ctx->Light.Light[5].Enabled )
+TCL_CHECK( tcl_lit6, ctx->Light.Enabled && ctx->Light.Light[6].Enabled )
+TCL_CHECK( tcl_lit7, ctx->Light.Enabled && ctx->Light.Light[7].Enabled )
+TCL_CHECK( tcl_ucp0, ctx->Transform.ClipEnabled[0] )
+TCL_CHECK( tcl_ucp1, ctx->Transform.ClipEnabled[1] )
+TCL_CHECK( tcl_ucp2, ctx->Transform.ClipEnabled[2] )
+TCL_CHECK( tcl_ucp3, ctx->Transform.ClipEnabled[3] )
+TCL_CHECK( tcl_ucp4, ctx->Transform.ClipEnabled[4] )
+TCL_CHECK( tcl_ucp5, ctx->Transform.ClipEnabled[5] )
+TCL_CHECK( tcl_eyespace_or_fog, ctx->_NeedEyeCoords || ctx->Fog.Enabled ) 
+
+
+
+/* Initialize the context's hardware state: allocate every state atom (all
+ * start dirty) and load defaults.  Exits on an unsupported cpp or depth. */
+void radeonInitState( radeonContextPtr rmesa )
+{
+   GLcontext *ctx = rmesa->glCtx;
+   GLuint color_fmt, depth_fmt, i;
+
+   switch ( rmesa->radeonScreen->cpp ) {
+   case 2:
+      color_fmt = RADEON_COLOR_FORMAT_RGB565;
+      break;
+   case 4:
+      color_fmt = RADEON_COLOR_FORMAT_ARGB8888;
+      break;
+   default:
+      fprintf( stderr, "Error: Unsupported pixel depth... exiting\n" );
+      exit( -1 );
+   }
+
+   rmesa->state.color.clear = 0x00000000;
+
+   switch ( ctx->Visual.depthBits ) {
+   case 16:
+      rmesa->state.depth.clear = 0x0000ffff;
+      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffff;
+      depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
+      rmesa->state.stencil.clear = 0x00000000;
+      break;
+   case 24:
+      rmesa->state.depth.clear = 0x00ffffff;
+      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff;
+      depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
+      rmesa->state.stencil.clear = 0xff000000;
+      break;
+   default:
+      fprintf( stderr, "Error: Unsupported depth %d... exiting\n",
+	       ctx->Visual.depthBits );
+      exit( -1 );
+   }
+
+   /* Only have hw stencil when depth buffer is 24 bits deep */
+   rmesa->state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 &&
+				     ctx->Visual.depthBits == 24 );
+
+   rmesa->Fallback = 0;
+
+   if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) {
+      rmesa->state.color.drawOffset = rmesa->radeonScreen->backOffset;
+      rmesa->state.color.drawPitch  = rmesa->radeonScreen->backPitch;
+   } else {
+      rmesa->state.color.drawOffset = rmesa->radeonScreen->frontOffset;
+      rmesa->state.color.drawPitch  = rmesa->radeonScreen->frontPitch;
+   }
+   rmesa->state.pixel.readOffset = rmesa->state.color.drawOffset;
+   rmesa->state.pixel.readPitch  = rmesa->state.color.drawPitch;
+
+   /* Initialize lists:
+    */
+   make_empty_list(&(rmesa->hw.dirty));
+   make_empty_list(&(rmesa->hw.clean));
+
+   /* ALLOC_STATE sets up one atom (buffers, name, check) and queues it dirty. */
+#define ALLOC_STATE( ATOM, CHK, SZ, NM, FLAG )				\
+   do {								\
+      rmesa->hw.ATOM.cmd_size = SZ;				\
+      rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int));	\
+      rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int));	\
+      rmesa->hw.ATOM.name = NM;					\
+      rmesa->hw.ATOM.is_tcl = FLAG;					\
+      rmesa->hw.ATOM.check = check_##CHK;				\
+      insert_at_head(&(rmesa->hw.dirty), &(rmesa->hw.ATOM));	\
+   } while (0)
+
+
+   /* Allocate state buffers:
+    */
+   ALLOC_STATE( ctx, always, CTX_STATE_SIZE, "CTX/context", 0 );
+   ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 );
+   ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 );
+   ALLOC_STATE( vpt, always, VPT_STATE_SIZE, "VPT/viewport", 0 );
+   ALLOC_STATE( set, always, SET_STATE_SIZE, "SET/setup", 0 );
+   ALLOC_STATE( msc, always, MSC_STATE_SIZE, "MSC/misc", 0 );
+   ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 );
+   ALLOC_STATE( tcl, always, TCL_STATE_SIZE, "TCL/tcl", 1 );
+   ALLOC_STATE( mtl, tcl_lighting, MTL_STATE_SIZE, "MTL/material", 1 );
+   ALLOC_STATE( grd, always, GRD_STATE_SIZE, "GRD/guard-band", 1 );
+   ALLOC_STATE( fog, fog, FOG_STATE_SIZE, "FOG/fog", 1 );
+   ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 1 );
+   ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 1 );
+   ALLOC_STATE( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0 );
+   ALLOC_STATE( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0 );
+   ALLOC_STATE( mat[0], tcl, MAT_STATE_SIZE, "MAT/modelproject", 1 );
+   ALLOC_STATE( mat[1], tcl_eyespace_or_fog, MAT_STATE_SIZE, "MAT/modelview", 1 );
+   ALLOC_STATE( mat[2], tcl_eyespace_or_lighting, MAT_STATE_SIZE, "MAT/it-modelview", 1 );
+   ALLOC_STATE( mat[3], tcl_tex0, MAT_STATE_SIZE, "MAT/texmat0", 1 );
+   ALLOC_STATE( mat[4], tcl_tex1, MAT_STATE_SIZE, "MAT/texmat1", 1 );
+   ALLOC_STATE( ucp[0], tcl_ucp0, UCP_STATE_SIZE, "UCP/userclip-0", 1 );
+   ALLOC_STATE( ucp[1], tcl_ucp1, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
+   ALLOC_STATE( ucp[2], tcl_ucp2, UCP_STATE_SIZE, "UCP/userclip-2", 1 );
+   ALLOC_STATE( ucp[3], tcl_ucp3, UCP_STATE_SIZE, "UCP/userclip-3", 1 );
+   ALLOC_STATE( ucp[4], tcl_ucp4, UCP_STATE_SIZE, "UCP/userclip-4", 1 );
+   ALLOC_STATE( ucp[5], tcl_ucp5, UCP_STATE_SIZE, "UCP/userclip-5", 1 );
+   ALLOC_STATE( lit[0], tcl_lit0, LIT_STATE_SIZE, "LIT/light-0", 1 );
+   ALLOC_STATE( lit[1], tcl_lit1, LIT_STATE_SIZE, "LIT/light-1", 1 );
+   ALLOC_STATE( lit[2], tcl_lit2, LIT_STATE_SIZE, "LIT/light-2", 1 );
+   ALLOC_STATE( lit[3], tcl_lit3, LIT_STATE_SIZE, "LIT/light-3", 1 );
+   ALLOC_STATE( lit[4], tcl_lit4, LIT_STATE_SIZE, "LIT/light-4", 1 );
+   ALLOC_STATE( lit[5], tcl_lit5, LIT_STATE_SIZE, "LIT/light-5", 1 );
+   ALLOC_STATE( lit[6], tcl_lit6, LIT_STATE_SIZE, "LIT/light-6", 1 );
+   ALLOC_STATE( lit[7], tcl_lit7, LIT_STATE_SIZE, "LIT/light-7", 1 );
+
+
+   /* Fill in the packet headers:
+    */
+   rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(RADEON_EMIT_PP_MISC);
+   rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(RADEON_EMIT_PP_CNTL);
+   rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(RADEON_EMIT_RB3D_COLORPITCH);
+   rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(RADEON_EMIT_RE_LINE_PATTERN);
+   rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(RADEON_EMIT_SE_LINE_WIDTH);
+   rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(RADEON_EMIT_RB3D_STENCILREFMASK);
+   rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(RADEON_EMIT_SE_VPORT_XSCALE);
+   rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(RADEON_EMIT_SE_CNTL);
+   rmesa->hw.set.cmd[SET_CMD_1] = cmdpkt(RADEON_EMIT_SE_CNTL_STATUS);
+   rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(RADEON_EMIT_RE_MISC);
+   rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_0);
+   rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_0);
+   rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_1);
+   rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_1);
+   rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(RADEON_EMIT_SE_ZBIAS_FACTOR);
+   rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT);
+   rmesa->hw.mtl.cmd[MTL_CMD_0] = 
+      cmdpkt(RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED);
+   rmesa->hw.grd.cmd[GRD_CMD_0] = 
+      cmdscl( RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 );
+   rmesa->hw.fog.cmd[FOG_CMD_0] = 
+      cmdvec( RADEON_VS_FOG_PARAM_ADDR, 1, 4 );
+   rmesa->hw.glt.cmd[GLT_CMD_0] = 
+      cmdvec( RADEON_VS_GLOBAL_AMBIENT_ADDR, 1, 4 );
+   rmesa->hw.eye.cmd[EYE_CMD_0] = 
+      cmdvec( RADEON_VS_EYE_VECTOR_ADDR, 1, 4 );
+
+   for (i = 0 ; i < 5; i++) {
+      rmesa->hw.mat[i].cmd[MAT_CMD_0] = 
+	 cmdvec( RADEON_VS_MATRIX_0_ADDR + i*4, 1, 16);
+   }
+
+   for (i = 0 ; i < 8; i++) {
+      rmesa->hw.lit[i].cmd[LIT_CMD_0] = 
+	 cmdvec( RADEON_VS_LIGHT_AMBIENT_ADDR + i, 8, 24 );
+      rmesa->hw.lit[i].cmd[LIT_CMD_1] = 
+	 cmdscl( RADEON_SS_LIGHT_DCD_ADDR + i, 8, 6 );
+   }
+
+   for (i = 0 ; i < 6; i++) {
+      rmesa->hw.ucp[i].cmd[UCP_CMD_0] = 
+	 cmdvec( RADEON_VS_UCP_ADDR + i, 1, 4 );
+   }
+
+   rmesa->last_ReallyEnabled = -1;
+
+   /* Initial Hardware state:
+    */
+   rmesa->hw.ctx.cmd[CTX_PP_MISC] = (RADEON_ALPHA_TEST_PASS |
+				     RADEON_CHROMA_FUNC_FAIL |
+				     RADEON_CHROMA_KEY_NEAREST |
+				     RADEON_SHADOW_FUNC_EQUAL |
+				     RADEON_SHADOW_PASS_1 |
+				     RADEON_RIGHT_HAND_CUBE_OGL);
+
+   rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] = (RADEON_FOG_VERTEX |
+					  RADEON_FOG_USE_DEPTH);
+
+   rmesa->hw.ctx.cmd[CTX_RE_SOLID_COLOR] = 0x00000000;
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = (RADEON_COMB_FCN_ADD_CLAMP |
+					    RADEON_SRC_BLEND_GL_ONE |
+					    RADEON_DST_BLEND_GL_ZERO );
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] =
+      rmesa->radeonScreen->depthOffset;
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] = 
+      ((rmesa->radeonScreen->depthPitch &
+	RADEON_DEPTHPITCH_MASK) |
+       RADEON_DEPTH_ENDIAN_NO_SWAP);
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (depth_fmt |
+					       RADEON_Z_TEST_LESS |
+					       RADEON_STENCIL_TEST_ALWAYS |
+					       RADEON_STENCIL_FAIL_KEEP |
+					       RADEON_STENCIL_ZPASS_KEEP |
+					       RADEON_STENCIL_ZFAIL_KEEP |
+					       RADEON_Z_WRITE_ENABLE);
+
+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] = (RADEON_SCISSOR_ENABLE |
+				     RADEON_ANTI_ALIAS_NONE);
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = (RADEON_PLANE_MASK_ENABLE |
+				       color_fmt |
+				       (1<<15));
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = (rmesa->state.color.drawOffset &
+					      RADEON_COLOROFFSET_MASK);
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((rmesa->state.color.drawPitch &
+					      RADEON_COLORPITCH_MASK) |
+					     RADEON_COLOR_ENDIAN_NO_SWAP);
+
+   rmesa->hw.set.cmd[SET_SE_CNTL] = (RADEON_FFACE_CULL_CCW |
+				     RADEON_BFACE_SOLID |
+				     RADEON_FFACE_SOLID |
+/*  			     RADEON_BADVTX_CULL_DISABLE | */
+				     RADEON_FLAT_SHADE_VTX_LAST |
+				     RADEON_DIFFUSE_SHADE_GOURAUD |
+				     RADEON_ALPHA_SHADE_GOURAUD |
+				     RADEON_SPECULAR_SHADE_GOURAUD |
+				     RADEON_FOG_SHADE_GOURAUD |
+				     RADEON_VPORT_XY_XFORM_ENABLE |
+				     RADEON_VPORT_Z_XFORM_ENABLE |
+				     RADEON_VTX_PIX_CENTER_OGL |
+				     RADEON_ROUND_MODE_TRUNC |
+				     RADEON_ROUND_PREC_8TH_PIX);
+
+   rmesa->hw.set.cmd[SET_SE_CNTL_STATUS] = (RADEON_VC_NO_SWAP 
+					    /*  | RADEON_TCL_BYPASS */);
+
+   rmesa->hw.set.cmd[SET_SE_COORDFMT] = (
+      RADEON_VTX_W0_IS_NOT_1_OVER_W0 |
+      RADEON_TEX1_W_ROUTING_USE_Q1);
+
+
+   rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] =
+      ((0x0000 & RADEON_LINE_PATTERN_MASK) |
+       (0 << RADEON_LINE_REPEAT_COUNT_SHIFT) |
+       (0 << RADEON_LINE_PATTERN_START_SHIFT) |
+       RADEON_LINE_PATTERN_LITTLE_BIT_ORDER);
+
+   rmesa->hw.lin.cmd[LIN_RE_LINE_STATE] = 
+      ((0 << RADEON_LINE_CURRENT_PTR_SHIFT) |
+       (1 << RADEON_LINE_CURRENT_COUNT_SHIFT));
+
+   rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] = (1 << 4);
+
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] = 
+      ((0x00 << RADEON_STENCIL_REF_SHIFT) |
+       (0xff << RADEON_STENCIL_MASK_SHIFT) |
+       (0xff << RADEON_STENCIL_WRITEMASK_SHIFT));
+
+   rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = RADEON_ROP_COPY;
+   rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] = 0xffffffff;
+
+   rmesa->hw.msc.cmd[MSC_RE_MISC] = 
+      ((0 << RADEON_STIPPLE_X_OFFSET_SHIFT) |
+       (0 << RADEON_STIPPLE_Y_OFFSET_SHIFT) |
+       RADEON_STIPPLE_BIG_BIT_ORDER);
+
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE]  = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE]  = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = 0x00000000;
+
+   rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] = RADEON_BORDER_MODE_OGL;
+   rmesa->hw.tex[0].cmd[TEX_PP_TXFORMAT] = 
+      (RADEON_TXFORMAT_ENDIAN_NO_SWAP |
+       RADEON_TXFORMAT_PERSPECTIVE_ENABLE |
+       RADEON_TXFORMAT_ST_ROUTE_STQ0 |
+       (2 << RADEON_TXFORMAT_WIDTH_SHIFT) |
+       (2 << RADEON_TXFORMAT_HEIGHT_SHIFT));
+   rmesa->hw.tex[0].cmd[TEX_PP_TXOFFSET] = 0x2000;
+   rmesa->hw.tex[0].cmd[TEX_PP_BORDER_COLOR] = 0;
+   rmesa->hw.tex[0].cmd[TEX_PP_TXCBLEND] =  
+      (RADEON_COLOR_ARG_A_ZERO |
+       RADEON_COLOR_ARG_B_ZERO |
+       RADEON_COLOR_ARG_C_CURRENT_COLOR |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX);
+   rmesa->hw.tex[0].cmd[TEX_PP_TXABLEND] = 
+      (RADEON_ALPHA_ARG_A_ZERO |
+       RADEON_ALPHA_ARG_B_ZERO |
+       RADEON_ALPHA_ARG_C_CURRENT_ALPHA |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX);
+   rmesa->hw.tex[0].cmd[TEX_PP_TFACTOR] = 0;
+
+   rmesa->hw.tex[1].cmd[TEX_PP_TXFILTER] = RADEON_BORDER_MODE_OGL;
+   rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] = 
+      (RADEON_TXFORMAT_ENDIAN_NO_SWAP |
+       RADEON_TXFORMAT_PERSPECTIVE_ENABLE |
+       RADEON_TXFORMAT_ST_ROUTE_STQ1 |
+       (2 << RADEON_TXFORMAT_WIDTH_SHIFT) |
+       (2 << RADEON_TXFORMAT_HEIGHT_SHIFT));
+   rmesa->hw.tex[1].cmd[TEX_PP_TXOFFSET] = 0x8000;
+   rmesa->hw.tex[1].cmd[TEX_PP_BORDER_COLOR] = 0;
+   rmesa->hw.tex[1].cmd[TEX_PP_TXCBLEND] =     
+      (RADEON_COLOR_ARG_A_ZERO |
+       RADEON_COLOR_ARG_B_ZERO |
+       RADEON_COLOR_ARG_C_CURRENT_COLOR |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX);
+   rmesa->hw.tex[1].cmd[TEX_PP_TXABLEND] = 
+      (RADEON_ALPHA_ARG_A_ZERO |
+       RADEON_ALPHA_ARG_B_ZERO |
+       RADEON_ALPHA_ARG_C_CURRENT_ALPHA |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX);
+   rmesa->hw.tex[1].cmd[TEX_PP_TFACTOR] = 0;
+
+   /* Can only add ST1 at the time of doing some multitex but can keep
+    * it after that.  Errors if DIFFUSE is missing.
+    */
+   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = 
+      (RADEON_TCL_VTX_Z0 |
+       RADEON_TCL_VTX_W0 |
+       RADEON_TCL_VTX_PK_DIFFUSE
+	 );	/* need to keep this uptodate */
+						   
+   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] =
+      ( RADEON_TCL_COMPUTE_XYZW 	|
+	(RADEON_TCL_TEX_INPUT_TEX_0 << RADEON_TCL_TEX_0_OUTPUT_SHIFT) |
+	(RADEON_TCL_TEX_INPUT_TEX_1 << RADEON_TCL_TEX_1_OUTPUT_SHIFT) |
+	(RADEON_TCL_TEX_INPUT_TEX_2 << RADEON_TCL_TEX_2_OUTPUT_SHIFT));
+
+
+   /* XXX */
+   rmesa->hw.tcl.cmd[TCL_MATRIX_SELECT_0] = 
+      ((MODEL << RADEON_MODELVIEW_0_SHIFT) |
+       (MODEL_IT << RADEON_IT_MODELVIEW_0_SHIFT));
+
+   rmesa->hw.tcl.cmd[TCL_MATRIX_SELECT_1] = 
+      ((MODEL_PROJ << RADEON_MODELPROJECT_0_SHIFT) |
+       (TEXMAT_0 << RADEON_TEXMAT_0_SHIFT) |
+       (TEXMAT_1 << RADEON_TEXMAT_1_SHIFT));
+
+   rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] = 
+      (RADEON_UCP_IN_CLIP_SPACE |
+       RADEON_CULL_FRONT_IS_CCW);
+
+   rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL] = 0; 
+
+   rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = 
+      (RADEON_SPECULAR_LIGHTS |
+       RADEON_DIFFUSE_SPECULAR_COMBINE |
+       RADEON_LOCAL_LIGHT_VEC_GL |
+       (RADEON_LM_SOURCE_STATE_PREMULT << RADEON_EMISSIVE_SOURCE_SHIFT) |
+       (RADEON_LM_SOURCE_STATE_PREMULT << RADEON_AMBIENT_SOURCE_SHIFT) |
+       (RADEON_LM_SOURCE_STATE_PREMULT << RADEON_DIFFUSE_SOURCE_SHIFT) |
+       (RADEON_LM_SOURCE_STATE_PREMULT << RADEON_SPECULAR_SOURCE_SHIFT)); 
+
+   for (i = 0 ; i < 8; i++) {
+      struct gl_light *l = &ctx->Light.Light[i];
+      GLenum p = GL_LIGHT0 + i;
+      *(float *)&(rmesa->hw.lit[i].cmd[LIT_RANGE_CUTOFF]) = FLT_MAX;
+
+      ctx->Driver.Lightfv( ctx, p, GL_AMBIENT, l->Ambient );
+      ctx->Driver.Lightfv( ctx, p, GL_DIFFUSE, l->Diffuse );
+      ctx->Driver.Lightfv( ctx, p, GL_SPECULAR, l->Specular );
+      ctx->Driver.Lightfv( ctx, p, GL_POSITION, 0 );
+      ctx->Driver.Lightfv( ctx, p, GL_SPOT_DIRECTION, 0 );
+      ctx->Driver.Lightfv( ctx, p, GL_SPOT_EXPONENT, &l->SpotExponent );
+      ctx->Driver.Lightfv( ctx, p, GL_SPOT_CUTOFF, &l->SpotCutoff );
+      ctx->Driver.Lightfv( ctx, p, GL_CONSTANT_ATTENUATION,
+			   &l->ConstantAttenuation );
+      ctx->Driver.Lightfv( ctx, p, GL_LINEAR_ATTENUATION, 
+			   &l->LinearAttenuation );
+      ctx->Driver.Lightfv( ctx, p, GL_QUADRATIC_ATTENUATION, 
+		     &l->QuadraticAttenuation );
+   }
+
+   ctx->Driver.LightModelfv( ctx, GL_LIGHT_MODEL_AMBIENT, 
+			     ctx->Light.Model.Ambient );
+
+   TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange( ctx );
+
+   for (i = 0 ; i < 6; i++) {	/* only the ucp[0..5] atoms are allocated above */
+      ctx->Driver.ClipPlane( ctx, GL_CLIP_PLANE0 + i, NULL );
+   }
+
+   ctx->Driver.Fogfv( ctx, GL_FOG_MODE, 0 );
+   ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density );
+   ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start );
+   ctx->Driver.Fogfv( ctx, GL_FOG_END, &ctx->Fog.End );
+   ctx->Driver.Fogfv( ctx, GL_FOG_COLOR, ctx->Fog.Color );
+   ctx->Driver.Fogfv( ctx, GL_FOG_COORDINATE_SOURCE_EXT, 0 );
+   
+   
+   /* Set up vector and scalar state commands:
+    */
+/*     upload_matrix( rmesa, ctx->ModelView.m, MODEL ); */
+/*     upload_matrix_t( rmesa, ctx->ModelView.inv, MODEL_IT ); */
+/*     upload_matrix( rmesa, ctx->TextureMatrix[0].m, TEXMAT_0 ); */
+/*     upload_matrix( rmesa, ctx->TextureMatrix[1].m, TEXMAT_1 ); */
+/*     upload_matrix( rmesa, ctx->_ModelProjectMatrix.m, TEXMAT_2 ); */
+
+   rmesa->hw.grd.cmd[GRD_VERT_GUARD_CLIP_ADJ] = IEEE_ONE;
+   rmesa->hw.grd.cmd[GRD_VERT_GUARD_DISCARD_ADJ] = IEEE_ONE;
+   rmesa->hw.grd.cmd[GRD_HORZ_GUARD_CLIP_ADJ] = IEEE_ONE;
+   rmesa->hw.grd.cmd[GRD_HORZ_GUARD_DISCARD_ADJ] = IEEE_ONE;
+
+   rmesa->hw.eye.cmd[EYE_X] = 0;
+   rmesa->hw.eye.cmd[EYE_Y] = 0;
+   rmesa->hw.eye.cmd[EYE_Z] = IEEE_ONE;
+   rmesa->hw.eye.cmd[EYE_RESCALE_FACTOR] = IEEE_ONE;
+}
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_swtcl.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_swtcl.c
new file mode 100644
index 000000000..4ebaf0bbd
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_swtcl.c
@@ -0,0 +1,1162 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#include "glheader.h"
+#include "mtypes.h"
+#include "colormac.h"
+#include "enums.h"
+#include "mem.h"
+#include "mmath.h"
+#include "macros.h"
+
+#include "swrast_setup/swrast_setup.h"
+#include "math/m_translate.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+#include "radeon_context.h"
+#include "radeon_ioctl.h"
+#include "radeon_state.h"
+#include "radeon_swtcl.h"
+#include "radeon_tcl.h"
+
+/***********************************************************************
+ *              Build render functions from dd templates               *
+ ***********************************************************************/
+
+
+#define RADEON_XYZW_BIT		0x01
+#define RADEON_RGBA_BIT		0x02
+#define RADEON_SPEC_BIT		0x04	/* also tested by DO_FOG */
+#define RADEON_TEX0_BIT		0x08
+#define RADEON_TEX1_BIT		0x10
+#define RADEON_PTEX_BIT		0x20
+#define RADEON_MAX_SETUP	0x40	/* one past the OR of all bits above */
+
+static void flush_last_swtcl_prim( radeonContextPtr rmesa  );
+static void flush_last_swtcl_prim_compat( radeonContextPtr rmesa );
+
+static struct {
+   void                (*emit)( GLcontext *, GLuint, GLuint, void *, GLuint );
+   interp_func		interp;
+   copy_pv_func	        copy_pv;
+   GLboolean           (*check_tex_sizes)( GLcontext *ctx );
+   GLuint               vertex_size;
+   GLuint               vertex_stride_shift;
+   GLuint               vertex_format;
+} setup_tab[RADEON_MAX_SETUP];	/* indexed by an OR of the RADEON_*_BIT flags */
+
+/* Vertex-format words built from RADEON_CP_VC_FRMT_* bits, one per layout. */
+#define TINY_VERTEX_FORMAT	        (RADEON_CP_VC_FRMT_XY |		\
+					 RADEON_CP_VC_FRMT_Z |		\
+					 RADEON_CP_VC_FRMT_PKCOLOR)
+
+#define NOTEX_VERTEX_FORMAT	        (RADEON_CP_VC_FRMT_XY |		\
+					 RADEON_CP_VC_FRMT_Z |		\
+					 RADEON_CP_VC_FRMT_W0 |		\
+					 RADEON_CP_VC_FRMT_PKCOLOR |	\
+					 RADEON_CP_VC_FRMT_PKSPEC)
+
+#define TEX0_VERTEX_FORMAT	        (RADEON_CP_VC_FRMT_XY |		\
+					 RADEON_CP_VC_FRMT_Z |		\
+					 RADEON_CP_VC_FRMT_W0 |		\
+					 RADEON_CP_VC_FRMT_PKCOLOR |	\
+					 RADEON_CP_VC_FRMT_PKSPEC |	\
+					 RADEON_CP_VC_FRMT_ST0)
+
+#define TEX1_VERTEX_FORMAT	        (RADEON_CP_VC_FRMT_XY |		\
+					 RADEON_CP_VC_FRMT_Z |		\
+					 RADEON_CP_VC_FRMT_W0 |		\
+					 RADEON_CP_VC_FRMT_PKCOLOR |	\
+					 RADEON_CP_VC_FRMT_PKSPEC |	\
+					 RADEON_CP_VC_FRMT_ST0 |	\
+					 RADEON_CP_VC_FRMT_ST1)
+
+#define PROJ_TEX1_VERTEX_FORMAT	        (RADEON_CP_VC_FRMT_XY |		\
+					 RADEON_CP_VC_FRMT_Z |		\
+					 RADEON_CP_VC_FRMT_W0 |		\
+					 RADEON_CP_VC_FRMT_PKCOLOR |	\
+					 RADEON_CP_VC_FRMT_PKSPEC |	\
+					 RADEON_CP_VC_FRMT_ST0 |	\
+					 RADEON_CP_VC_FRMT_Q0 |         \
+					 RADEON_CP_VC_FRMT_ST1 |	\
+					 RADEON_CP_VC_FRMT_Q1)
+
+#define TEX2_VERTEX_FORMAT 0	/* HAVE_TEX2_VERTICES is 0 in this file */
+#define TEX3_VERTEX_FORMAT 0	/* HAVE_TEX3_VERTICES is 0 in this file */
+#define PROJ_TEX3_VERTEX_FORMAT 0
+
+#define DO_XYZW (IND & RADEON_XYZW_BIT)
+#define DO_RGBA (IND & RADEON_RGBA_BIT)
+#define DO_SPEC (IND & RADEON_SPEC_BIT)
+#define DO_FOG  (IND & RADEON_SPEC_BIT)	/* fog and specular share one bit */
+#define DO_TEX0 (IND & RADEON_TEX0_BIT)
+#define DO_TEX1 (IND & RADEON_TEX1_BIT)
+#define DO_TEX2 0
+#define DO_TEX3 0
+#define DO_PTEX (IND & RADEON_PTEX_BIT)
+
+#define VERTEX radeonVertex
+#define GET_VIEWPORT_MAT() 0
+#define GET_TEXSOURCE(n)  n
+#define GET_VERTEX_FORMAT() RADEON_CONTEXT(ctx)->swtcl.vertex_format
+#define GET_VERTEX_STORE() RADEON_CONTEXT(ctx)->swtcl.verts
+#define GET_VERTEX_STRIDE_SHIFT() RADEON_CONTEXT(ctx)->swtcl.vertex_stride_shift
+#define GET_UBYTE_COLOR_STORE() &RADEON_CONTEXT(ctx)->UbyteColor
+#define GET_UBYTE_SPEC_COLOR_STORE() &RADEON_CONTEXT(ctx)->UbyteSecondaryColor
+
+#define HAVE_HW_VIEWPORT    1
+/* Tiny vertices don't seem to work atm - haven't looked into why.
+ */
+#define HAVE_HW_DIVIDE      (IND & ~(RADEON_XYZW_BIT|RADEON_RGBA_BIT))	/* nonzero only if IND has a bit beyond XYZW|RGBA */
+#define HAVE_TINY_VERTICES  1
+#define HAVE_RGBA_COLOR     1
+#define HAVE_NOTEX_VERTICES 1
+#define HAVE_TEX0_VERTICES  1
+#define HAVE_TEX1_VERTICES  1
+#define HAVE_TEX2_VERTICES  0
+#define HAVE_TEX3_VERTICES  0
+#define HAVE_PTEX_VERTICES  1
+
+#define CHECK_HW_DIVIDE    (!(ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE| \
+                                                    DD_TRI_UNFILLED)))
+
+#define IMPORT_QUALIFIER
+#define IMPORT_FLOAT_COLORS radeon_import_float_colors
+#define IMPORT_FLOAT_SPEC_COLORS radeon_import_float_spec_colors
+
+#define INTERP_VERTEX setup_tab[RADEON_CONTEXT(ctx)->swtcl.SetupIndex].interp
+#define COPY_PV_VERTEX setup_tab[RADEON_CONTEXT(ctx)->swtcl.SetupIndex].copy_pv
+
+
+/***********************************************************************
+ *         Generate  pv-copying and translation functions              *
+ ***********************************************************************/
+
+#define TAG(x) radeon_##x
+#define IND ~0	/* every setup bit set for this inclusion */
+#include "tnl_dd/t_dd_vb.c"
+#undef IND
+
+
+/***********************************************************************
+ *             Generate vertex emit and interp functions               *
+ ***********************************************************************/
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT)
+#define TAG(x) x##_wg
+#include "tnl_dd/t_dd_vbtmp.h"	/* NOTE(review): presumably defines init_wg() etc. and #undefs IND/TAG - confirm */
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_TEX0_BIT)
+#define TAG(x) x##_wgt0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_TEX0_BIT|RADEON_PTEX_BIT)
+#define TAG(x) x##_wgpt0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_TEX0_BIT|RADEON_TEX1_BIT)
+#define TAG(x) x##_wgt0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_TEX0_BIT|RADEON_TEX1_BIT|\
+             RADEON_PTEX_BIT)
+#define TAG(x) x##_wgpt0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_SPEC_BIT)
+#define TAG(x) x##_wgfs
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_SPEC_BIT|\
+	     RADEON_TEX0_BIT)
+#define TAG(x) x##_wgfst0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_SPEC_BIT|\
+	     RADEON_TEX0_BIT|RADEON_PTEX_BIT)
+#define TAG(x) x##_wgfspt0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_SPEC_BIT|\
+	     RADEON_TEX0_BIT|RADEON_TEX1_BIT)
+#define TAG(x) x##_wgfst0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_SPEC_BIT|\
+	     RADEON_TEX0_BIT|RADEON_TEX1_BIT|RADEON_PTEX_BIT)
+#define TAG(x) x##_wgfspt0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+
+/***********************************************************************
+ *                         Initialization 
+ ***********************************************************************/
+
+static void init_setup_tab( void )
+{
+   /* The generated init_<suffix>() routines, run in the same order
+    * in which the vertex templates are instantiated.
+    */
+   static void (* const init_funcs[])( void ) = {
+      init_wg, init_wgt0, init_wgpt0, init_wgt0t1, init_wgpt0t1,
+      init_wgfs, init_wgfst0, init_wgfspt0, init_wgfst0t1, init_wgfspt0t1
+   };
+   GLuint n;
+   for (n = 0; n < sizeof(init_funcs) / sizeof(init_funcs[0]); n++)
+      init_funcs[n]();
+}
+
+
+
+void radeonPrintSetupFlags(char *msg, GLuint flags )
+{
+   /* Each set bit contributes its label; a clear bit contributes "". */
+   const char *xyzw = (flags & RADEON_XYZW_BIT) ? " xyzw," : "";
+   const char *rgba = (flags & RADEON_RGBA_BIT) ? " rgba," : "";
+   const char *spec = (flags & RADEON_SPEC_BIT) ? " spec/fog," : "";
+   const char *tex0 = (flags & RADEON_TEX0_BIT) ? " tex-0," : "";
+   const char *tex1 = (flags & RADEON_TEX1_BIT) ? " tex-1," : "";
+   const char *ptex = (flags & RADEON_PTEX_BIT) ? " proj-tex," : "";
+   fprintf(stderr, "%s(%x): %s%s%s%s%s%s\n", msg, (int)flags,
+	   xyzw, rgba, spec, tex0, tex1, ptex);
+}
+
+
+static void radeonRenderStart( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   /* If check_tex_sizes() rejects the current setup, move up to the
+    * projective-texture vertex format, which the Radeon handles nicely. */
+   if (!setup_tab[rmesa->swtcl.SetupIndex].check_tex_sizes(ctx)) {
+      GLuint idx;
+      rmesa->swtcl.SetupIndex |= (RADEON_PTEX_BIT|RADEON_RGBA_BIT);
+      idx = rmesa->swtcl.SetupIndex;
+      if (rmesa->swtcl.vertex_format != setup_tab[idx].vertex_format) {
+	 RADEON_NEWPRIM(rmesa);
+	 rmesa->swtcl.vertex_format = setup_tab[idx].vertex_format;
+	 rmesa->swtcl.vertex_size = setup_tab[idx].vertex_size;
+	 rmesa->swtcl.vertex_stride_shift = setup_tab[idx].vertex_stride_shift;
+      }
+
+      if ((ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED)) == 0) {
+	 tnl->Driver.Render.Interp = setup_tab[rmesa->swtcl.SetupIndex].interp;
+	 tnl->Driver.Render.CopyPV = setup_tab[rmesa->swtcl.SetupIndex].copy_pv;
+      }
+   }
+
+   if (rmesa->dma.flush &&
+       rmesa->dma.flush != flush_last_swtcl_prim &&
+       rmesa->dma.flush != flush_last_swtcl_prim_compat)
+      rmesa->dma.flush( rmesa );
+}
+
+
+void radeonBuildVertices( GLcontext *ctx, GLuint start, GLuint count,
+			   GLuint newinputs )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   const GLuint shift = rmesa->swtcl.vertex_stride_shift;
+   GLubyte *first = (GLubyte *)rmesa->swtcl.verts + (start << shift);
+
+   /* Inputs recorded in SetupNewInputs are folded in and consumed here,
+    * whether or not anything ends up being emitted. */
+   newinputs |= rmesa->swtcl.SetupNewInputs;
+   rmesa->swtcl.SetupNewInputs = 0;
+
+   if (newinputs)
+      setup_tab[rmesa->swtcl.SetupIndex].emit( ctx, start, count, first,
+					       1 << shift );
+}
+
+void radeonChooseVertexState( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLuint idx = (RADEON_XYZW_BIT | RADEON_RGBA_BIT);
+   GLuint extras, needproj, coord_fmt;
+
+   /* Nothing to choose unless TclFallback is set and Fallback is clear. */
+   if (!rmesa->TclFallback || rmesa->Fallback)
+      return;
+
+   /* Two-sided lighting and unfilled triangles use the "extras"
+    * interp/copy_pv helpers and need projected coordinates.
+    */
+   extras = ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED);
+
+   if (ctx->Fog.Enabled || (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR))
+      idx |= RADEON_SPEC_BIT;
+
+   if (ctx->Texture._ReallyEnabled & 0x0ff)
+      idx |= RADEON_TEX0_BIT;
+   if (ctx->Texture._ReallyEnabled & 0x0f0)
+      idx |= RADEON_TEX1_BIT;
+
+   rmesa->swtcl.SetupIndex = idx;
+
+   if (extras == 0) {
+      tnl->Driver.Render.Interp = setup_tab[idx].interp;
+      tnl->Driver.Render.CopyPV = setup_tab[idx].copy_pv;
+   }
+   else {
+      tnl->Driver.Render.Interp = radeon_interp_extras;
+      tnl->Driver.Render.CopyPV = radeon_copy_pv_extras;
+   }
+
+   if (rmesa->swtcl.vertex_format != setup_tab[idx].vertex_format) {
+      RADEON_NEWPRIM(rmesa);
+      rmesa->swtcl.vertex_format = setup_tab[idx].vertex_format;
+      rmesa->swtcl.vertex_size = setup_tab[idx].vertex_size;
+      rmesa->swtcl.vertex_stride_shift = setup_tab[idx].vertex_stride_shift;
+   }
+
+   /* HW perspective divide is a win, but tiny vertex formats are a
+    * bigger one, so tiny vertices (and the extras paths) select the
+    * PRE_MULT_1_OVER_W0 coordinate format and projected coords.
+    */
+   needproj = (setup_tab[idx].vertex_format == TINY_VERTEX_FORMAT || extras)
+      ? GL_TRUE : GL_FALSE;
+   coord_fmt = RADEON_TEX1_W_ROUTING_USE_Q1;
+   if (needproj)
+      coord_fmt |= (RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
+		    RADEON_VTX_Z_PRE_MULT_1_OVER_W0);
+   else
+      coord_fmt |= RADEON_VTX_W0_IS_NOT_1_OVER_W0;
+
+   if ( coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT] ) {
+      RADEON_STATECHANGE( rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_COORDFMT] = coord_fmt;
+   }
+   _tnl_need_projected_coords( ctx, needproj );
+}
+
+
+/* Flush vertices in the current dma region: if a buffer is bound, emit any
+ * accumulated vertices as one primitive and clear rmesa->dma.flush. */
+static void flush_last_swtcl_prim( radeonContextPtr rmesa  )
+{
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (rmesa->dma.current.buf) {
+      struct radeon_dma_region *current = &rmesa->dma.current;
+      GLuint current_offset = (rmesa->dri.agp_buffer_offset +
+			       current->buf->buf->idx * RADEON_BUFFER_SIZE + 
+			       current->start);
+
+      assert (!(rmesa->swtcl.hw_primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
+
+      assert (current->start + 
+	      rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+	      current->ptr);
+
+      if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
+	 radeonEmitVertexAOS( rmesa,
+			      rmesa->swtcl.vertex_size,
+			      current_offset);
+
+	 radeonEmitVbufPrim( rmesa,
+			     rmesa->swtcl.vertex_format,
+			     rmesa->swtcl.hw_primitive,
+			     rmesa->swtcl.numverts);
+      }
+      /* Restart accumulation at the current write position. */
+      rmesa->swtcl.numverts = 0;
+      current->start = current->ptr;
+
+      rmesa->dma.flush = 0;
+   }
+}
+
+
+static void flush_last_swtcl_prim_compat( radeonContextPtr rmesa )
+{
+   struct radeon_dma_region *current = &rmesa->dma.current;
+   /* Flush callback installed when drmMinor == 1: emits the whole buffer. */
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s buf %p start %d ptr %d\n", 
+	      __FUNCTION__,
+	      (void *)current->buf,	/* %p requires a void pointer */
+	      current->start,
+	      current->ptr);
+
+   assert (!(rmesa->swtcl.hw_primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
+   assert (current->start + 
+	   rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+	   current->ptr);
+   assert (current->start == 0);
+
+   if (current->ptr && current->buf) {
+      assert (current->buf->refcount == 1);
+
+      radeonCompatEmitPrimitive( rmesa,
+				 rmesa->swtcl.vertex_format,
+				 rmesa->swtcl.hw_primitive,
+				 rmesa->swtcl.numverts);
+      
+      /* The buffer has been released:
+       */
+      FREE(current->buf);
+      current->buf = 0;
+      current->start = 0;
+      current->ptr = current->end;
+      /* ptr == end makes the next allocation refill the region. */
+   }
+
+   rmesa->swtcl.numverts = 0;
+   rmesa->dma.flush = 0;
+}
+
+
+/* Alloc space for nverts vertices of vsize bytes each in the current dma
+ * region; returns a pointer to the start of that space. */
+static __inline void *radeonAllocDmaLowVerts( radeonContextPtr rmesa,
+					      int nverts, int vsize )
+{
+   GLuint bytes = vsize * nverts;
+
+   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
+      radeonRefillCurrentDmaRegion( rmesa );
+   /* With no flush pending, install the swtcl one (compat for drm minor 1). */
+   if (!rmesa->dma.flush) {
+      if (rmesa->dri.drmMinor == 1)
+	 rmesa->dma.flush = flush_last_swtcl_prim_compat;
+      else
+	 rmesa->dma.flush = flush_last_swtcl_prim;
+   }
+
+   assert( vsize == rmesa->swtcl.vertex_size * 4 );
+   assert( rmesa->dma.flush == flush_last_swtcl_prim ||
+	   rmesa->dma.flush == flush_last_swtcl_prim_compat);
+   assert (rmesa->dma.current.start + 
+	   rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+	   rmesa->dma.current.ptr);
+   /* Hand out the space, then advance the write pointer and vertex count. */
+
+   {
+      GLubyte *head = rmesa->dma.current.address + rmesa->dma.current.ptr;
+      rmesa->dma.current.ptr += bytes;
+      rmesa->swtcl.numverts += nverts;
+      return head;
+   }
+
+}
+
+
+
+
+void radeon_emit_contiguous_verts( GLcontext *ctx, GLuint start, GLuint count )
+{
+   /* Reserve DMA space for vertices [start, count) and emit them into it. */
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint bytes_per_vert = rmesa->swtcl.vertex_size * 4;
+   CARD32 *out = radeonAllocDmaLowVerts( rmesa, count - start, bytes_per_vert );
+   setup_tab[rmesa->swtcl.SetupIndex].emit( ctx, start, count, out, bytes_per_vert );
+}
+
+
+
+void radeon_emit_indexed_verts( GLcontext *ctx, GLuint start, GLuint count )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   /* Get a dma region for the vertices (the 64 is presumably an alignment). */
+   radeonAllocDmaRegionVerts( rmesa, 
+			      &rmesa->swtcl.indexed_verts, 
+			      count - start,
+			      rmesa->swtcl.vertex_size * 4, 
+			      64);
+   /* Emit vertices [start, count) at the start of that region. */
+   setup_tab[rmesa->swtcl.SetupIndex].emit( 
+      ctx, start, count, 
+      rmesa->swtcl.indexed_verts.address + rmesa->swtcl.indexed_verts.start, 
+      rmesa->swtcl.vertex_size * 4 );
+}
+
+
+/*
+ * Render unclipped vertex buffers by emitting vertices directly to
+ * dma buffers.  Use strip/fan hardware primitives where possible.
+ * Try to simulate missing primitives with indexed vertices.
+ */
+#define HAVE_POINTS      1
+#define HAVE_LINES       1
+#define HAVE_LINE_STRIPS 1
+#define HAVE_TRIANGLES   1
+#define HAVE_TRI_STRIPS  1
+#define HAVE_TRI_STRIP_1 0
+#define HAVE_TRI_FANS    1
+#define HAVE_QUADS       0
+#define HAVE_QUAD_STRIPS 0
+#define HAVE_POLYGONS    0
+#define HAVE_ELTS        1
+
+static const GLuint hw_prim[GL_POLYGON+1] = {	/* indexed by GL primitive */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_POINT,		/* GL_POINTS */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,		/* GL_LINES */
+   0,						/* GL_LINE_LOOP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP,	/* GL_LINE_STRIP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_TRIANGLES */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP,	/* GL_TRIANGLE_STRIP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN,		/* GL_TRIANGLE_FAN */
+   0,						/* GL_QUADS */
+   0,						/* GL_QUAD_STRIP */
+   0						/* GL_POLYGON */
+};
+
+static __inline void radeonDmaPrimitive( radeonContextPtr rmesa, GLenum prim )
+{
+   RADEON_NEWPRIM( rmesa );	/* presumably flushes; the assert checks none remain */
+   rmesa->swtcl.hw_primitive = hw_prim[prim];	/* GL prim -> hw prim code */
+   assert(rmesa->dma.current.ptr == rmesa->dma.current.start);
+}
+
+static __inline void radeonEltPrimitive( radeonContextPtr rmesa, GLenum prim )
+{
+   RADEON_NEWPRIM( rmesa );	/* then select the PRIM_WALK_IND form of the hw prim */
+   rmesa->swtcl.hw_primitive = hw_prim[prim] | RADEON_CP_VC_CNTL_PRIM_WALK_IND;
+}
+
+
+static void VERT_FALLBACK( GLcontext *ctx, GLuint start, GLuint count,
+			   GLuint flags )
+{
+   /* Draw via the tnl driver hooks: notify, build all inputs, PrimTabVerts. */
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   const GLuint mode = flags & PRIM_MODE_MASK;
+   tnl->Driver.Render.PrimitiveNotify( ctx, mode );
+   tnl->Driver.Render.BuildVertices( ctx, start, count, ~0 );
+   tnl->Driver.Render.PrimTabVerts[mode]( ctx, start, count, flags );
+   RADEON_CONTEXT(ctx)->swtcl.SetupNewInputs = VERT_CLIP;
+}
+
+static void ELT_FALLBACK( GLcontext *ctx, GLuint start, GLuint count,
+			  GLuint flags )
+{
+   /* Draw via the tnl driver hooks: notify, build all inputs, PrimTabElts. */
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   const GLuint mode = flags & PRIM_MODE_MASK;
+   tnl->Driver.Render.PrimitiveNotify( ctx, mode );
+   tnl->Driver.Render.BuildVertices( ctx, start, count, ~0 );
+   tnl->Driver.Render.PrimTabElts[mode]( ctx, start, count, flags );
+   RADEON_CONTEXT(ctx)->swtcl.SetupNewInputs = VERT_CLIP;
+}
+
+
+#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
+#define ELTS_VARS  GLushort *dest
+#define INIT( prim ) radeonDmaPrimitive( rmesa, prim )
+#define ELT_INIT(prim) radeonEltPrimitive( rmesa, prim )
+#define NEW_PRIMITIVE()  RADEON_NEWPRIM( rmesa )
+#define NEW_BUFFER()  radeonRefillCurrentDmaRegion( rmesa )
+#define GET_CURRENT_VB_MAX_VERTS() \
+  (((int)rmesa->dma.current.end - (int)rmesa->dma.current.ptr) / (rmesa->swtcl.vertex_size*4))
+#define GET_SUBSEQUENT_VB_MAX_VERTS() \
+  ((RADEON_BUFFER_SIZE) / (rmesa->swtcl.vertex_size*4))
+
+#define GET_CURRENT_VB_MAX_ELTS() \
+  ((RADEON_CMD_BUF_SZ - (rmesa->store.cmd_used + 16)) / 2)
+#define GET_SUBSEQUENT_VB_MAX_ELTS() \
+  ((RADEON_CMD_BUF_SZ - 1024) / 2)
+
+
+
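+/* ALLOC_ELTS(nr): extend the open elt primitive if nr more 16-bit indices fit
+ * in the command buffer; otherwise flush and start a new open-ended one. */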
+#define ALLOC_ELTS(nr)							\
+do {									\
+   if (rmesa->dma.flush == radeonFlushElts &&				\
+       rmesa->store.cmd_used + nr*2 < RADEON_CMD_BUF_SZ) {		\
+									\
+      dest = (GLushort *)(rmesa->store.cmd_buf +			\
+			  rmesa->store.cmd_used);			\
+      rmesa->store.cmd_used += nr*2;					\
+   }									\
+   else {								\
+      if (rmesa->dma.flush) {						\
+	 rmesa->dma.flush( rmesa );					\
+      }									\
+									\
+      radeonEmitVertexAOS( rmesa,					\
+			   rmesa->swtcl.vertex_size,			\
+			   (rmesa->dri.agp_buffer_offset +		\
+			    rmesa->swtcl.indexed_verts.buf->buf->idx * 	\
+			    RADEON_BUFFER_SIZE +			\
+			    rmesa->swtcl.indexed_verts.start));		\
+									\
+      dest = radeonAllocEltsOpenEnded( rmesa,				\
+				       rmesa->swtcl.vertex_format,	\
+				       rmesa->swtcl.hw_primitive,	\
+				       nr );				\
+   }									\
+} while (0)
+
+#define ALLOC_ELTS_NEW_PRIMITIVE(nr) ALLOC_ELTS( nr )
+
+#define EMIT_ELT(offset, x) (dest)[offset] = (GLushort) (x)
+#if defined(__i386__)
+#define EMIT_TWO_ELTS(offset, x, y)  *(GLuint *)(dest+offset) = ((y)<<16)|(x);
+#endif
+#define INCR_ELTS( nr ) dest += nr
+#define RELEASE_ELT_VERTS() \
+  radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, __FUNCTION__ )
+#define EMIT_VERTS( ctx, j, nr ) \
+  radeon_emit_contiguous_verts(ctx, j, (j)+(nr))
+#define EMIT_INDEXED_VERTS( ctx, start, count ) \
+  radeon_emit_indexed_verts( ctx, start, count )
+
+
+#define TAG(x) radeon_dma_##x
+#include "tnl_dd/t_dd_dmatmp.h"
+
+
+/**********************************************************************/
+/*                          Render pipeline stage                     */
+/**********************************************************************/
+
+
+static GLboolean radeon_run_render( GLcontext *ctx,
+				    struct gl_pipeline_stage *stage )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   GLuint i, length, flags = 0;
+   render_func *tab = TAG(render_tab_verts);
+   /* Drop stale indexed vertices when not drawing elts or inputs changed. */
+   if (rmesa->swtcl.indexed_verts.buf && (!VB->Elts || stage->changed_inputs)) 
+      RELEASE_ELT_VERTS();
+   /* GL_TRUE presumably hands the VB on to a later pipeline stage. */
+   if (VB->ClipOrMask ||	     /* No clipping */
+       rmesa->swtcl.RenderIndex != 0 ||    /* No per-vertex manipulations */
+       ctx->Line.StippleFlag)        /* GH: THIS IS A HACK!!! */
+      return GL_TRUE;		
+
+   if (rmesa->dri.drmMinor < 3) {
+      /* drm 1.1 doesn't support vertex primitives starting in the
+       * middle of a buffer.  It doesn't support sane indexed vertices
+       * either.  drm 1.2 fixes both of these problems, but we don't have a
+       * compatibility layer to that version yet.  
+       */
+      return GL_TRUE;
+   }
+		
+   if (VB->Elts) {
+      tab = TAG(render_tab_elts);
+      if (!rmesa->swtcl.indexed_verts.buf)
+	 if (!TAG(emit_elt_verts)(ctx, 0, VB->Count))
+	    return GL_TRUE;	/* too many vertices */
+   }
+
+   tnl->Driver.Render.Start( ctx );
+   /* Walk the VB's primitive list until the PRIM_LAST flag is seen. */
+   for (i = 0 ; !(flags & PRIM_LAST) ; i += length)
+   {
+      flags = VB->Primitive[i];
+      length = VB->PrimitiveLength[i];
+
+      if (RADEON_DEBUG & DEBUG_PRIMS)
+	 fprintf(stderr, "radeon_render.c: prim %s %d..%d\n", 
+		 _mesa_lookup_enum_by_nr(flags & PRIM_MODE_MASK), 
+		 i, i+length);
+
+      if (length)
+	 tab[flags & PRIM_MODE_MASK]( ctx, i, i + length, flags );
+   }
+
+   tnl->Driver.Render.Finish( ctx );
+
+   return GL_FALSE;		/* finished the pipe */
+}
+
+
+
+static void radeon_check_render( GLcontext *ctx,
+				 struct gl_pipeline_stage *stage )
+{
+   /* Recompute the vertex inputs this stage asks for: object coords,
+    * clip coords and color always; specular, texcoords and fog coord
+    * only in GL_RENDER mode and only when the matching state is enabled.
+    */
+   stage->inputs = VERT_OBJ|VERT_CLIP|VERT_RGBA;
+
+   if (ctx->RenderMode != GL_RENDER)
+      return;
+
+   if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR)
+      stage->inputs |= VERT_SPEC_RGB;
+   if (ctx->Texture.Unit[0]._ReallyEnabled)
+      stage->inputs |= VERT_TEX(0);
+   if (ctx->Texture.Unit[1]._ReallyEnabled)
+      stage->inputs |= VERT_TEX(1);
+   if (ctx->Fog.Enabled)
+      stage->inputs |= VERT_FOG_COORD;
+}
+
+
+static void dtr( struct gl_pipeline_stage *stage )
+{
+   (void)stage;		/* stage destructor: does nothing */
+}
+
+
+const struct gl_pipeline_stage _radeon_render_stage =
+{
+   "radeon render",
+   (_DD_NEW_SEPARATE_SPECULAR |
+    _NEW_TEXTURE|
+    _NEW_FOG|
+    _NEW_RENDERMODE),		/* re-check (new inputs) */
+   0,				/* re-run (always runs) */
+   GL_TRUE,			/* active */
+   0, 0,			/* inputs (set in check_render), outputs */
+   0, 0,			/* changed_inputs, private */
+   dtr,				/* destructor */
+   radeon_check_render,		/* check - initially set to alloc data */
+   radeon_run_render		/* run */
+};
+
+
+
+/**************************************************************************/
+
+
+static const GLuint reduced_hw_prim[GL_POLYGON+1] = {	/* by GL primitive */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_POINT,		/* GL_POINTS */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,		/* GL_LINES */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,		/* GL_LINE_LOOP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,		/* GL_LINE_STRIP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_TRIANGLES */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_TRIANGLE_STRIP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_TRIANGLE_FAN */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_QUADS */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_QUAD_STRIP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST		/* GL_POLYGON */
+};
+
+static void radeonRasterPrimitive( GLcontext *ctx, GLuint hwprim );
+static void radeonRenderPrimitive( GLcontext *ctx, GLenum prim );
+static void radeonResetLineStipple( GLcontext *ctx );
+
+
+/***********************************************************************
+ *                    Emit primitives as inline vertices               *
+ ***********************************************************************/
+
+#define CTX_ARG radeonContextPtr rmesa
+#define CTX_ARG2 rmesa
+#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
+#define ALLOC_VERTS( n, size ) radeonAllocDmaLowVerts( rmesa, n, size * 4 )
+#define LOCAL_VARS						\
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);		\
+   const GLuint shift = rmesa->swtcl.vertex_stride_shift;	\
+   const char *radeonverts = (char *)rmesa->swtcl.verts;
+#define VERT(x) (radeonVertex *)(radeonverts + (x << shift))
+#define VERTEX radeonVertex 
+#undef TAG
+#define TAG(x) radeon_##x
+#include "tnl_dd/t_dd_triemit.h"
+
+
+/***********************************************************************
+ *          Macros for t_dd_tritmp.h to draw basic primitives          *
+ ***********************************************************************/
+
+#define QUAD( a, b, c, d ) radeon_quad( rmesa, a, b, c, d )
+#define TRI( a, b, c )     radeon_triangle( rmesa, a, b, c )
+#define LINE( a, b )       radeon_line( rmesa, a, b )
+#define POINT( a )         radeon_point( rmesa, a )
+
+/***********************************************************************
+ *              Build render functions from dd templates               *
+ ***********************************************************************/
+
+#define RADEON_TWOSIDE_BIT	0x01
+#define RADEON_UNFILLED_BIT	0x02
+#define RADEON_OFFSET_BIT	0x04 /* drmMinor == 1 */
+#define RADEON_MAX_TRIFUNC	0x08
+
+
+static struct {
+   points_func	        points;
+   line_func		line;
+   triangle_func	triangle;
+   quad_func		quad;
+} rast_tab[RADEON_MAX_TRIFUNC];
+
+
+#define DO_FALLBACK  0
+#define DO_OFFSET   (IND & RADEON_OFFSET_BIT)
+#define DO_UNFILLED (IND & RADEON_UNFILLED_BIT)
+#define DO_TWOSIDE  (IND & RADEON_TWOSIDE_BIT)
+#define DO_FLAT      0
+#define DO_TRI       1
+#define DO_QUAD      1
+#define DO_LINE      1
+#define DO_POINTS    1
+#define DO_FULL_QUAD 1
+
+#define HAVE_RGBA   1
+#define HAVE_SPEC   1
+#define HAVE_INDEX  0
+#define HAVE_BACK_COLORS  0
+#define HAVE_HW_FLATSHADE 1
+#define TAB rast_tab
+
+#define DEPTH_SCALE 1.0
+#define UNFILLED_TRI unfilled_tri
+#define UNFILLED_QUAD unfilled_quad
+#define VERT_X(_v) _v->v.x
+#define VERT_Y(_v) _v->v.y
+#define VERT_Z(_v) _v->v.z
+#define AREA_IS_CCW( a ) (a < 0)
+#define GET_VERTEX(e) (rmesa->swtcl.verts + (e<<rmesa->swtcl.vertex_stride_shift))
+
+#define VERT_SET_RGBA( v, c )    v->ui[coloroffset] = *(GLuint *)c
+#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
+#define VERT_SAVE_RGBA( idx )    color[idx] = v[idx]->ui[coloroffset]
+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
+
+#define VERT_SET_SPEC( v, c )    if (havespec) COPY_3V(v->ub4[5], c )
+#define VERT_COPY_SPEC( v0, v1 ) if (havespec) COPY_3V(v0->ub4[5], v1->ub4[5])
+#define VERT_SAVE_SPEC( idx )    if (havespec) spec[idx] = v[idx]->ui[5]
+#define VERT_RESTORE_SPEC( idx ) if (havespec) v[idx]->ui[5] = spec[idx]
+
+#define LOCAL_VARS(n)							\
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);			\
+   GLuint color[n], spec[n];						\
+   GLuint coloroffset = (rmesa->swtcl.vertex_size == 4 ? 3 : 4);	\
+   GLboolean havespec = (rmesa->swtcl.vertex_size > 4);			\
+   (void) color; (void) spec; (void) coloroffset; (void) havespec;
+
+/***********************************************************************
+ *                Helpers for rendering unfilled primitives            *
+ ***********************************************************************/
+
+#define RASTERIZE(x) radeonRasterPrimitive( ctx, reduced_hw_prim[x] )
+#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
+#define TAG(x) x
+#include "tnl_dd/t_dd_unfilled.h"
+#undef IND
+
+
+/***********************************************************************
+ *                      Generate GL render functions                   *
+ ***********************************************************************/
+
+
+#define IND (0)
+#define TAG(x) x
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (RADEON_TWOSIDE_BIT)
+#define TAG(x) x##_twoside
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (RADEON_UNFILLED_BIT)
+#define TAG(x) x##_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (RADEON_TWOSIDE_BIT|RADEON_UNFILLED_BIT)
+#define TAG(x) x##_twoside_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (RADEON_OFFSET_BIT)
+#define TAG(x) x##_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (RADEON_TWOSIDE_BIT|RADEON_OFFSET_BIT)
+#define TAG(x) x##_twoside_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (RADEON_UNFILLED_BIT|RADEON_OFFSET_BIT)
+#define TAG(x) x##_unfilled_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (RADEON_TWOSIDE_BIT|RADEON_UNFILLED_BIT|RADEON_OFFSET_BIT)
+#define TAG(x) x##_twoside_unfilled_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+
+static void init_rast_tab( void )
+{
+   init();			/* presumably one init*() per t_dd_tritmp.h include */
+   init_twoside();
+   init_unfilled();
+   init_twoside_unfilled();
+   init_offset();
+   init_twoside_offset();
+   init_unfilled_offset();
+   init_twoside_unfilled_offset();
+}
+
+/**********************************************************************/
+/*               Render unclipped begin/end objects                   */
+/**********************************************************************/
+
+#define VERT(x) (radeonVertex *)(radeonverts + (x << shift))
+#define RENDER_POINTS( start, count )		\
+   for ( ; start < count ; start++)		\
+      radeon_point( rmesa, VERT(start) )
+#define RENDER_LINE( v0, v1 ) \
+   radeon_line( rmesa, VERT(v0), VERT(v1) )
+#define RENDER_TRI( v0, v1, v2 )  \
+   radeon_triangle( rmesa, VERT(v0), VERT(v1), VERT(v2) )
+#define RENDER_QUAD( v0, v1, v2, v3 ) \
+   radeon_quad( rmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) )
+#define INIT(x) do {					\
+   radeonRenderPrimitive( ctx, x );			\
+} while (0)
+#undef LOCAL_VARS
+#define LOCAL_VARS						\
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);		\
+   const GLuint shift = rmesa->swtcl.vertex_stride_shift;		\
+   const char *radeonverts = (char *)rmesa->swtcl.verts;		\
+   const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
+   const GLboolean stipple = ctx->Line.StippleFlag;		\
+   (void) elt; (void) stipple;
+#define RESET_STIPPLE	if ( stipple ) radeonResetLineStipple( ctx );
+#define RESET_OCCLUSION
+#define PRESERVE_VB_DEFS
+#define ELT(x) (x)
+#define TAG(x) radeon_##x##_verts
+#include "tnl/t_vb_rendertmp.h"
+#undef ELT
+#undef TAG
+#define TAG(x) radeon_##x##_elts
+#define ELT(x) elt[x]
+#include "tnl/t_vb_rendertmp.h"
+
+
+
+/**********************************************************************/
+/*                    Choose render functions                         */
+/**********************************************************************/
+
+void radeonChooseRenderState( GLcontext *ctx )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint index = 0;
+   GLuint flags = ctx->_TriangleCaps;
+   /* Nothing to choose unless TCL has fallen back and rasterization has not. */
+   if (!rmesa->TclFallback || rmesa->Fallback) 
+      return;
+
+   if (flags & DD_TRI_LIGHT_TWOSIDE) index |= RADEON_TWOSIDE_BIT;
+   if (flags & DD_TRI_UNFILLED)      index |= RADEON_UNFILLED_BIT;
+   if ((flags & DD_TRI_OFFSET) &&
+       rmesa->dri.drmMinor == 1)  index |= RADEON_OFFSET_BIT;
+   /* Re-install the render hooks only when the index actually changed. */
+   if (index != rmesa->swtcl.RenderIndex) {
+      tnl->Driver.Render.Points = rast_tab[index].points;
+      tnl->Driver.Render.Line = rast_tab[index].line;
+      tnl->Driver.Render.ClippedLine = rast_tab[index].line;
+      tnl->Driver.Render.Triangle = rast_tab[index].triangle;
+      tnl->Driver.Render.Quad = rast_tab[index].quad;
+      /* Index 0 uses the driver's own render tables; others use tnl's. */
+      if (index == 0) {
+	 tnl->Driver.Render.PrimTabVerts = radeon_render_tab_verts;
+	 tnl->Driver.Render.PrimTabElts = radeon_render_tab_elts;
+	 tnl->Driver.Render.ClippedPolygon = radeon_fast_clipped_poly;
+      } else {
+	 tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
+	 tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
+	 tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
+      }
+
+      rmesa->swtcl.RenderIndex = index;
+   }
+}
+
+
+/**********************************************************************/
+/*                 High level hooks for t_vb_render.c                 */
+/**********************************************************************/
+
+
+static void radeonRasterPrimitive( GLcontext *ctx, GLuint hwprim )
+{
+   /* Switch the hardware primitive; do nothing if it is already current. */
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   if (rmesa->swtcl.hw_primitive == hwprim)
+      return;
+   RADEON_NEWPRIM( rmesa );
+   rmesa->swtcl.hw_primitive = hwprim;
+}
+
+static void radeonRenderPrimitive( GLcontext *ctx, GLenum prim )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   rmesa->swtcl.render_primitive = prim;	/* read back via RENDER_PRIMITIVE */
+   if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED)) 
+      radeonRasterPrimitive( ctx, reduced_hw_prim[prim] );	/* skipped for unfilled tris and up */
+}
+
+static void radeonRenderFinish( GLcontext *ctx )
+{	/* Render.Finish hook; currently a no-op. */
+}
+
+static void radeonResetLineStipple( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   RADEON_STATECHANGE( rmesa, lin );	/* flag the 'lin' hw state as changed */
+}
+
+
+/**********************************************************************/
+/*           Transition to/from hardware rasterization.               */
+/**********************************************************************/
+
+static char *fallbackStrings[] = {	/* indexed by fallback bit position */
+   "Texture mode",
+   "glDrawBuffer(GL_FRONT_AND_BACK)",
+   "glEnable(GL_STENCIL) without hw stencil buffer",
+   "glRenderMode(selection or feedback)",
+   "glBlendEquation",
+   "glBlendFunc(mode != ADD)",	/* comma was missing: merged with next entry */
+   "RADEON_NO_RAST"
+};
+
+
+static char *getFallbackString(GLuint bit)
+{
+   /* Name for the highest set bit of 'bit'; "unknown" if past the table. */
+   GLuint i;
+   for (i = 0; bit > 1; bit >>= 1)
+      i++;
+   return (i < sizeof(fallbackStrings) / sizeof(fallbackStrings[0]))
+      ? fallbackStrings[i] : "unknown";
+}
+
+
+void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLuint oldfallback = rmesa->Fallback;
+   /* Work is done only when the first bit is set or the last one cleared. */
+   if (mode) {
+      rmesa->Fallback |= bit;
+      if (oldfallback == 0) {
+	 RADEON_FIREVERTICES( rmesa );
+	 TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_TRUE );
+	 _swsetup_Wakeup( ctx );
+	 _tnl_need_projected_coords( ctx, GL_TRUE );
+	 rmesa->swtcl.RenderIndex = ~0;	/* never matches a real index: forces re-choice */
+         if (RADEON_DEBUG & DEBUG_FALLBACKS) {
+            fprintf(stderr, "Radeon begin rasterization fallback: 0x%x %s\n",
+                    bit, getFallbackString(bit));
+         }
+      }
+   }
+   else {
+      rmesa->Fallback &= ~bit;
+      if (oldfallback == bit) {
+	 _swrast_flush( ctx );
+	 tnl->Driver.Render.Start = radeonRenderStart;
+	 tnl->Driver.Render.PrimitiveNotify = radeonRenderPrimitive;
+	 tnl->Driver.Render.Finish = radeonRenderFinish;
+	 tnl->Driver.Render.BuildVertices = radeonBuildVertices;
+	 tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple;
+	 TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_FALSE );
+	 if (rmesa->TclFallback) {
+	    /* These are already done if rmesa->TclFallback goes to
+	     * zero above. But not if it doesn't (RADEON_NO_TCL for
+	     * example?)
+	     */
+	    radeonChooseVertexState( ctx );
+	    radeonChooseRenderState( ctx );
+	 }
+         if (RADEON_DEBUG & DEBUG_FALLBACKS) {
+            fprintf(stderr, "Radeon end rasterization fallback: 0x%x %s\n",
+                    bit, getFallbackString(bit));
+         }
+      }
+   }
+}
+
+
+/**********************************************************************/
+/*                            Initialization.                         */
+/**********************************************************************/
+
+void radeonInitSwtcl( GLcontext *ctx )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint size = TNL_CONTEXT(ctx)->vb.Size;
+   static int firsttime = 1;
+   /* The rast/setup tables are filled once, on the first call only. */
+   if (firsttime) {
+      init_rast_tab();
+      init_setup_tab();
+      firsttime = 0;
+   }
+   /* Install the swtcl render hooks on the tnl module. */
+   tnl->Driver.Render.Start = radeonRenderStart;
+   tnl->Driver.Render.Finish = radeonRenderFinish;
+   tnl->Driver.Render.PrimitiveNotify = radeonRenderPrimitive;
+   tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple;
+   tnl->Driver.Render.BuildVertices = radeonBuildVertices;
+   /* size*16*4 bytes, 32-byte aligned; NOTE(review): not checked for NULL. */
+   rmesa->swtcl.verts = (char *)ALIGN_MALLOC( size * 16 * 4, 32 );
+   rmesa->swtcl.RenderIndex = ~0;
+   rmesa->swtcl.render_primitive = GL_TRIANGLES;
+   rmesa->swtcl.hw_primitive = 0;
+}
+
+
+void radeonDestroySwtcl( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   if (rmesa->swtcl.verts) {	/* vertex store from radeonInitSwtcl */
+      ALIGN_FREE(rmesa->swtcl.verts);
+      rmesa->swtcl.verts = 0;
+   }
+   /* Also free the ubyte color arrays, if they were allocated. */
+   if (rmesa->UbyteSecondaryColor.Ptr) {
+      ALIGN_FREE(rmesa->UbyteSecondaryColor.Ptr);
+      rmesa->UbyteSecondaryColor.Ptr = 0;
+   }
+
+   if (rmesa->UbyteColor.Ptr) {
+      ALIGN_FREE(rmesa->UbyteColor.Ptr);
+      rmesa->UbyteColor.Ptr = 0;
+   }
+}
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_vb.h b/xc/lib/GL/mesa/src/drv/radeon/radeon_swtcl.h
index 78337fae8..43530332a 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_vb.h
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_swtcl.h
@@ -29,33 +29,30 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 /*
  * Authors:
- *   Keith Whitwell <keithw@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
  *
  */
 
-#ifndef RADEONVB_INC
-#define RADEONVB_INC
+#ifndef __RADEON_TRIS_H__
+#define __RADEON_TRIS_H__
 
 #include "mtypes.h"
 #include "swrast/swrast.h"
 #include "radeon_context.h"
 
-#define _RADEON_NEW_VERTEX_STATE (_DD_NEW_SEPARATE_SPECULAR |	\
-                                  _DD_NEW_TRI_LIGHT_TWOSIDE |	\
-                                  _DD_NEW_TRI_UNFILLED |	\
-			          _NEW_TEXTURE |		\
-			          _NEW_FOG)
+extern void radeonInitSwtcl( GLcontext *ctx );
+extern void radeonDestroySwtcl( GLcontext *ctx );
 
-extern void radeonCheckTexSizes( GLcontext *ctx );
+extern void radeonChooseRenderState( GLcontext *ctx );
 extern void radeonChooseVertexState( GLcontext *ctx );
 
+extern void radeonCheckTexSizes( GLcontext *ctx );
+
 extern void radeonBuildVertices( GLcontext *ctx, GLuint start, GLuint count,
 				 GLuint newinputs );
 
 extern void radeonPrintSetupFlags(char *msg, GLuint flags );
 
-extern void radeonInitVB( GLcontext *ctx );
-extern void radeonFreeVB( GLcontext *ctx );
 
 extern void radeon_emit_contiguous_verts( GLcontext *ctx,
 					  GLuint start,
@@ -71,4 +68,9 @@ extern void radeon_translate_vertex( GLcontext *ctx,
 
 extern void radeon_print_vertex( GLcontext *ctx, const radeonVertex *v );
 
+extern void radeon_import_float_colors( GLcontext *ctx );
+extern void radeon_import_float_spec_colors( GLcontext *ctx );
+
+
+
 #endif
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_tcl.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_tcl.c
new file mode 100644
index 000000000..631355fa1
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_tcl.c
@@ -0,0 +1,539 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Austin, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#include "radeon_context.h"
+#include "radeon_state.h"
+#include "radeon_ioctl.h"
+#include "radeon_tex.h"
+#include "radeon_tcl.h"
+#include "radeon_swtcl.h"
+#include "radeon_maos.h"
+
+#include "mmath.h"
+#include "mtypes.h"
+#include "enums.h"
+#include "colormac.h"
+#include "light.h"
+
+#include "array_cache/acache.h"
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+
+
+
+/*
+ * Render unclipped vertex buffers by emitting vertices directly to
+ * dma buffers.  Use strip/fan hardware primitives where possible.
+ * Try to simulate missing primitives with indexed vertices.
+ */
+#define HAVE_POINTS      1
+#define HAVE_LINES       1
+#define HAVE_LINE_LOOP   0	/* no hw code: HW_LINE_LOOP is 0 below */
+#define HAVE_LINE_STRIPS 1
+#define HAVE_TRIANGLES   1
+#define HAVE_TRI_STRIPS  1
+#define HAVE_TRI_STRIP_1 0	/* no hw code: HW_TRIANGLE_STRIP_1 is 0 below */
+#define HAVE_TRI_FANS    1
+#define HAVE_QUADS       0	/* no hw code: HW_QUADS is 0 below */
+#define HAVE_QUAD_STRIPS 0	/* no hw code: HW_QUAD_STRIP is 0 below */
+#define HAVE_POLYGONS    1
+#define HAVE_ELTS        1
+
+/* Hardware primitive codes; 0 wherever the matching HAVE_* flag is 0. */
+#define HW_POINTS           RADEON_CP_VC_CNTL_PRIM_TYPE_POINT
+#define HW_LINES            RADEON_CP_VC_CNTL_PRIM_TYPE_LINE
+#define HW_LINE_LOOP        0
+#define HW_LINE_STRIP       RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP
+#define HW_TRIANGLES        RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST
+#define HW_TRIANGLE_STRIP_0 RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP
+#define HW_TRIANGLE_STRIP_1 0
+#define HW_TRIANGLE_FAN     RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN
+#define HW_QUADS            0
+#define HW_QUAD_STRIP       0
+#define HW_POLYGON          RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN	/* polygons are drawn as fans */
+
+
+static GLboolean discreet_prim[0x10] = {	/* indexed by hw_prim & 0xf; 0 forces a new primitive in radeonTclPrimitive */
+   0,				/* none */
+   1,				/* points */
+   1,				/* lines */
+   0,				/* line_strip */
+   1,				/* tri_list */
+   0,				/* tri_fan */
+   0,				/* tri_type_2 */
+   1,				/* rect list (unused) */
+   1,				/* 3 vert point */
+   1,				/* 3 vert line */
+   0,				/* 0xa */
+   0,				/* 0xb */
+   0,				/* 0xc */
+   0,				/* 0xd */
+   0,				/* 0xe; entry 0xf has no initializer and is therefore 0 */
+};
+   
+
+#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)	/* needs 'ctx' in scope */
+#define ELTS_VARS  GLushort *dest
+/* Select an indexed primitive: hw_prim is tagged with PRIM_WALK_IND. */
+#define ELT_INIT(prim, hw_prim) \
+   radeonTclPrimitive( ctx, prim, hw_prim | RADEON_CP_VC_CNTL_PRIM_WALK_IND )
+
+#define GET_ELTS() rmesa->tcl.Elts
+
+
+#define NEW_PRIMITIVE()  RADEON_NEWPRIM( rmesa )
+#define NEW_BUFFER()  radeonRefillCurrentDmaRegion( rmesa )
+
+/* Don't really know how many elts will fit in what's left of cmdbuf,
+ * as there is state to emit, etc:
+ */
+
+#if 0	/* disabled: estimate derived from the space left in the command buffer */
+#define GET_CURRENT_VB_MAX_ELTS() \
+   ((RADEON_CMD_BUF_SZ - (rmesa->store.cmd_used + 16)) / 2) 
+#define GET_SUBSEQUENT_VB_MAX_ELTS() ((RADEON_CMD_BUF_SZ - 16) / 2) 
+#else	/* active: fixed limit of 300 elts */
+/* Testing on isosurf shows a maximum around here.  Don't know if it's
+ * the card or driver or kernel module that is causing the behaviour.
+ */
+#define GET_CURRENT_VB_MAX_ELTS() 300
+#define GET_SUBSEQUENT_VB_MAX_ELTS() 300
+#endif
+/* Flag the 'lin' state atom as changed and emit state right away. */
+#define RESET_STIPPLE() do {			\
+   RADEON_STATECHANGE( rmesa, lin );		\
+   radeonEmitState( rmesa );			\
+} while (0)
+/* Set (mode != 0) or clear RADEON_LINE_PATTERN_AUTO_RESET, then emit state. */
+#define AUTO_STIPPLE( mode )  do {		\
+   RADEON_STATECHANGE( rmesa, lin );		\
+   if (mode)					\
+      rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] |=	\
+	 RADEON_LINE_PATTERN_AUTO_RESET;	\
+   else						\
+      rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &=	\
+	 ~RADEON_LINE_PATTERN_AUTO_RESET;	\
+   radeonEmitState( rmesa );			\
+} while (0)
+
+
+/* Point 'dest' at room for nr GLushort elts: append in place when the pending
+ * flush is radeonFlushElts and they fit, else flush and start a new packet. */
+#define ALLOC_ELTS(nr)							\
+do {									\
+   if (rmesa->dma.flush == radeonFlushElts &&				\
+       rmesa->store.cmd_used + (nr)*2 < RADEON_CMD_BUF_SZ) {		\
+									\
+      dest = (GLushort *)(rmesa->store.cmd_buf + 			\
+			  rmesa->store.cmd_used);			\
+      rmesa->store.cmd_used += (nr)*2;					\
+   }									\
+   else {								\
+      if (rmesa->dma.flush)						\
+	 rmesa->dma.flush( rmesa );					\
+									\
+      radeonEmitAOS( rmesa,						\
+	  	     rmesa->tcl.aos_components,				\
+		     rmesa->tcl.nr_aos_components,			\
+		     0 );						\
+									\
+      dest = radeonAllocEltsOpenEnded( rmesa,				\
+				       rmesa->tcl.vertex_format,	\
+				       rmesa->tcl.hw_primitive,		\
+				       (nr) );				\
+   }									\
+} while (0) 
+
+
+
+/* TODO: Try to extend existing primitive if both are identical,
+ * discrete and there are no intervening state changes.  (Somewhat
+ * duplicates changes to DrawArrays code)
+ */
+/* Emit count - start vertices, beginning at 'start', as one vbuf primitive. */
+static void EMIT_PRIM( GLcontext *ctx, GLenum prim, GLuint hwprim,
+		       GLuint start, GLuint count )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   const GLuint nr_verts = count - start;
+
+   /* Select the primitive first: this call may update
+    * rmesa->tcl.hw_primitive, which is read back for the packet below.
+    */
+   radeonTclPrimitive( ctx, prim, hwprim );
+
+   /* Why couldn't this packet have taken an offset param?  'start' is
+    * handed to radeonEmitAOS instead.
+    */
+   radeonEmitAOS( rmesa, rmesa->tcl.aos_components,
+		  rmesa->tcl.nr_aos_components, start );
+
+   radeonEmitVbufPrim( rmesa, rmesa->tcl.vertex_format,
+		       rmesa->tcl.hw_primitive, nr_verts );
+}
+
+
+
+/* Try & join small primitives: true below 20 elts, or below 40 when
+ * the current hw primitive is already the indexed, TCL-enabled PRIM. */
+#if 0
+#define PREFER_DISCREET_ELT_PRIM( NR, PRIM ) 0
+#else
+#define PREFER_DISCREET_ELT_PRIM( NR, PRIM )			\
+  ((NR) < 20 ||							\
+   ((NR) < 40 &&						\
+    rmesa->tcl.hw_primitive == (PRIM|				\
+			    RADEON_CP_VC_CNTL_PRIM_WALK_IND|	\
+			    RADEON_CP_VC_CNTL_TCL_ENABLE)))
+#endif
+/* Element writers; 'dest' is the GLushort pointer declared by ELTS_VARS. */
+#define EMIT_ELT(offset, x) (dest)[offset] = (GLushort) (x)
+#if defined(__i386__)
+#define EMIT_TWO_ELTS(offset, x, y)  *(GLuint *)(dest+offset) = ((y)<<16)|(x);
+#endif
+#define INCR_ELTS( nr ) dest += nr
+#define RELEASE_ELT_VERTS() \
+   radeonReleaseArrays( ctx, ~0 )
+
+/* EMIT_TWO_ELTS (i386 only) writes x to the low and y to the high 16 bits
+ * of one GLuint store; note its expansion already ends in a semicolon. */
+#define TAG(x) tcl_##x
+#include "tnl_dd/t_dd_dmatmp2.h"
+
+/**********************************************************************/
+/*                          External entrypoints                     */
+/**********************************************************************/
+
+void radeonEmitPrimitive( GLcontext *ctx, GLuint first, GLuint last,
+			  GLuint flags )
+{
+   /* Dispatch through the verts render table on the primitive mode bits. */
+   const GLuint mode = flags & PRIM_MODE_MASK;
+   tcl_render_tab_verts[mode]( ctx, first, last, flags );
+}
+
+void radeonEmitEltPrimitive( GLcontext *ctx, GLuint first, GLuint last,
+			     GLuint flags )
+{
+   /* Dispatch through the elts render table on the primitive mode bits. */
+   const GLuint mode = flags & PRIM_MODE_MASK;
+   tcl_render_tab_elts[mode]( ctx, first, last, flags );
+}
+
+void radeonTclPrimitive( GLcontext *ctx, GLenum prim, int hw_prim )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   const GLuint wanted = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
+   GLuint se_cntl;
+
+   /* Keep going with the current primitive only when it is the same
+    * type and that type is flagged in discreet_prim[]; otherwise start
+    * a new one.
+    */
+   if (!(wanted == rmesa->tcl.hw_primitive && discreet_prim[hw_prim&0xf])) {
+      RADEON_NEWPRIM( rmesa );
+      rmesa->tcl.hw_primitive = wanted;
+   }
+
+   /* Flat-shaded polygons select RADEON_FLAT_SHADE_VTX_0, everything
+    * else RADEON_FLAT_SHADE_VTX_LAST.
+    */
+   se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL] & ~RADEON_FLAT_SHADE_VTX_LAST;
+   se_cntl |= (prim == GL_POLYGON && (ctx->_TriangleCaps & DD_FLATSHADE))
+      ? RADEON_FLAT_SHADE_VTX_0 : RADEON_FLAT_SHADE_VTX_LAST;
+
+   if (se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) {
+      RADEON_STATECHANGE( rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
+   }
+}
+
+
+/**********************************************************************/
+/*                          Render pipeline stage                     */
+/**********************************************************************/
+
+
+/* TCL render.
+ */
+static GLboolean radeon_run_tcl_render( GLcontext *ctx,
+					struct gl_pipeline_stage *stage )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLuint pos, len, prim_flags = 0;
+
+   /* TODO: separate this from the swtnl pipeline 
+    */
+   if (rmesa->TclFallback)
+      return GL_TRUE;	/* fallback to software t&l */
+
+   if (VB->Count == 0)
+      return GL_FALSE;
+
+   /* Release arrays for changed inputs, then emit this stage's inputs. */
+   radeonReleaseArrays( ctx, stage->changed_inputs );
+   radeonEmitArrays( ctx, stage->inputs );
+
+   rmesa->tcl.Elts = VB->Elts;
+
+   /* Walk the primitive list; the entry carrying PRIM_LAST ends it. */
+   for (pos = VB->FirstPrimitive ; !(prim_flags & PRIM_LAST) ; pos += len)
+   {
+      prim_flags = VB->Primitive[pos];
+      len = VB->PrimitiveLength[pos];
+
+      if (RADEON_DEBUG & DEBUG_PRIMS)
+	 fprintf(stderr, "%s: prim %s %d..%d\n", 
+		 __FUNCTION__,
+		 _mesa_lookup_enum_by_nr(prim_flags & PRIM_MODE_MASK), 
+		 pos, pos+len);
+
+      if (len) {
+	 if (rmesa->tcl.Elts)
+	    radeonEmitEltPrimitive( ctx, pos, pos+len, prim_flags );
+	 else
+	    radeonEmitPrimitive( ctx, pos, pos+len, prim_flags );
+      }
+   }
+
+   return GL_FALSE;		/* finished the pipe */
+}
+
+
+
+static void radeon_check_tcl_render( GLcontext *ctx,
+				     struct gl_pipeline_stage *stage )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint needed = VERT_OBJ;
+
+   /* The stage is only active in GL_RENDER mode; stage->inputs is left
+    * untouched otherwise.
+    */
+   if (ctx->RenderMode != GL_RENDER) {
+      stage->active = 0;
+      return;
+   }
+
+   /* Make all this event-driven:
+    */
+   if (ctx->Light.Enabled) {
+      /* Lit: normals always; colours only with colormaterial. */
+      needed |= VERT_NORM;
+      if (ctx->Light.ColorMaterialEnabled)
+	 needed |= VERT_RGBA;
+   }
+   else {
+      /* Unlit: colours, plus specular when separate specular is on. */
+      needed |= VERT_RGBA;
+      if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR)
+	 needed |= VERT_SPEC_RGB;
+   }
+
+   /* Both texture units follow the same rule: with texgen enabled the
+    * texcoord array is not requested, only normals if that unit's
+    * texgen needs them; otherwise the unit's texcoords are requested.
+    */
+   if (ctx->Texture.Unit[0]._ReallyEnabled) {
+      if (!ctx->Texture.Unit[0].TexGenEnabled)
+	 needed |= VERT_TEX(0);
+      else if (rmesa->TexGenNeedNormals[0])
+	 needed |= VERT_NORM;
+   }
+
+   if (ctx->Texture.Unit[1]._ReallyEnabled) {
+      if (!ctx->Texture.Unit[1].TexGenEnabled)
+	 needed |= VERT_TEX(1);
+      else if (rmesa->TexGenNeedNormals[1])
+	 needed |= VERT_NORM;
+   }
+
+   stage->inputs = needed;
+   stage->active = 1;
+}
+
+/* Bootstrap check hook: install the real check function and run it once. */
+static void radeon_init_tcl_render( GLcontext *ctx, struct gl_pipeline_stage *stage )
+{
+   stage->check = radeon_check_tcl_render;
+   radeon_check_tcl_render( ctx, stage );
+}
+
+static void dtr( struct gl_pipeline_stage *stage )	/* destructor slot of _radeon_tcl_stage: does nothing */
+{
+   (void)stage;		/* parameter intentionally unused */
+}
+
+
+/* Initial state for tcl stage.  
+ */
+const struct gl_pipeline_stage _radeon_tcl_stage =
+{
+   "radeon render",		/* name */
+   (_DD_NEW_SEPARATE_SPECULAR |
+    _NEW_LIGHT|
+    _NEW_TEXTURE|
+    _NEW_FOG|
+    _NEW_RENDERMODE),		/* re-check (new inputs) */
+   0,				/* re-run (always runs) */
+   GL_TRUE,			/* active */
+   0, 0,			/* inputs (set by radeon_check_tcl_render), outputs */
+   0, 0,			/* changed_inputs, private */
+   dtr,				/* destructor (no-op) */
+   radeon_init_tcl_render,	/* check - first call installs radeon_check_tcl_render */
+   radeon_run_tcl_render	/* run */
+};
+
+
+
+/**********************************************************************/
+/*                 Validate state at pipeline start                   */
+/**********************************************************************/
+
+
+/*-----------------------------------------------------------------------
+ * Manage TCL fallbacks
+ */
+
+
+static void transition_to_swtnl( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint old_cntl, new_cntl;
+
+   /* Start a new primitive and reset the swtcl vertex format, then
+    * choose vertex and render state again. */
+   RADEON_NEWPRIM( rmesa );
+   rmesa->swtcl.vertex_format = 0;
+   radeonChooseVertexState( ctx );
+   radeonChooseRenderState( ctx );
+
+   /* Validate the lighting tables now and on every material change. */
+   _mesa_validate_all_lighting_tables( ctx );
+   TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange =
+      _mesa_validate_all_lighting_tables;
+
+   radeonReleaseArrays( ctx, ~0 );
+
+   /* Make sure RADEON_FLAT_SHADE_VTX_LAST is set in SE_CNTL. */
+   old_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
+   new_cntl = old_cntl | RADEON_FLAT_SHADE_VTX_LAST;
+   if (new_cntl != old_cntl) {
+      RADEON_STATECHANGE( rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_CNTL] = new_cntl;
+   }
+}
+
+
+static void transition_to_hwtnl( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   const GLuint coord_fmt = (RADEON_VTX_W0_IS_NOT_1_OVER_W0 |
+			     RADEON_TEX1_W_ROUTING_USE_Q1);
+
+   /* Switch the coordinate format if it differs, and tell tnl that
+    * projected coordinates are not needed. */
+   if ( rmesa->hw.set.cmd[SET_SE_COORDFMT] != coord_fmt ) {
+      RADEON_STATECHANGE( rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_COORDFMT] = coord_fmt;
+      _tnl_need_projected_coords( ctx, GL_FALSE );
+   }
+
+   /* Update the material now and on every later material change. */
+   radeonUpdateMaterial( ctx );
+   TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange = radeonUpdateMaterial;
+
+   /* Run any pending dma flush, then clear the swtcl leftovers. */
+   if ( rmesa->dma.flush )
+      rmesa->dma.flush( rmesa );
+   rmesa->dma.flush = 0;
+   rmesa->swtcl.vertex_format = 0;
+   if (rmesa->swtcl.indexed_verts.buf)
+      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
+			      __FUNCTION__ );
+
+   if (RADEON_DEBUG & DEBUG_FALLBACKS)
+      fprintf(stderr, "Radeon end tcl fallback\n");
+}
+
+/* Entry n describes fallback bit (1 << n); see RADEON_TCL_FALLBACK_*. */
+static char *fallbackStrings[] = {
+   "Rasterization fallback",
+   "Unfilled triangles",
+   "Twosided lighting, differing materials",
+   "Materials in VB (maybe between begin/end)",
+   "Texgen unit 0",
+   "Texgen unit 1",
+   "Texgen unit 2",
+   "User disable"
+};
+
+/* Describe the highest bit set in 'bit'; never indexes past the table. */
+static char *getFallbackString(GLuint bit)
+{
+   GLuint i = 0;
+   for ( ; bit > 1; bit >>= 1)
+      i++;
+   return (i < sizeof(fallbackStrings) / sizeof(fallbackStrings[0]))
+      ? fallbackStrings[i] : "Unknown fallback";
+}
+
+
+
+void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   const GLuint before = rmesa->TclFallback;
+
+   if (mode) {
+      rmesa->TclFallback = before | bit;
+      if (before == 0) {	/* first fallback reason: switch to swtnl */
+	 if (RADEON_DEBUG & DEBUG_FALLBACKS)
+	    fprintf(stderr, "Radeon begin tcl fallback %s\n",
+		    getFallbackString( bit ));
+	 transition_to_swtnl( ctx );
+      }
+      return;
+   }
+
+   rmesa->TclFallback = before & ~bit;
+   if (before == bit) {		/* 'bit' was the only reason set: back to hwtnl */
+      if (RADEON_DEBUG & DEBUG_FALLBACKS)
+	 fprintf(stderr, "Radeon end tcl fallback %s\n",
+		 getFallbackString( bit ));
+      transition_to_hwtnl( ctx );
+   }
+}
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_tcl.h b/xc/lib/GL/mesa/src/drv/radeon/radeon_tcl.h
new file mode 100644
index 000000000..d2d0145ad
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_tcl.h
@@ -0,0 +1,66 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Austin, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#ifndef __RADEON_TCL_H__
+#define __RADEON_TCL_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+#include "radeon_context.h"
+
+extern void radeonTclPrimitive( GLcontext *ctx, GLenum prim, int hw_prim );
+extern void radeonEmitEltPrimitive( GLcontext *ctx, GLuint first, GLuint last,
+				    GLuint flags );
+extern void radeonEmitPrimitive( GLcontext *ctx, GLuint first, GLuint last,
+				 GLuint flags );
+
+extern void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode );
+					      
+#define RADEON_TCL_FALLBACK_RASTER            0x1 /* rasterization */
+#define RADEON_TCL_FALLBACK_UNFILLED          0x2 /* unfilled tris */
+#define RADEON_TCL_FALLBACK_LIGHT_TWOSIDE     0x4 /* twoside tris */
+#define RADEON_TCL_FALLBACK_MATERIAL          0x8 /* material in vb */
+#define RADEON_TCL_FALLBACK_TEXGEN_0          0x10 /* texgen, unit 0 */
+#define RADEON_TCL_FALLBACK_TEXGEN_1          0x20 /* texgen, unit 1 */
+#define RADEON_TCL_FALLBACK_TEXGEN_2          0x40 /* texgen, unit 2 */
+#define RADEON_TCL_FALLBACK_TCL_DISABLE       0x80 /* user disable */
+
+#define RADEON_MAX_TCL_VERTSIZE (4*4) /* using maos now... */
+
+#define TCL_FALLBACK( ctx, bit, mode )	radeonTclFallback( ctx, bit, mode )
+
+
+#endif
+#endif
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_tcl_vbtmp.h b/xc/lib/GL/mesa/src/drv/radeon/radeon_tcl_vbtmp.h
new file mode 100644
index 000000000..a0a44e180
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_tcl_vbtmp.h
@@ -0,0 +1,373 @@
+/* $Id: radeon_tcl_vbtmp.h,v 1.2 2002/06/12 15:50:26 keithw Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef LOCALVARS
+#define LOCALVARS
+#endif
+
+#undef TCL_DEBUG
+#ifndef TCL_DEBUG
+#define TCL_DEBUG 0
+#endif
+
+static void TAG(emit)( GLcontext *ctx,
+		       GLuint start, GLuint end,
+		       void *dest )
+{
+   LOCALVARS
+      struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLuint (*tc0)[4], (*tc1)[4];
+   GLfloat *fog;
+   GLuint (*tc2)[4], (*norm)[3];
+   GLubyte (*col)[4], (*spec)[4];
+   GLuint tc0_stride, tc1_stride, col_stride, spec_stride, fog_stride;
+   GLuint tc2_stride, norm_stride;
+   GLuint (*coord)[4];
+   GLuint coord_stride;
+   GLubyte dummy[4];		/* stand-in source for col, spec or fog when the VB has no such array */
+   int i;
+
+   union emit_union *v = (union emit_union *)dest;
+   /* Interleave the enabled attributes of vertices start..end-1 into
+    * dest, in the order: obj, normal, rgba, spec/fog, tex0, tex1, tex2. */
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s\n", __FUNCTION__); 
+
+   /* The vertex code expects Obj to be clean to element 3.  To fix
+    * this, add more vertex code (for obj-2, obj-3) or preferably move
+    * to maos.  
+    */
+   if (VB->ObjPtr->size < 3) {
+      if (VB->ObjPtr->flags & VEC_NOT_WRITEABLE) {
+	 VB->import_data( ctx, VERT_OBJ, VEC_NOT_WRITEABLE );
+      }
+      _mesa_vector4f_clean_elem( VB->ObjPtr, VB->Count, 2 );
+   }
+
+   if (DO_W && VB->ObjPtr->size < 4) {
+      if (VB->ObjPtr->flags & VEC_NOT_WRITEABLE) {
+	 VB->import_data( ctx, VERT_OBJ, VEC_NOT_WRITEABLE );
+      }
+      _mesa_vector4f_clean_elem( VB->ObjPtr, VB->Count, 3 );
+   }
+
+   coord = (GLuint (*)[4])VB->ObjPtr->data;
+   coord_stride = VB->ObjPtr->stride;
+
+   if (DO_TEX2) {
+      const GLuint t2 = GET_TEXSOURCE(2);
+      tc2 = (GLuint (*)[4])VB->TexCoordPtr[t2]->data;
+      tc2_stride = VB->TexCoordPtr[t2]->stride;
+      if (DO_PTEX && VB->TexCoordPtr[t2]->size < 4) {
+	 if (VB->TexCoordPtr[t2]->flags & VEC_NOT_WRITEABLE) {
+	    VB->import_data( ctx, VERT_TEX2, VEC_NOT_WRITEABLE );
+	 }
+	 _mesa_vector4f_clean_elem( VB->TexCoordPtr[t2], VB->Count, 3 );
+      }
+   }
+
+   if (DO_TEX1) {
+      if (VB->TexCoordPtr[1]) {	/* NOTE(review): tests unit 1 but reads unit t1 below -- confirm they agree */
+	 const GLuint t1 = GET_TEXSOURCE(1);
+	 tc1 = (GLuint (*)[4])VB->TexCoordPtr[t1]->data;
+	 tc1_stride = VB->TexCoordPtr[t1]->stride;
+	 if (DO_PTEX && VB->TexCoordPtr[t1]->size < 4) {
+	    if (VB->TexCoordPtr[t1]->flags & VEC_NOT_WRITEABLE) {
+	       VB->import_data( ctx, VERT_TEX1, VEC_NOT_WRITEABLE );
+	    }
+	    _mesa_vector4f_clean_elem( VB->TexCoordPtr[t1], VB->Count, 3 );
+	 }
+      } else {
+	 tc1 = (GLuint (*)[4])&ctx->Current.Texcoord[1]; /* could be anything, really */
+	 tc1_stride = 0;
+      }
+   }
+
+   if (DO_TEX0) {
+      if (VB->TexCoordPtr[0]) {
+	 const GLuint t0 = GET_TEXSOURCE(0);
+	 tc0_stride = VB->TexCoordPtr[t0]->stride;
+	 tc0 = (GLuint (*)[4])VB->TexCoordPtr[t0]->data;
+	 if (DO_PTEX && VB->TexCoordPtr[t0]->size < 4) {
+	    if (VB->TexCoordPtr[t0]->flags & VEC_NOT_WRITEABLE) {
+	       VB->import_data( ctx, VERT_TEX0, VEC_NOT_WRITEABLE );
+	    }
+	    _mesa_vector4f_clean_elem( VB->TexCoordPtr[t0], VB->Count, 3 );
+	 }
+      } else {
+	 tc0 = (GLuint (*)[4])&ctx->Current.Texcoord[0]; /* could be anything, really */
+	 tc0_stride = 0;
+      }
+	 
+   }
+
+   if (DO_NORM) {
+      if (VB->NormalPtr) {
+	 norm_stride = VB->NormalPtr->stride;
+	 norm = (GLuint (*)[3])VB->NormalPtr->data;
+      } else {
+	 norm_stride = 0;
+	 norm = (GLuint (*)[3])&ctx->Current.Normal;
+      }
+   }
+
+   if (DO_RGBA) {
+      if (VB->ColorPtr[0]) {
+	 /* This is incorrect when colormaterial is enabled:
+	  */
+	 if (VB->ColorPtr[0]->Type != GL_UNSIGNED_BYTE) {
+	    if (0) fprintf(stderr, "IMPORTING FLOAT COLORS\n");
+	    IMPORT_FLOAT_COLORS( ctx );
+	 }
+	 col = (GLubyte (*)[4])VB->ColorPtr[0]->Ptr;
+	 col_stride = VB->ColorPtr[0]->StrideB;
+      } else {
+	 col = &dummy; /* any old memory is fine */
+	 col_stride = 0;
+      }
+      
+   }
+
+   if (DO_SPEC) {
+      if (VB->SecondaryColorPtr[0]) {
+	 if (VB->SecondaryColorPtr[0]->Type != GL_UNSIGNED_BYTE)
+	    IMPORT_FLOAT_SPEC_COLORS( ctx );
+	 spec = (GLubyte (*)[4])VB->SecondaryColorPtr[0]->Ptr;
+	 spec_stride = VB->SecondaryColorPtr[0]->StrideB;
+      } else {
+	 spec = &dummy;
+	 spec_stride = 0;
+      }
+	 
+   }
+
+   if (DO_FOG) {
+      if (VB->FogCoordPtr) {
+	 fog = VB->FogCoordPtr->data;
+	 fog_stride = VB->FogCoordPtr->stride;
+      } else {
+	 fog = (GLfloat *)&dummy; *fog = 0;
+	 fog_stride = 0;
+      }
+	      
+   }
+   
+   
+   if (VB->importable_data) {	/* this path advances every source pointer by its byte stride */
+      if (start) {
+	 coord =  (GLuint (*)[4])((GLubyte *)coord + start * coord_stride);
+	 if (DO_TEX0)
+	    tc0 =  (GLuint (*)[4])((GLubyte *)tc0 + start * tc0_stride);
+	 if (DO_TEX1) 
+	    tc1 =  (GLuint (*)[4])((GLubyte *)tc1 + start * tc1_stride);
+	 if (DO_TEX2) 
+	    tc2 =  (GLuint (*)[4])((GLubyte *)tc2 + start * tc2_stride);
+	 if (DO_NORM) 
+	    norm =  (GLuint (*)[3])((GLubyte *)norm + start * norm_stride);
+	 if (DO_RGBA) 
+	    STRIDE_4UB(col, start * col_stride);
+	 if (DO_SPEC)
+	    STRIDE_4UB(spec, start * spec_stride);
+	 if (DO_FOG)
+	    STRIDE_F(fog, start * fog_stride);
+      }
+
+      for (i=start; i < end; i++) {
+	 v[0].ui = coord[0][0];
+	 v[1].ui = coord[0][1];
+	 v[2].ui = coord[0][2];
+	 if (TCL_DEBUG) fprintf(stderr, "%d: %.2f %.2f %.2f ", i, v[0].f, v[1].f, v[2].f);
+	 if (DO_W) {
+	    v[3].ui = coord[0][3];
+	    if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[3].f);
+	    v += 4;
+	 } 
+	 else
+	    v += 3;
+	 coord =  (GLuint (*)[4])((GLubyte *)coord +  coord_stride);
+
+	 if (DO_NORM) {
+	    v[0].ui = norm[0][0];
+	    v[1].ui = norm[0][1];
+	    v[2].ui = norm[0][2];
+	    if (TCL_DEBUG) fprintf(stderr, "norm: %.2f %.2f %.2f ", v[0].f, v[1].f, v[2].f);
+	    v += 3;
+	    norm =  (GLuint (*)[3])((GLubyte *)norm +  norm_stride);
+	 }
+	 if (DO_RGBA) {
+	    v[0].ui = *(GLuint *)&col[0];
+	    STRIDE_4UB(col, col_stride);
+	    if (TCL_DEBUG) fprintf(stderr, "%x ", v[0].ui);
+	    v++;
+	 }
+	 if (DO_SPEC || DO_FOG) {	/* specular rgb (bytes 0-2) and fog (byte 3) share one element */
+	    if (DO_SPEC) {
+	       v[0].ub[0] = spec[0][0];
+	       v[0].ub[1] = spec[0][1];
+	       v[0].ub[2] = spec[0][2];
+	       STRIDE_4UB(spec, spec_stride);
+	    }
+	    if (DO_FOG) {
+	       v[0].ub[3] = fog[0] * 255.0;
+	       STRIDE_F(fog, fog_stride);
+	    }
+	    if (TCL_DEBUG) fprintf(stderr, "%x ", v[0].ui);
+	    v++;
+	 }
+	 if (DO_TEX0) {
+	    v[0].ui = tc0[0][0];
+	    v[1].ui = tc0[0][1];
+	    if (TCL_DEBUG) fprintf(stderr, "t0: %.2f %.2f ", v[0].f, v[1].f);
+	    if (DO_PTEX) {
+	       v[2].ui = tc0[0][3];
+	       if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[2].f);
+	       v += 3;
+	    } 
+	    else
+	       v += 2;
+	    tc0 =  (GLuint (*)[4])((GLubyte *)tc0 +  tc0_stride);
+	 }
+	 if (DO_TEX1) {
+	    v[0].ui = tc1[0][0];
+	    v[1].ui = tc1[0][1];
+	    if (TCL_DEBUG) fprintf(stderr, "t1: %.2f %.2f ", v[0].f, v[1].f);
+	    if (DO_PTEX) {
+	       v[2].ui = tc1[0][3];
+	       if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[2].f);
+	       v += 3;
+	    } 
+	    else
+	       v += 2;
+	    tc1 =  (GLuint (*)[4])((GLubyte *)tc1 +  tc1_stride);
+	 } 
+	 if (DO_TEX2) {
+	    v[0].ui = tc2[0][0];
+	    v[1].ui = tc2[0][1];
+	    if (DO_PTEX) {
+	       v[2].ui = tc2[0][3];
+	       v += 3;
+	    } 
+	    else
+	       v += 2;
+	    tc2 =  (GLuint (*)[4])((GLubyte *)tc2 +  tc2_stride);
+	 } 
+	 if (TCL_DEBUG) fprintf(stderr, "\n");
+      }
+   } else {	/* this path indexes every source array directly by vertex number i */
+      for (i=start; i < end; i++) {
+	 v[0].ui = coord[i][0];
+	 v[1].ui = coord[i][1];
+	 v[2].ui = coord[i][2];
+	 if (DO_W) {
+	    v[3].ui = coord[i][3];
+	    v += 4;
+	 } 
+	 else
+	    v += 3;
+
+	 if (DO_NORM) {
+	    v[0].ui = norm[i][0];
+	    v[1].ui = norm[i][1];
+	    v[2].ui = norm[i][2];
+	    v += 3;
+	 }
+	 if (DO_RGBA) {
+	    v[0].ui = *(GLuint *)&col[i];	/* NOTE(review): if col is &dummy this reads past it for i > 0 -- confirm unreachable */
+	    v++;
+	 }
+	 if (DO_SPEC || DO_FOG) {
+	    if (DO_SPEC) {
+	       v[0].ub[0] = spec[i][0];
+	       v[0].ub[1] = spec[i][1];
+	       v[0].ub[2] = spec[i][2];
+	    }
+	    if (DO_FOG) {
+	       v[0].ub[3] = fog[i] * 255.0;
+	    }
+	    v++;
+	 }
+	 if (DO_TEX0) {
+	    v[0].ui = tc0[i][0];
+	    v[1].ui = tc0[i][1];
+	    if (DO_PTEX) {
+	       v[2].ui = tc0[i][3];
+	       v += 3;
+	    } 
+	    else
+	       v += 2;
+	 }
+	 if (DO_TEX1) {
+	    v[0].ui = tc1[i][0];
+	    v[1].ui = tc1[i][1];
+	    if (DO_PTEX) {
+	       v[2].ui = tc1[i][3];
+	       v += 3;
+	    } 
+	    else
+	       v += 2;
+	 } 
+	 if (DO_TEX2) {
+	    v[0].ui = tc2[i][0];
+	    v[1].ui = tc2[i][1];
+	    if (DO_PTEX) {
+	       v[2].ui = tc2[i][3];
+	       v += 3;
+	    } 
+	    else
+	       v += 2;
+	 } 
+      }
+   }
+}
+
+
+
+/* Register this variant in setup_tab[IDX]; vertex_size counts emit_union
+ * elements and matches how far TAG(emit) advances v per vertex. */
+static void TAG(init)( void )
+{
+   int elems = (DO_W) ? 4 : 3;			/* obj x, y, z and optionally w */
+   const int per_tex = (DO_PTEX) ? 3 : 2;	/* two coords, plus element 3 with DO_PTEX */
+
+   if (DO_NORM) elems += 3;
+   if (DO_RGBA) elems += 1;
+   if (DO_SPEC || DO_FOG) elems += 1;		/* specular and fog share one element */
+   if (DO_TEX0) elems += per_tex;
+   if (DO_TEX1) elems += per_tex;
+   if (DO_TEX2) elems += per_tex;
+
+   setup_tab[IDX].emit = TAG(emit);
+   setup_tab[IDX].vertex_format = IND;
+   setup_tab[IDX].vertex_size = elems;
+}
+
+
+#undef IND
+#undef TAG
+#undef IDX
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_tex.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_tex.c
index 7f8cd7584..42285ae85 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_tex.c
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_tex.c
@@ -32,7 +32,7 @@
 #include "radeon_context.h"
 #include "radeon_state.h"
 #include "radeon_ioctl.h"
-#include "radeon_vb.h"
+#include "radeon_swtcl.h"
 #include "radeon_tex.h"
 
 #include "colormac.h"
@@ -167,27 +167,19 @@ static radeonTexObjPtr radeonAllocTexObj( struct gl_texture_object *texObj )
    if (!t)
       return NULL;
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_API ) {
+   if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
       fprintf( stderr, __FUNCTION__"( %p, %p )\n", texObj, t );
    }
 
-   /* Initialize non-image-dependent parts of the state:
-    */
    t->tObj = texObj;
-#if 0
-   t->dirty_images = ~0;
-#endif
-   t->pp_txfilter = RADEON_BORDER_MODE_OGL;
-   t->pp_txformat = (RADEON_TXFORMAT_ENDIAN_NO_SWAP |
-		     RADEON_TXFORMAT_PERSPECTIVE_ENABLE);
-
    make_empty_list( t );
 
+   /* Initialize non-image-dependent parts of the state:
+    */
    radeonSetTexWrap( t, texObj->WrapS, texObj->WrapT );
    radeonSetTexMaxAnisotropy( t, texObj->MaxAnisotropy );
    radeonSetTexFilter( t, texObj->MinFilter, texObj->MagFilter );
    radeonSetTexBorderColor( t, texObj->BorderColor );
-
    return t;
 }
 
@@ -316,13 +308,6 @@ static void radeonTexImage1D( GLcontext *ctx, GLenum target, GLint level,
                           &ctx->Unpack, texObj, texImage);
 
    t->dirty_images |= (1 << level);
-
-   if ( t == rmesa->state.texture.unit[0].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX0 );
-   }
-   if ( t == rmesa->state.texture.unit[1].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX1 );
-   }
 }
 
 
@@ -357,13 +342,6 @@ static void radeonTexSubImage1D( GLcontext *ctx, GLenum target, GLint level,
 			     texImage);
 
    t->dirty_images |= (1 << level);
-
-   if ( t == rmesa->state.texture.unit[0].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX0 );
-   }
-   if ( t == rmesa->state.texture.unit[1].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX1 );
-   }
 }
 
 
@@ -378,6 +356,8 @@ static void radeonTexImage2D( GLcontext *ctx, GLenum target, GLint level,
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    radeonTexObjPtr t = (radeonTexObjPtr)texObj->DriverData;
 
+/*     fprintf(stderr, "%s\n", __FUNCTION__); */
+
    if ( t ) {
       radeonSwapOutTexObj( rmesa, t );
    }
@@ -396,13 +376,6 @@ static void radeonTexImage2D( GLcontext *ctx, GLenum target, GLint level,
                           &ctx->Unpack, texObj, texImage);
 
    t->dirty_images |= (1 << level);
-
-   if ( t == rmesa->state.texture.unit[0].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX0 );
-   }
-   if ( t == rmesa->state.texture.unit[1].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX1 );
-   }
 }
 
 
@@ -418,10 +391,11 @@ static void radeonTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    radeonTexObjPtr t = (radeonTexObjPtr) texObj->DriverData;
 
+/*     fprintf(stderr, "%s\n", __FUNCTION__); */
+
    assert( t ); /* this _should_ be true */
    if ( t ) {
       radeonSwapOutTexObj( rmesa, t );
-      t->dirty_images |= (1 << level);
    }
    else {
       t = radeonAllocTexObj(texObj);
@@ -437,88 +411,12 @@ static void radeonTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
 			     texImage);
 
    t->dirty_images |= (1 << level);
-
-   if ( t == rmesa->state.texture.unit[0].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX0 );
-   }
-   if ( t == rmesa->state.texture.unit[1].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX1 );
-   }
-}
-
-#if 0
-static void radeonTexImage3D( GLcontext *ctx, GLenum target, GLint level,
-                              GLint internalFormat,
-                              GLint width, GLint height, GLint depth,
-                              GLint border,
-                              GLenum format, GLenum type, const GLvoid *pixels,
-                              const struct gl_pixelstore_attrib *packing,
-                              struct gl_texture_object *texObj,
-                              struct gl_texture_image *texImage )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   radeonTexObjPtr t = (radeonTexObjPtr)texObj->DriverData;
-
-   if ( t ) {
-      radeonSwapOutTexObj( rmesa, t );
-   }
-   else {
-      t = radeonAllocTexObj( texObj );
-      if (!t) {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D");
-         return;
-      }
-      texObj->DriverData = t;
-   }
-
-   /* Note, this will call radeonChooseTextureFormat */
-   _mesa_store_teximage3d(ctx, target, level, internalFormat,
-                          width, height, depth, border, format, type, pixels,
-                          &ctx->Unpack, texObj, texImage);
-
-   t->dirty_images |= (1 << level);
-
-   if ( t == rmesa->state.texture.unit[0].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX0 );
-   }
-   if ( t == rmesa->state.texture.unit[1].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX1 );
-   }
 }
 
 
-static void radeonTexSubImage3D( GLcontext *ctx, GLenum target, GLint level,
-                                 GLint xoffset, GLint yoffset, GLint zoffset,
-                                 GLsizei width, GLsizei height, GLint depth,
-                                 GLenum format, GLenum type,
-                                 const GLvoid *pixels,
-                                 const struct gl_pixelstore_attrib *packing,
-                                 struct gl_texture_object *texObj,
-                                 struct gl_texture_image *texImage )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   radeonTexObjPtr t = (radeonTexObjPtr) texObj->DriverData;
-
-   assert( t ); /* this _should_ be true */
-
-   _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset,
-                             width, height, depth, format, type, pixels,
-                             packing, texObj, texImage);
-
-   t->dirty_images |= (1 << level);
-
-   if ( t == rmesa->state.texture.unit[0].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX0 );
-   }
-   if ( t == rmesa->state.texture.unit[1].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX1 );
-   }
-}
-#endif
-
 
 #define SCALED_FLOAT_TO_BYTE( x, scale ) \
-		((((GLint)((256.0F / scale) * (x))) - 1) / 2)
+		(((GLuint)(GLint)((255.0F / (scale)) * (x))) / 2) /* via GLint: negative float -> GLuint is undefined */
 
 static void radeonTexEnv( GLcontext *ctx, GLenum target,
 			  GLenum pname, const GLfloat *param )
@@ -527,7 +425,7 @@ static void radeonTexEnv( GLcontext *ctx, GLenum target,
    GLuint unit = ctx->Texture.CurrentUnit;
    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_API ) {
+   if ( RADEON_DEBUG & DEBUG_STATE ) {
       fprintf( stderr, "%s( %s )\n",
 	       __FUNCTION__, _mesa_lookup_enum_by_nr( pname ) );
    }
@@ -538,11 +436,9 @@ static void radeonTexEnv( GLcontext *ctx, GLenum target,
       GLuint envColor;
       UNCLAMPED_FLOAT_TO_RGBA_CHAN( c, texUnit->EnvColor );
       envColor = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
-      if ( rmesa->state.hw.texture[unit].pp_tfactor != envColor ) {
-	 if ( rmesa->state.texture.unit[unit].texobj ) {
-	    RADEON_STATECHANGE( rmesa, (RADEON_UPLOAD_TEX0 << unit) );
-         }
-	 rmesa->state.hw.texture[unit].pp_tfactor = envColor;
+      if ( rmesa->hw.tex[unit].cmd[TEX_PP_TFACTOR] != envColor ) {
+	 RADEON_STATECHANGE( rmesa, tex[unit] );
+	 rmesa->hw.tex[unit].cmd[TEX_PP_TFACTOR] = envColor;
       }
       break;
    }
@@ -560,14 +456,14 @@ static void radeonTexEnv( GLcontext *ctx, GLenum target,
       if ( bias == 0 ) {
 	 b = 0;
       } else if ( bias > 0 ) {
-	 b = ((GLuint)SCALED_FLOAT_TO_BYTE( bias, 4.0 )) << 8;
+	 b = ((GLuint)SCALED_FLOAT_TO_BYTE( bias, 4.0 )) << RADEON_LOD_BIAS_SHIFT;
       } else {
-	 b = ((GLuint)SCALED_FLOAT_TO_BYTE( bias, 1.0 )) << 8;
+	 b = ((GLuint)SCALED_FLOAT_TO_BYTE( bias, 1.0 )) << RADEON_LOD_BIAS_SHIFT;
       }
-      if ( rmesa->state.hw.texture[unit].pp_txfilter != b ) {
-	 if ( rmesa->state.texture.unit[unit].texobj )
-	    RADEON_STATECHANGE( rmesa, (RADEON_UPLOAD_TEX0 << unit) );
-	 rmesa->state.hw.texture[unit].pp_txfilter = b;
+      if ( (rmesa->hw.tex[unit].cmd[TEX_PP_TXFILTER] & RADEON_LOD_BIAS_MASK) != b ) {
+	 RADEON_STATECHANGE( rmesa, tex[unit] );
+	 rmesa->hw.tex[unit].cmd[TEX_PP_TXFILTER] &= ~RADEON_LOD_BIAS_MASK;
+	 rmesa->hw.tex[unit].cmd[TEX_PP_TXFILTER] |= (b & RADEON_LOD_BIAS_MASK);
       }
       break;
    }
@@ -584,7 +480,7 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target,
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    radeonTexObjPtr t = (radeonTexObjPtr) texObj->DriverData;
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_API ) {
+   if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
       fprintf( stderr, __FUNCTION__"( %s )\n",
 	       _mesa_lookup_enum_by_nr( pname ) );
    }
@@ -628,12 +524,9 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target,
       return;
    }
 
-   if ( t == rmesa->state.texture.unit[0].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX0 );
-   }
-   if ( t == rmesa->state.texture.unit[1].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX1 );
-   }
+   /* Mark this texobj as dirty (one bit per tex unit)
+    */
+   t->dirty_state = TEX_ALL;
 }
 
 
@@ -644,7 +537,7 @@ static void radeonBindTexture( GLcontext *ctx, GLenum target,
    radeonTexObjPtr t = (radeonTexObjPtr) texObj->DriverData;
    GLuint unit = ctx->Texture.CurrentUnit;
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_API ) {
+   if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
       fprintf( stderr, __FUNCTION__"( %p ) unit=%d\n", texObj, unit );
    }
 
@@ -662,7 +555,7 @@ static void radeonDeleteTexture( GLcontext *ctx,
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    radeonTexObjPtr t = (radeonTexObjPtr) texObj->DriverData;
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_API ) {
+   if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
       fprintf( stderr, __FUNCTION__"( %p )\n", texObj );
    }
 
@@ -717,6 +610,27 @@ static void radeonInitTextureObjects( GLcontext *ctx )
    ctx->Texture.CurrentUnit = tmp;
 }
 
+/* Driver hook for texgen state changes on the current texture unit.
+ *
+ * Requirements noted for doing texgen in hardware:
+ *  - same GEN_MODE for all active bits
+ *  - same EyePlane/ObjPlane for all active bits when using Eye/Obj
+ *  - STRQ presumably all supported (the matrix means incoming R values
+ *    can end up in STQ, with implications for vertex support)
+ *
+ * Too awkward to check on the fly: coord/pname/params are ignored and the
+ * unit is only flagged; radeonUpdateTextureUnit() tests recheck_texgen. */
+static void radeonTexGen( GLcontext *ctx,
+			  GLenum coord,
+			  GLenum pname,
+			  const GLfloat *params )
+{
+   const GLuint curUnit = ctx->Texture.CurrentUnit;
+   radeonContextPtr drv = RADEON_CONTEXT(ctx);
+   drv->recheck_texgen[curUnit] = GL_TRUE;
+}
+
+
 void radeonInitTextureFuncs( GLcontext *ctx )
 {
    ctx->Driver.ChooseTextureFormat	= radeonChooseTextureFormat;
@@ -730,7 +644,7 @@ void radeonInitTextureFuncs( GLcontext *ctx )
    ctx->Driver.CopyTexImage2D		= _swrast_copy_teximage2d;
    ctx->Driver.CopyTexSubImage1D	= _swrast_copy_texsubimage1d;
    ctx->Driver.CopyTexSubImage2D	= _swrast_copy_texsubimage2d;
-   ctx->Driver.CopyTexSubImage3D	= _swrast_copy_texsubimage3d;
+   ctx->Driver.CopyTexSubImage3D 	= _swrast_copy_texsubimage3d;
    ctx->Driver.TestProxyTexImage	= _mesa_test_proxy_teximage;
 
    ctx->Driver.BindTexture		= radeonBindTexture;
@@ -743,6 +657,7 @@ void radeonInitTextureFuncs( GLcontext *ctx )
 
    ctx->Driver.TexEnv			= radeonTexEnv;
    ctx->Driver.TexParameter		= radeonTexParameter;
+   ctx->Driver.TexGen                   = radeonTexGen;
 
    radeonInitTextureObjects( ctx );
 }
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_texmem.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_texmem.c
index 4e6b67dcb..f1dafef31 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_texmem.c
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_texmem.c
@@ -37,7 +37,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_context.h"
 #include "radeon_state.h"
 #include "radeon_ioctl.h"
-#include "radeon_vb.h"
+#include "radeon_swtcl.h"
 #include "radeon_tex.h"
 
 #include "context.h"
@@ -57,7 +57,7 @@ void radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t )
    if ( !t )
       return;
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_TEXTURE ) {
+   if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
       fprintf( stderr, __FUNCTION__"( %p, %p )\n", t, t->tObj );
    }
 
@@ -75,12 +75,14 @@ void radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t )
 
       if ( t == rmesa->state.texture.unit[0].texobj ) {
          rmesa->state.texture.unit[0].texobj = NULL;
-         rmesa->state.hw.dirty &= ~RADEON_UPLOAD_TEX0;
+	 remove_from_list( &rmesa->hw.tex[0] );
+	 make_empty_list( &rmesa->hw.tex[0] );
       }
 
       if ( t == rmesa->state.texture.unit[1].texobj ) {
          rmesa->state.texture.unit[1].texobj = NULL;
-         rmesa->state.hw.dirty &= ~RADEON_UPLOAD_TEX1;
+	 remove_from_list( &rmesa->hw.tex[1] );
+	 make_empty_list( &rmesa->hw.tex[1] );
       }
    }
 
@@ -88,11 +90,12 @@ void radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t )
    FREE( t );
 }
 
+
 /* Keep track of swapped out texture objects.
  */
 void radeonSwapOutTexObj( radeonContextPtr rmesa, radeonTexObjPtr t )
 {
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_TEXTURE ) {
+   if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
       fprintf( stderr, __FUNCTION__"( %p, %p )\n", t, t->tObj );
    }
 
@@ -124,8 +127,7 @@ void radeonPrintLocalLRU( radeonContextPtr rmesa, int heap )
 		  t->memBlock->ofs,
 		  t->memBlock->size );
       } else {
-	 fprintf( stderr, "Texture (bound %d) at 0x%x sz 0x%x\n",
-		  t->bound,
+	 fprintf( stderr, "Texture at 0x%x sz 0x%x\n",
 		  t->memBlock->ofs,
 		  t->memBlock->size );
       }
@@ -332,7 +334,7 @@ static void radeonUploadSubImage( radeonContextPtr rmesa,
    drmRadeonTexture tex;
    drmRadeonTexImage tmp;
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_TEXTURE ) {
+   if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
       fprintf( stderr, __FUNCTION__"( %p, %p )\n", t, t->tObj );
    }
 
@@ -345,12 +347,12 @@ static void radeonUploadSubImage( radeonContextPtr rmesa,
 
    texImage = t->tObj->Image[level];
    if ( !texImage ) {
-      if ( RADEON_DEBUG & DEBUG_VERBOSE_TEXTURE )
+      if ( RADEON_DEBUG & DEBUG_TEXTURE )
 	 fprintf( stderr, __FUNCTION__ ": texImage %d is NULL!\n", level );
       return;
    }
    if ( !texImage->Data ) {
-      if ( RADEON_DEBUG & DEBUG_VERBOSE_TEXTURE )
+      if ( RADEON_DEBUG & DEBUG_TEXTURE )
 	 fprintf( stderr, __FUNCTION__ ": image data is NULL!\n" );
       return;
    }
@@ -382,7 +384,7 @@ static void radeonUploadSubImage( radeonContextPtr rmesa,
    rmesa->c_textureBytes += (dwords << 2);
 #endif
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_MSG ) {
+   if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
       GLint imageX = 0;
       GLint imageY = 0;
       GLint blitX = t->image[level].x;
@@ -426,8 +428,6 @@ static void radeonUploadSubImage( radeonContextPtr rmesa,
 	       t->image[level].data );
       exit( 1 );
    }
-
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT | RADEON_UPLOAD_MASKS );
 }
 
 /* Upload the texture images associated with texture `t'.  This might
@@ -439,8 +439,10 @@ int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t )
    const int numLevels = t->lastLevel - t->firstLevel + 1;
    int i;
    int heap;
+   radeonTexObjPtr t0 = rmesa->state.texture.unit[0].texobj;
+   radeonTexObjPtr t1 = rmesa->state.texture.unit[1].texobj;
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_TEXTURE ) {
+   if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
       fprintf( stderr, __FUNCTION__"( %p, %p ) sz=%d lvls=%d-%d\n",
 	       rmesa->glCtx, t->tObj, t->totalSize,
 	       t->firstLevel, t->lastLevel );
@@ -479,7 +481,8 @@ int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t )
 
       /* Kick out textures until the requested texture fits */
       while ( !t->memBlock ) {
-	 if ( rmesa->texture.objects[heap].prev->bound ) {
+	 if ( rmesa->texture.objects[heap].prev == t0 ||
+	      rmesa->texture.objects[heap].prev == t1 ) {
 	    fprintf( stderr,
 		     "radeonUploadTexImages: ran into bound texture\n" );
 	    UNLOCK_HARDWARE( rmesa );
@@ -526,11 +529,9 @@ int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t )
       }
 #endif
 
-      if ( t == rmesa->state.texture.unit[0].texobj )
-	 RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX0 );
-
-      if ( t == rmesa->state.texture.unit[1].texobj )
-	 RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX1 );
+      /* Mark this texobj as dirty on all units:
+       */
+      t->dirty_state = TEX_ALL;
    }
 
    /* Let the world know we've used this memory recently */
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_texstate.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_texstate.c
index d35b664c3..b7230ec3f 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_texstate.c
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_texstate.c
@@ -37,8 +37,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_context.h"
 #include "radeon_state.h"
 #include "radeon_ioctl.h"
-#include "radeon_vb.h"
+#include "radeon_swtcl.h"
 #include "radeon_tex.h"
+#include "radeon_tcl.h"
 
 #include "colormac.h"
 #include "context.h"
@@ -228,6 +229,8 @@ static void radeonSetTexImages( radeonContextPtr rmesa,
    t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) |
 		      (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT));
 
+   t->dirty_state = TEX_ALL;
+
    radeonUploadTexImages( rmesa, t );
 }
 
@@ -733,7 +736,7 @@ static void radeonUpdateTextureEnv( GLcontext *ctx, int unit )
    GLuint color_arg[3], alpha_arg[3];
    GLuint i, numColorArgs = 0, numAlphaArgs = 0;
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_API ) {
+   if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
       fprintf( stderr, __FUNCTION__"( %p, %d ) format=%s\n",
 	       ctx, unit, _mesa_lookup_enum_by_nr( format ) );
    }
@@ -1071,54 +1074,222 @@ static void radeonUpdateTextureEnv( GLcontext *ctx, int unit )
       return;
    }
 
-   if ( rmesa->state.hw.texture[unit].pp_txcblend != color_combine ||
-	rmesa->state.hw.texture[unit].pp_txablend != alpha_combine ) {
-      RADEON_STATECHANGE( rmesa, (RADEON_UPLOAD_TEX0 << unit) );
-      rmesa->state.hw.texture[unit].pp_txcblend = color_combine;
-      rmesa->state.hw.texture[unit].pp_txablend = alpha_combine;
+   if ( rmesa->hw.tex[unit].cmd[TEX_PP_TXCBLEND] != color_combine ||
+	rmesa->hw.tex[unit].cmd[TEX_PP_TXABLEND] != alpha_combine ) {
+      RADEON_STATECHANGE( rmesa, tex[unit] );
+      rmesa->hw.tex[unit].cmd[TEX_PP_TXCBLEND] = color_combine;
+      rmesa->hw.tex[unit].cmd[TEX_PP_TXABLEND] = alpha_combine;
    }
 }
 
-static void radeonUpdateTextureUnit( GLcontext *ctx, int unit )
+/* TEX_PP_TXFILTER bits that import_tex_obj_state() copies from the texobj. */
+#define TEXOBJ_TXFILTER_MASK (RADEON_MAX_MIP_LEVEL_MASK |	\
+			      RADEON_MIN_FILTER_MASK | 		\
+			      RADEON_MAG_FILTER_MASK |		\
+			      RADEON_MAX_ANISO_MASK |		\
+			      RADEON_CLAMP_S_MASK | 		\
+			      RADEON_CLAMP_T_MASK)
+/* TEX_PP_TXFORMAT bits that import_tex_obj_state() copies from the texobj. */
+#define TEXOBJ_TXFORMAT_MASK (RADEON_TXFORMAT_WIDTH_MASK |	\
+			      RADEON_TXFORMAT_HEIGHT_MASK |	\
+			      RADEON_TXFORMAT_FORMAT_MASK |	\
+			      RADEON_TXFORMAT_ALPHA_IN_MAP)
+
+static void import_tex_obj_state( radeonContextPtr rmesa,
+				  int unit,
+				  radeonTexObjPtr texobj )
 {
+   /* Copy the texobj-owned register fields into this unit's tex state,
+    * clear the texobj's dirty bit for the unit, then signal the change. */
+   GLuint *regs = RADEON_DB_STATE( tex[unit] );
+   regs[TEX_PP_TXFILTER] = ((regs[TEX_PP_TXFILTER] & ~TEXOBJ_TXFILTER_MASK) |
+			    (texobj->pp_txfilter & TEXOBJ_TXFILTER_MASK));
+   regs[TEX_PP_TXFORMAT] = ((regs[TEX_PP_TXFORMAT] & ~TEXOBJ_TXFORMAT_MASK) |
+			    (texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK));
+   regs[TEX_PP_TXOFFSET] = texobj->pp_txoffset;
+   regs[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
+   texobj->dirty_state &= ~(1<<unit);
+   RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.tex[unit] );
+}
+
+/* Load the S and T texgen planes into TexGenMatrix[unit] (elements
+ * 0/4/8/12 and 1/5/9/13) and enable the unit's texmat bit, unless both
+ * planes equal {1,1,1,1}.  NOTE(review): confirm that test is intended. */
+static void set_texgen_matrix( radeonContextPtr rmesa, 
+			       GLuint unit,
+			       GLfloat *s_plane,
+			       GLfloat *t_plane )
+{
+   static const GLfloat scale_identity[4] = { 1,1,1,1 };
+
+   if (!TEST_EQ_4V( s_plane, scale_identity) ||
+      !(TEST_EQ_4V( t_plane, scale_identity))) {
+      rmesa->TexGenEnabled |= RADEON_TEXMAT_0_ENABLE<<unit;
+      rmesa->TexGenMatrix[unit].m[0]  = s_plane[0];
+      rmesa->TexGenMatrix[unit].m[4]  = s_plane[1];
+      rmesa->TexGenMatrix[unit].m[8]  = s_plane[2];
+      rmesa->TexGenMatrix[unit].m[12] = s_plane[3];
+
+      rmesa->TexGenMatrix[unit].m[1]  = t_plane[0];
+      rmesa->TexGenMatrix[unit].m[5]  = t_plane[1];
+      rmesa->TexGenMatrix[unit].m[9]  = t_plane[2];
+      rmesa->TexGenMatrix[unit].m[13] = t_plane[3];
+      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+   }
+}
+
+/* Recompute the TexGenEnabled bits for one texture unit from its GL
+ * texgen state.  Q texgen is not handled yet and forces a fallback.
+ * Returns GL_TRUE if handled, GL_FALSE if a fallback is required.
+ */
+static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit )
+{  
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
+   GLuint tmp = rmesa->TexGenEnabled; /* old value, compared at the end */
+
+   rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit);
+   rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit);
+   rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift);
+   rmesa->TexGenNeedNormals[unit] = 0;
+
+   if (0) /* debug output, compiled out */
+   fprintf(stderr, "%s unit %d cleared texgenEnabled %x\n", __FUNCTION__,
+	   unit, rmesa->TexGenEnabled);
+
+   if ((texUnit->TexGenEnabled & (S_BIT|T_BIT)) == 0) {
+      /* Neither S nor T texgen enabled: select this unit's own
+       * texcoord as the input, no fallback needed. */
+      rmesa->TexGenEnabled |= 
+	 (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
+      return GL_TRUE;
+   }
+   else if (texUnit->TexGenEnabled & Q_BIT) {
+      /* Very easy to do this, in fact it would remove a fallback case
+       * elsewhere, but it hasn't been done yet, so fall back.  Note
+       * that this branch prints to stderr unconditionally. */
+      fprintf(stderr, "fallback Q_BIT\n");
+      return GL_FALSE;
+   }
+   else if ((texUnit->TexGenEnabled & (S_BIT|T_BIT)) != (S_BIT|T_BIT) ||
+	    texUnit->GenModeS != texUnit->GenModeT) {
+      /* Only one of S/T enabled, or S and T use different modes:
+       * mixed modes, fallback. */
+/*        fprintf(stderr, "fallback mixed texgen\n"); */
+      return GL_FALSE;
+   }
+   else
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_TEXMAT_0_ENABLE << unit;
+
+   switch (texUnit->GenModeS) { /* S and T modes are equal here */
+   case GL_OBJECT_LINEAR:
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_OBJ << inputshift;
+      set_texgen_matrix( rmesa, unit, 
+			 texUnit->ObjectPlaneS,
+			 texUnit->ObjectPlaneT);
+      break;
+
+   case GL_EYE_LINEAR:
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE << inputshift;
+      set_texgen_matrix( rmesa, unit, 
+			 texUnit->EyePlaneS,
+			 texUnit->EyePlaneT);
+      break;
+
+   case GL_REFLECTION_MAP_NV:
+      rmesa->TexGenNeedNormals[unit] = GL_TRUE;
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE_REFLECT<<inputshift;
+      break;
+
+   case GL_NORMAL_MAP_NV:
+      rmesa->TexGenNeedNormals[unit] = GL_TRUE;
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE_NORMAL<<inputshift;
+      break;
+
+   case GL_SPHERE_MAP:
+   default:
+      /* Sphere map and any other mode are unsupported, fallback:
+       */
+      /*  fprintf(stderr, "fallback unsupported texgen\n"); */
+      return GL_FALSE;
+   }
+
+   if (tmp != rmesa->TexGenEnabled) { /* bits changed: flag texture matrix state */
+      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+   }
+
+/*     fprintf(stderr, "%s unit %d texgenEnabled %x\n", __FUNCTION__, */
+/*  	   unit, rmesa->TexGenEnabled); */
+   return GL_TRUE;
+}
+
+
+
+
+static GLboolean radeonUpdateTextureUnit( GLcontext *ctx, int unit )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+
+/*     fprintf(stderr, "%s\n", __FUNCTION__); */
 
    if ( texUnit->_ReallyEnabled & (TEXTURE0_1D|TEXTURE0_2D) ) {
       struct gl_texture_object *tObj = texUnit->_Current;
       radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
-      GLuint flag = RADEON_UPLOAD_TEX0 << unit;
       GLenum format;
 
       /* Fallback if there's a texture border */
-      if ( tObj->Image[tObj->BaseLevel]->Border > 0 ) {
-         FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, GL_TRUE );
-         return;
-      }
+      if ( tObj->Image[tObj->BaseLevel]->Border > 0 )
+         return GL_FALSE;
 
       /* Upload teximages (not pipelined)
        */
       if ( t->dirty_images ) {
 	 RADEON_FIREVERTICES( rmesa );
 	 radeonSetTexImages( rmesa, tObj );
-	 if ( !t->memBlock ) {
-	    FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, GL_TRUE );
-	    return;
-	 }
+	 /* Fallback if we can't upload:
+	  */
+	 if ( !t->memBlock ) 
+	    return GL_FALSE;
       }
 
       /* Update state if this is a different texture object to last
        * time.
        */
       if ( rmesa->state.texture.unit[unit].texobj != t ) {
-	 if ( rmesa->state.texture.unit[unit].texobj == NULL ) {
-	    RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
-	    rmesa->state.hw.context.pp_cntl |= (RADEON_TEX_0_ENABLE |
-					     RADEON_TEX_BLEND_0_ENABLE)<<unit;
-	 }
-	 RADEON_STATECHANGE( rmesa, flag );
 	 rmesa->state.texture.unit[unit].texobj = t;
-	 radeonUpdateTexLRU( rmesa, t ); /* done too often */
+	 t->dirty_state |= 1<<unit;
+	 radeonUpdateTexLRU( rmesa, t ); /* XXX: should be locked! */
+      }
+
+
+      /* Newly enabled?
+       */
+      if ( !(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit))) {
+	 RADEON_STATECHANGE( rmesa, ctx );
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= 
+	    (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit;
+
+	 RADEON_STATECHANGE( rmesa, tcl );
+
+	 if (unit == 0) 
+	    rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_ST0;
+	 else 
+	    rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_ST1;
+
+	 rmesa->recheck_texgen[unit] = GL_TRUE;
+      }
+
+      if (t->dirty_state & (1<<unit)) {
+	 import_tex_obj_state( rmesa, unit, t );
+      }
+      
+      if (rmesa->recheck_texgen[unit]) {
+	 GLboolean fallback = !radeon_validate_texgen( ctx, unit );
+	 TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), fallback);
+	 rmesa->recheck_texgen[unit] = 0;
+	 rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
       }
 
       format = tObj->Image[tObj->BaseLevel]->Format;
@@ -1130,23 +1301,72 @@ static void radeonUpdateTextureUnit( GLcontext *ctx, int unit )
       }
    }
    else if ( texUnit->_ReallyEnabled ) {
-      FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, GL_TRUE );
-      return;
+      /* 3d textures, etc:
+       */
+      return GL_FALSE;
    }
-   else {
+   else if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit)) {
       /* Texture unit disabled */
       rmesa->state.texture.unit[unit].texobj = 0;
-      rmesa->state.hw.dirty &= ~(RADEON_UPLOAD_TEX0 << unit);
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
-      rmesa->state.hw.context.pp_cntl &= ~((RADEON_TEX_0_ENABLE |
-					    RADEON_TEX_BLEND_0_ENABLE) << unit);
+      RADEON_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= 
+	 ~((RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit);
+
+      RADEON_STATECHANGE( rmesa, tcl );
+      switch (unit) {	/* clear the unit's ST/Q bits in TCL_OUTPUT_VTXFMT */
+      case 0:
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_TCL_VTX_ST0 |
+						   RADEON_TCL_VTX_Q0);
+	 break;
+      case 1:
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_TCL_VTX_ST1 |
+						   RADEON_TCL_VTX_Q1);
+	 break;
+      default:		/* a label must be followed by a statement */
+	 break;
+      }
+
+      if (rmesa->TclFallback & (RADEON_TCL_FALLBACK_TEXGEN_0<<unit)) {
+	 TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
+	 rmesa->recheck_texgen[unit] = GL_TRUE;
+      }
+
+
+
+      {
+	 GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
+	 GLuint tmp = rmesa->TexGenEnabled;
+
+	 rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit);
+	 rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit);
+	 rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift);
+	 rmesa->TexGenNeedNormals[unit] = 0;
+	 rmesa->TexGenEnabled |= 
+	    (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
+
+	 if (tmp != rmesa->TexGenEnabled) {
+	    rmesa->recheck_texgen[unit] = GL_TRUE;
+	    rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+	 }
+      }
    }
+
+   return GL_TRUE;
 }
 
 void radeonUpdateTextureState( GLcontext *ctx )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, GL_FALSE );
-   radeonUpdateTextureUnit( ctx, 0 );
-   radeonUpdateTextureUnit( ctx, 1 );
+   /* Unit 1 is only validated when unit 0 did not need a fallback. */
+   GLboolean hw_ok = radeonUpdateTextureUnit( ctx, 0 );
+   if (hw_ok)
+      hw_ok = radeonUpdateTextureUnit( ctx, 1 );
+
+   FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, !hw_ok );
+
+   if (rmesa->TclFallback &&
+       rmesa->last_ReallyEnabled != ctx->Texture._ReallyEnabled) {
+      rmesa->last_ReallyEnabled = ctx->Texture._ReallyEnabled;
+      radeonChooseVertexState( ctx );
+   }
 }
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.c
index 414dc072d..6d84f5c9c 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.c
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.c
@@ -1,638 +1,961 @@
 /* $XFree86$ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
 /*
- * Copyright 2000, 2001 VA Linux Systems Inc., Fremont, California.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
  * Authors:
- *    Gareth Hughes <gareth@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
  */
+#include "radeon_context.h"
+#include "radeon_state.h"
+#include "radeon_ioctl.h"
+#include "radeon_tex.h"
+#include "radeon_tcl.h"
+#include "radeon_vtxfmt.h"
 
-
-#include "glheader.h"
 #include "api_noop.h"
-#include "colormac.h"
+#include "api_arrayelt.h"
 #include "context.h"
-#include "light.h"
-#include "macros.h"
 #include "mem.h"
 #include "mmath.h"
 #include "mtypes.h"
-#include "simple_list.h"
+#include "enums.h"
+#include "glapi.h"
+#include "colormac.h"
+#include "light.h"
+#include "state.h"
 #include "vtxfmt.h"
 
-#include "math/m_xform.h"
 #include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_array_api.h"
 
-#include "radeon_context.h"
-#include "radeon_ioctl.h"
-#include "radeon_vb.h"
-#include "radeon_vtxfmt.h"
-
-
-struct radeon_imm_vertex {
-   /* The immediate mode vertex cache.
-    */
-   radeonTnlVertex vertices[8];
-
-   /* Current vertices out of the cache.  This makes the state machine
-    * a lot simpler, and avoids the need to swap lots of function
-    * pointers around.
-    */
-   radeonTnlVertex *v0;
-   radeonTnlVertex *v1;
-   radeonTnlVertex *v2;
-   radeonTnlVertex *v3;
-
-   radeon_flush_func *flush_tab;
-
-   void (*save_vertex)( GLcontext *ctx, radeonTnlVertex *v );
-   void (*flush_vertex)( GLcontext *ctx, radeonTnlVertex *v );
-
-   radeon_interp_func interp;
+struct radeon_vb vb;
 
-   GLuint prim;
-   GLuint format;
+static void radeonFlushVertices( GLcontext *, GLuint );
 
-   GLvertexformat vtxfmt;
-};
+static void count_func( const char *name,  struct dynfn *l )
+{  /* Debug helper: print "<name>: <count>" to stderr when list l has entries. */
+   struct dynfn *node;
+   int nr = 0;
+   foreach (node, l) nr++;   /* one increment per entry visited by foreach */
+   if (nr != 0) fprintf(stderr, "%s: %d\n", name, nr );
+}
 
+static void count_funcs( radeonContextPtr rmesa )
+{  /* Debug helper: report the entry count of every list in rmesa->vb.dfn_cache via count_func. */
+   count_func( "Vertex2f", &rmesa->vb.dfn_cache.Vertex2f );
+   count_func( "Vertex2fv", &rmesa->vb.dfn_cache.Vertex2fv );
+   count_func( "Vertex3f", &rmesa->vb.dfn_cache.Vertex3f );
+   count_func( "Vertex3fv", &rmesa->vb.dfn_cache.Vertex3fv );
+   count_func( "Color4ub", &rmesa->vb.dfn_cache.Color4ub );
+   count_func( "Color4ubv", &rmesa->vb.dfn_cache.Color4ubv );
+   count_func( "Color3ub", &rmesa->vb.dfn_cache.Color3ub );
+   count_func( "Color3ubv", &rmesa->vb.dfn_cache.Color3ubv );
+   count_func( "Color4f", &rmesa->vb.dfn_cache.Color4f );
+   count_func( "Color4fv", &rmesa->vb.dfn_cache.Color4fv );
+   count_func( "Color3f", &rmesa->vb.dfn_cache.Color3f );
+   count_func( "Color3fv", &rmesa->vb.dfn_cache.Color3fv );
+   count_func( "SecondaryColor3f", &rmesa->vb.dfn_cache.SecondaryColor3fEXT );
+   count_func( "SecondaryColor3fv", &rmesa->vb.dfn_cache.SecondaryColor3fvEXT );
+   count_func( "SecondaryColor3ub", &rmesa->vb.dfn_cache.SecondaryColor3ubEXT );
+   count_func( "SecondaryColor3ubv", &rmesa->vb.dfn_cache.SecondaryColor3ubvEXT );
+   count_func( "Normal3f", &rmesa->vb.dfn_cache.Normal3f );
+   count_func( "Normal3fv", &rmesa->vb.dfn_cache.Normal3fv );
+   count_func( "TexCoord2f", &rmesa->vb.dfn_cache.TexCoord2f );
+   count_func( "TexCoord2fv", &rmesa->vb.dfn_cache.TexCoord2fv );
+   count_func( "TexCoord1f", &rmesa->vb.dfn_cache.TexCoord1f );
+   count_func( "TexCoord1fv", &rmesa->vb.dfn_cache.TexCoord1fv );
+   count_func( "MultiTexCoord2fARB", &rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+   count_func( "MultiTexCoord2fvARB", &rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+   count_func( "MultiTexCoord1fARB", &rmesa->vb.dfn_cache.MultiTexCoord1fARB );
+   count_func( "MultiTexCoord1fvARB", &rmesa->vb.dfn_cache.MultiTexCoord1fvARB );
+}
 
-#define VERTEX				radeonVertex
-#define TNL_VERTEX			radeonTnlVertex
 
+void radeon_copy_to_current( GLcontext *ctx ) 
+{  /* Copy each attribute present in the vertex format from vb back into ctx->Current. */
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-#define LINTERP( T, A, B )		((A) + (T) * ((B) - (A)))
+   assert(ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT);
+   assert(vb.context == ctx);
 
-#define INTERP_RGBA( t, out, a, b )					\
-do {									\
-   GLint i;								\
-   for ( i = 0 ; i < 4 ; i++ ) {					\
-      GLfloat fa = UBYTE_TO_FLOAT( a[i] );				\
-      GLfloat fb = UBYTE_TO_FLOAT( b[i] );				\
-      GLfloat fo = LINTERP( t, fa, fb );				\
-      UNCLAMPED_FLOAT_TO_UBYTE( out[i], fo );				\
-   }									\
-} while (0)
+   if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_N0) {
+      ctx->Current.Normal[0] = vb.normalptr[0];
+      ctx->Current.Normal[1] = vb.normalptr[1];
+      ctx->Current.Normal[2] = vb.normalptr[2];
+   }
 
+   if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_PKCOLOR) {
+      ctx->Current.Color[0] = UBYTE_TO_FLOAT( vb.ubytecolorptr[0] );
+      ctx->Current.Color[1] = UBYTE_TO_FLOAT( vb.ubytecolorptr[1] );
+      ctx->Current.Color[2] = UBYTE_TO_FLOAT( vb.ubytecolorptr[2] );
+      ctx->Current.Color[3] = UBYTE_TO_FLOAT( vb.ubytecolorptr[3] );
+   } 
+   /* Float RGB and float alpha are separate format bits and are copied separately. */
+   if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_FPCOLOR) {
+      ctx->Current.Color[0] = vb.floatcolorptr[0];
+      ctx->Current.Color[1] = vb.floatcolorptr[1];
+      ctx->Current.Color[2] = vb.floatcolorptr[2];
+   }
 
+   if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_FPALPHA)
+      ctx->Current.Color[3] = vb.floatcolorptr[3];
+   /* Only three components of the packed specular color are copied. */
+   if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_PKSPEC) {
+      ctx->Current.SecondaryColor[0] = UBYTE_TO_FLOAT( vb.ubytespecptr[0] );
+      ctx->Current.SecondaryColor[1] = UBYTE_TO_FLOAT( vb.ubytespecptr[1] );
+      ctx->Current.SecondaryColor[2] = UBYTE_TO_FLOAT( vb.ubytespecptr[2] );
+   } 
+
+   if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_ST0) {
+      ctx->Current.Texcoord[0][0] = vb.texcoordptr[0][0];
+      ctx->Current.Texcoord[0][1] = vb.texcoordptr[0][1];
+      ctx->Current.Texcoord[0][2] = 0.0F;   /* only s,t are stored; r and q are reset */
+      ctx->Current.Texcoord[0][3] = 1.0F;
+   }
 
+   if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_ST1) {
+      ctx->Current.Texcoord[1][0] = vb.texcoordptr[1][0];
+      ctx->Current.Texcoord[1][1] = vb.texcoordptr[1][1];
+      ctx->Current.Texcoord[1][2] = 0.0F;
+      ctx->Current.Texcoord[1][3] = 1.0F;
+   }
 
-/* ================================================================
- * Color functions:  Always update ctx->Current.*
- */
+   ctx->Driver.NeedFlush &= ~FLUSH_UPDATE_CURRENT;
+}
 
-/* ================================================================
- * Material functions:
- */
+static GLboolean discreet_gl_prim[GL_POLYGON+1] = {   /* by GL mode; 1 = adjacent primlist entries may be merged in flush_prims */
+   1,				/* 0 points */
+   1,				/* 1 lines */
+   0,				/* 2 line_loop */
+   0,				/* 3 line_strip */
+   1,				/* 4 tris */
+   0,				/* 5 tri_strip */
+   0,				/* 6 tri_fan */
+   1,				/* 7 quads */
+   0,				/* 8 quadstrip */
+   0,				/* 9 poly */
+};
 
-static __inline void radeon_recalc_base_color( GLcontext *ctx )
+static void flush_prims( radeonContextPtr rmesa )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   struct gl_light *light;
-
-   COPY_3V( rmesa->state.light.base_color, ctx->Light._BaseColor[0] );
-   foreach ( light, &ctx->Light.EnabledList ) {
-      ACC_3V( rmesa->state.light.base_color, light->_MatAmbient[0] );
+   int i,j;
+   struct radeon_dma_region tmp = rmesa->dma.current;
+   /* tmp copies the current DMA region; take a buffer reference, released at the end. */
+   tmp.buf->refcount++;
+   tmp.aos_size = vb.vertex_size;
+   tmp.aos_stride = vb.vertex_size;
+   tmp.aos_start = GET_START(&tmp);
+   /* Advance the current region past the vertices written so far (vertex_size * 4 bytes each). */
+   rmesa->dma.current.ptr = rmesa->dma.current.start += 
+      (vb.initial_counter - vb.counter) * vb.vertex_size * 4; 
+   /* tmp becomes the only array component to emit; dma.flush is cleared. */
+   rmesa->tcl.vertex_format = rmesa->vb.vertex_format;
+   rmesa->tcl.aos_components[0] = &tmp;
+   rmesa->tcl.nr_aos_components = 1;
+   rmesa->dma.flush = 0;
+
+   /* Optimize the primitive list:
+    */
+   if (rmesa->vb.nrprims > 1) {
+      for (j = 0, i = 1 ; i < rmesa->vb.nrprims; i++) {
+	 int pj = rmesa->vb.primlist[j].prim & 0xf;
+	 int pi = rmesa->vb.primlist[i].prim & 0xf;
+	 /* Fold entry i into j when both have the same mergeable mode and are contiguous. */
+	 if (pj == pi && discreet_gl_prim[pj] &&
+	     rmesa->vb.primlist[i].start == rmesa->vb.primlist[j].end) {
+	    rmesa->vb.primlist[j].end = rmesa->vb.primlist[i].end;
+	 }
+	 else {
+	    j++;
+	    if (j != i) rmesa->vb.primlist[j] = rmesa->vb.primlist[i];
+	 }
+      }
+      rmesa->vb.nrprims = j+1;
     }
-    
-   UNCLAMPED_FLOAT_TO_UBYTE( rmesa->state.light.base_alpha, 
-			     ctx->Light.Material[0].Diffuse[3] );
-}
-
 
-/* ================================================================
- * Normal functions:
- */
-
-struct radeon_norm_tab {
-   void (*normal3f_multi)( GLfloat x, GLfloat y, GLfloat z );
-   void (*normal3fv_multi)( const GLfloat *v );
-   void (*normal3f_single)( GLfloat x, GLfloat y, GLfloat z );
-   void (*normal3fv_single)( const GLfloat *v );
-};
-
-static struct radeon_norm_tab norm_tab[0x4];
+   for (i = 0 ; i < rmesa->vb.nrprims; i++) {
+      if (RADEON_DEBUG & DEBUG_PRIMS)
+	 fprintf(stderr, "vtxfmt prim %d: %s %d..%d\n", i,
+		 _mesa_lookup_enum_by_nr( rmesa->vb.primlist[i].prim & 
+					  PRIM_MODE_MASK ),
+		 rmesa->vb.primlist[i].start,
+		 rmesa->vb.primlist[i].end);
+
+      radeonEmitPrimitive( vb.context,
+			   rmesa->vb.primlist[i].start,
+			   rmesa->vb.primlist[i].end,
+			   rmesa->vb.primlist[i].prim );
+   }
 
+   rmesa->vb.nrprims = 0;
+   radeonReleaseDmaRegion( rmesa, &tmp, __FUNCTION__ );
+}
 
-#define HAVE_HW_LIGHTING 0
 
-#define GET_CURRENT_VERTEX						\
-   GET_CURRENT_CONTEXT(ctx);						\
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);			\
-   radeonTnlVertexPtr v = rmesa->imm.v0
+static void start_prim( radeonContextPtr rmesa, GLuint mode )
+{  /* Open primlist[nrprims]: record the current vertex index as its start and mode as its prim. */
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s %d\n", __FUNCTION__, vb.initial_counter - vb.counter);
 
-#define CURRENT_NORMAL			rmesa->state.current.normal
-#define BASE_COLOR			rmesa->state.light.base_color
-#define BASE_ALPHA			rmesa->state.light.base_alpha
+   rmesa->vb.primlist[rmesa->vb.nrprims].start = vb.initial_counter - vb.counter;
+   rmesa->vb.primlist[rmesa->vb.nrprims].prim = mode;
+}
 
-#define VERT_COLOR( COMP )		v->color[COMP]
+static void note_last_prim( radeonContextPtr rmesa, GLuint flags )
+{  /* Close primlist[nrprims] at the current vertex index, OR-ing flags into its prim. */
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s %d\n", __FUNCTION__, vb.initial_counter - vb.counter);
 
+   if (rmesa->vb.prim[0] != GL_POLYGON+1) {   /* nothing is recorded when prim[0] is GL_POLYGON+1 */
+      rmesa->vb.primlist[rmesa->vb.nrprims].prim |= flags;
+      rmesa->vb.primlist[rmesa->vb.nrprims].end = vb.initial_counter - vb.counter;
 
-#define IND (0)
-#define TAG(x) radeon_##x
-#define PRESERVE_NORMAL_DEFS
-#include "tnl_dd/t_dd_imm_napi.h"
+      if (++(rmesa->vb.nrprims) == RADEON_MAX_PRIMS)   /* list full: emit it now */
+	 flush_prims( rmesa );
+   }
+}
 
-#define IND (NORM_RESCALE)
-#define TAG(x) radeon_##x##_rescale
-#define PRESERVE_NORMAL_DEFS
-#include "tnl_dd/t_dd_imm_napi.h"
 
-#define IND (NORM_NORMALIZE)
-#define TAG(x) radeon_##x##_normalize
-#include "tnl_dd/t_dd_imm_napi.h"
+static void copy_vertex( radeonContextPtr rmesa, GLuint n, GLfloat *dst )
+{  /* Copy vertex n of the open primitive (vb.vertex_size floats) from the DMA buffer into dst. */
+   GLuint i;
+   GLfloat *src = (GLfloat *)(rmesa->dma.current.address + 
+			      rmesa->dma.current.ptr + 
+			      (rmesa->vb.primlist[rmesa->vb.nrprims].start + n) * 
+			      vb.vertex_size * 4);
 
+   if (RADEON_DEBUG & DEBUG_VFMT) 
+      fprintf(stderr, "copy_vertex %d\n", rmesa->vb.primlist[rmesa->vb.nrprims].start + n);
 
-static void radeon_init_norm_funcs( void )
-{
-   radeon_init_norm();
-   radeon_init_norm_rescale();
-   radeon_init_norm_normalize();
+   for (i = 0 ; i < vb.vertex_size; i++) {
+      dst[i] = src[i];
+   }
 }
 
-static void radeon_choose_Normal3f( GLfloat x, GLfloat y, GLfloat z )
+/* Save into tmp the trailing vertices of the primitive being built that a
+ * continuation needs, and return how many were saved.  NOTE: this reads
+ * the vertices back from uncached memory; the counter/notify mechanism
+ * could instead populate tmp on the fly as vertices are generated. */
+static GLuint copy_dma_verts( radeonContextPtr rmesa, GLfloat (*tmp)[15] )
 {
-   GET_CURRENT_CONTEXT(ctx);
-   GLuint index;
-
-   if ( ctx->Light.Enabled ) {
-      if ( ctx->Transform.Normalize ) {
-	 index = NORM_NORMALIZE;
-      }
-      else if ( !ctx->Transform.RescaleNormals &&
-		ctx->_ModelViewInvScale != 1.0 ) {
-	 index = NORM_RESCALE;
-      }
-      else {
-	 index = 0;
-      }
-
-      if ( ctx->Light.EnabledList.next == ctx->Light.EnabledList.prev ) {
-	 ctx->Exec->Normal3f  = norm_tab[index].normal3f_single;
+   GLuint ovf, i;
+   GLuint nr = (vb.initial_counter - vb.counter) - rmesa->vb.primlist[rmesa->vb.nrprims].start;
+   /* nr = vertices emitted so far in the open primitive; ovf = how many to save. */
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s %d verts\n", __FUNCTION__, nr);
+
+   switch( rmesa->vb.prim[0] )
+   {
+   case GL_POINTS:
+      return 0;
+   case GL_LINES:
+      ovf = nr&1;   /* vertices of an incomplete line */
+      for (i = 0 ; i < ovf ; i++)
+	 copy_vertex( rmesa, nr-ovf+i, tmp[i] );
+      return i;
+   case GL_TRIANGLES:
+      ovf = nr%3;   /* vertices of an incomplete triangle */
+      for (i = 0 ; i < ovf ; i++)
+	 copy_vertex( rmesa, nr-ovf+i, tmp[i] );
+      return i;
+   case GL_QUADS:
+      ovf = nr&3;   /* vertices of an incomplete quad */
+      for (i = 0 ; i < ovf ; i++)
+	 copy_vertex( rmesa, nr-ovf+i, tmp[i] );
+      return i;
+   case GL_LINE_STRIP:
+      if (nr == 0) 
+	 return 0;
+      copy_vertex( rmesa, nr-1, tmp[0] );
+      return 1;
+   case GL_LINE_LOOP:
+   case GL_TRIANGLE_FAN:
+   case GL_POLYGON:
+      if (nr == 0) 
+	 return 0;
+      else if (nr == 1) {
+	 copy_vertex( rmesa, 0, tmp[0] );
+	 return 1;
       } else {
-	 ctx->Exec->Normal3f  = norm_tab[index].normal3f_multi;
+	 copy_vertex( rmesa, 0, tmp[0] );   /* first vertex anchors the loop/fan/polygon */
+	 copy_vertex( rmesa, nr-1, tmp[1] );
+	 return 2;
       }
-   } else {
-      ctx->Exec->Normal3f  = _mesa_noop_Normal3f;
+   case GL_TRIANGLE_STRIP:
+      ovf = MIN2( nr, 2 );   /* last two vertices; nr is unsigned, so nr-1 would wrap at 0 */
+      for (i = 0 ; i < ovf ; i++)
+	 copy_vertex( rmesa, nr-ovf+i, tmp[i] );
+      return i;
+   case GL_QUAD_STRIP:
+      ovf = MIN2( nr, 2 );   /* last pair, without unsigned wrap-around at nr == 0 */
+      if (nr > 2) ovf += nr&1;   /* plus the unpaired vertex when nr is odd */
+      for (i = 0 ; i < ovf ; i++)
+	 copy_vertex( rmesa, nr-ovf+i, tmp[i] );
+      return i;
+   default:
+      assert(0);
+      return 0;
    }
-
-   glNormal3f( x, y, z );
 }
 
-static void radeon_choose_Normal3fv( const GLfloat *v )
+static void VFMT_FALLBACK_OUTSIDE_BEGIN_END( const char *caller )
 {
-   GET_CURRENT_CONTEXT(ctx);
-   GLuint index;
+   GLcontext *ctx = vb.context;   /* context that currently owns the global vb */
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-   if ( ctx->Light.Enabled ) {
-      if ( ctx->Transform.Normalize ) {
-	 index = NORM_NORMALIZE;
-      }
-      else if ( !ctx->Transform.RescaleNormals &&
-		ctx->_ModelViewInvScale != 1.0 ) {
-	 index = NORM_RESCALE;
-      }
-      else {
-	 index = 0;
-      }
+   if (RADEON_DEBUG & (DEBUG_VFMT|DEBUG_FALLBACKS))
+      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
 
-      if ( ctx->Light.EnabledList.next == ctx->Light.EnabledList.prev ) {
-	 ctx->Exec->Normal3fv = norm_tab[index].normal3fv_single;
-      } else {
-	 ctx->Exec->Normal3fv = norm_tab[index].normal3fv_multi;
-      }
-   } else {
-      ctx->Exec->Normal3fv = _mesa_noop_Normal3fv;
-   }
+   if (ctx->Driver.NeedFlush) 
+      radeonFlushVertices( ctx, ctx->Driver.NeedFlush );
 
-   glNormal3fv( v );
-}
+   if (ctx->NewState)
+      _mesa_update_state( ctx ); /* clear state so fell_back sticks */
 
+   _tnl_wakeup_exec( ctx );
 
+   assert( rmesa->dma.flush == 0 );
+   rmesa->vb.fell_back = GL_TRUE;    /* check_vtx_fmt returns GL_FALSE while this is set */
+   rmesa->vb.installed = GL_FALSE;
+   vb.context = 0;                   /* the global vb no longer belongs to any context */
+}
 
 
-/* ================================================================
- * Texture functions:
- */
+static void VFMT_FALLBACK( const char *caller )
+{  /* Leave the vtxfmt path, possibly mid-primitive: flush, wake swtnl, then replay saved vertices via the GL API. */
+   GLcontext *ctx = vb.context;
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLfloat tmp[3][15];   /* vertices saved by copy_dma_verts */
+   GLuint i, prim;
+   GLuint ind = rmesa->vb.vertex_format;
+   GLuint nrverts;
+   GLfloat alpha = 1.0;
 
-#define GET_CURRENT							\
-   GET_CURRENT_CONTEXT(ctx);						\
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
+   if (RADEON_DEBUG & (DEBUG_FALLBACKS|DEBUG_VFMT))
+      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
 
-#define NUM_TEXTURE_UNITS		RADEON_MAX_TEXTURE_UNITS
-#define DO_PROJ_TEX
+   if (rmesa->vb.prim[0] == GL_POLYGON+1) {   /* GL_POLYGON+1: take the outside-begin/end path */
+      VFMT_FALLBACK_OUTSIDE_BEGIN_END( __FUNCTION__ );
+      return;
+   }
 
-#define CURRENT_TEXTURE( unit )		rmesa->state.current.texture[unit]
+   /* Copy vertices out of dma:
+    */
+   nrverts = copy_dma_verts( rmesa, tmp );
 
-#define TAG(x) radeon_##x
-#include "tnl_dd/t_dd_imm_tapi.h"
+   /* Finish the prim at this point:
+    */
+   note_last_prim( rmesa, 0 );
+   flush_prims( rmesa );
 
+   /* Update ctx->Driver.CurrentExecPrimitive and swap in swtnl. 
+    */
+   prim = rmesa->vb.prim[0];
+   ctx->Driver.CurrentExecPrimitive = GL_POLYGON+1;
+   _tnl_wakeup_exec( ctx );
 
+   assert(rmesa->dma.flush == 0);
+   rmesa->vb.fell_back = GL_TRUE;
+   rmesa->vb.installed = GL_FALSE;
+   vb.context = 0;
+   glBegin( prim );
+   /* With a 4-component 3f color installed, keep the current alpha for the replay below. */
+   if (rmesa->vb.installed_color_3f_sz == 4)
+      alpha = ctx->Current.Color[3];
 
-/* ================================================================
- * Vertex functions:
- */
+   /* Replay saved vertices
+    */
+   for (i = 0 ; i < nrverts; i++) {
+      GLuint offset = 3;   /* tmp[i][0..2] is the position, emitted last by glVertex3fv */
+      if (ind & RADEON_CP_VC_FRMT_N0) {
+	 glNormal3fv( &tmp[i][offset] ); 
+	 offset += 3;
+      }
 
-#define GET_CURRENT_VERTEX						\
-   GET_CURRENT_CONTEXT(ctx);						\
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);			\
-   radeonTnlVertexPtr v = rmesa->imm.v0
+      if (ind & RADEON_CP_VC_FRMT_PKCOLOR) {
+	 glColor4ubv( (GLubyte *)&tmp[i][offset] ); 
+	 offset++;
+      }
+      else if (ind & RADEON_CP_VC_FRMT_FPALPHA) {
+	 glColor4fv( &tmp[i][offset] ); 
+	 offset+=4;
+      } 
+      else if (ind & RADEON_CP_VC_FRMT_FPCOLOR) {
+	 glColor3fv( &tmp[i][offset] ); 
+	 offset+=3;
+      }
 
-#define CURRENT_VERTEX			v->obj
-#define SAVE_VERTEX			rmesa->imm.save_vertex( ctx, v )
+      if (ind & RADEON_CP_VC_FRMT_PKSPEC) {
+	 _glapi_Dispatch->SecondaryColor3ubvEXT( (GLubyte *)&tmp[i][offset] ); 
+	 offset++;
+      }
 
-#define TAG(x) radeon_##x
-#include "tnl_dd/t_dd_imm_vapi.h"
+      if (ind & RADEON_CP_VC_FRMT_ST0) {
+	 glTexCoord2fv( &tmp[i][offset] ); 
+	 offset += 2;
+      }
 
+      if (ind & RADEON_CP_VC_FRMT_ST1) {
+	 glMultiTexCoord2fvARB( GL_TEXTURE1_ARB, &tmp[i][offset] );
+	 offset += 2;
+      }
+      glVertex3fv( &tmp[i][0] );
+   }
 
+   /* Replay current vertex
+    */
+   if (ind & RADEON_CP_VC_FRMT_N0) 
+      glNormal3fv( vb.normalptr );
+
+   if (ind & RADEON_CP_VC_FRMT_PKCOLOR) 
+      glColor4ubv( vb.ubytecolorptr );
+   else if (ind & RADEON_CP_VC_FRMT_FPALPHA)
+      glColor4fv( vb.floatcolorptr );
+   else if (ind & RADEON_CP_VC_FRMT_FPCOLOR) {
+      if (rmesa->vb.installed_color_3f_sz == 4 && alpha != 1.0)
+	 glColor4f( vb.floatcolorptr[0],
+		    vb.floatcolorptr[1],
+		    vb.floatcolorptr[2],
+		    alpha );
+      else
+	 glColor3fv( vb.floatcolorptr );
+   }
 
+   if (ind & RADEON_CP_VC_FRMT_PKSPEC) 
+      _glapi_Dispatch->SecondaryColor3ubvEXT( vb.ubytespecptr ); 
 
-struct radeon_vert_tab {
-   void (*save_vertex)( GLcontext *ctx, radeonTnlVertexPtr v );
-   void (*interpolate_vertex)( GLfloat t,
-			       radeonTnlVertex *O,
-			       const radeonTnlVertex *I,
-			       const radeonTnlVertex *J );
-};
+   if (ind & RADEON_CP_VC_FRMT_ST0) 
+      glTexCoord2fv( vb.texcoordptr[0] );
 
-static struct radeon_vert_tab vert_tab[0xf];
+   if (ind & RADEON_CP_VC_FRMT_ST1) 
+      glMultiTexCoord2fvARB( GL_TEXTURE1_ARB, vb.texcoordptr[1] );
+}
 
-#define VTX_NORMAL	0x0
-#define VTX_RGBA	0x1
-#define VTX_SPEC	0x2
-#define VTX_TEX0	0x4
-#define VTX_TEX1	0x8
 
-#define LOCAL_VARS							\
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
 
-#define CURRENT_COLOR			rmesa->state.current.color
-#define CURRENT_SPECULAR		rmesa->state.current.specular
+static void wrap_buffer( void )
+{  /* Installed below as vb.notify: flush the current DMA buffer and carry the open primitive into a new one. */
+   GLcontext *ctx = vb.context;
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLfloat tmp[3][15];   /* vertices saved by copy_dma_verts */
+   GLuint i, nrverts;
 
-#define CURRENT_NORMAL( COMP )		rmesa->state.current.normal[COMP]
-#define CURRENT_TEXTURE( U, COMP )	rmesa->state.current.texture[U][COMP]
+   if (RADEON_DEBUG & (DEBUG_VFMT|DEBUG_PRIMS))
+      fprintf(stderr, "%s %d\n", __FUNCTION__, vb.initial_counter - vb.counter);
 
-#define FLUSH_VERTEX			rmesa->imm.flush_vertex( ctx, v );
+   /* Don't deal with parity: when the open primitive holds an odd number
+    * of vertices, bump both counters by one and return without wrapping. */
+   if ((((vb.initial_counter - vb.counter) -  
+	 rmesa->vb.primlist[rmesa->vb.nrprims].start) & 1)) {
+      vb.counter++;
+      vb.initial_counter++;
+      return;
+   }
 
+   /* Copy vertices out of dma:
+    */
+   nrverts = copy_dma_verts( rmesa, tmp );
 
-#define IND (VTX_NORMAL)
-#define TAG(x) radeon_##x##_NORMAL
-#define PRESERVE_VERTEX_DEFS
-#include "tnl_dd/t_dd_imm_vertex.h"
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%d vertices to copy\n", nrverts);
+   
 
-#define IND (VTX_NORMAL|VTX_TEX0)
-#define TAG(x) radeon_##x##_NORMAL_TEX0
-#define PRESERVE_VERTEX_DEFS
-#include "tnl_dd/t_dd_imm_vertex.h"
+   /* Finish the prim at this point:
+    */
+   note_last_prim( rmesa, 0 );
+   flush_prims( rmesa );
 
-#define IND (VTX_NORMAL|VTX_TEX0|VTX_TEX1)
-#define TAG(x) radeon_##x##_NORMAL_TEX0_TEX1
-#define PRESERVE_VERTEX_DEFS
-#include "tnl_dd/t_dd_imm_vertex.h"
+   /* Get new buffer
+    */
+   radeonRefillCurrentDmaRegion( rmesa );
 
-#define IND (VTX_RGBA)
-#define TAG(x) radeon_##x##_RGBA
-#define PRESERVE_VERTEX_DEFS
-#include "tnl_dd/t_dd_imm_vertex.h"
+   /* Reset counter, dmaptr
+    */
+   vb.dmaptr = (int *)(rmesa->dma.current.ptr + rmesa->dma.current.address);
+   vb.counter = (rmesa->dma.current.end - rmesa->dma.current.ptr) / 
+      (vb.vertex_size * 4);
+   vb.counter--;   /* one less than the number of vertices that fit */
+   vb.initial_counter = vb.counter;
+   vb.notify = wrap_buffer;
 
-#define IND (VTX_RGBA|VTX_TEX0)
-#define TAG(x) radeon_##x##_RGBA_TEX0
-#define PRESERVE_VERTEX_DEFS
-#include "tnl_dd/t_dd_imm_vertex.h"
+   rmesa->dma.flush = flush_prims;
+   start_prim( rmesa, rmesa->vb.prim[0] );   /* reopen the same primitive mode in the new buffer */
 
-#define IND (VTX_RGBA|VTX_TEX1)
-#define TAG(x) radeon_##x##_RGBA_TEX0_TEX1
-#include "tnl_dd/t_dd_imm_vertex.h"
 
+   /* Reemit saved vertices
+    */
+   for (i = 0 ; i < nrverts; i++) {
+      if (RADEON_DEBUG & DEBUG_VERTS) {
+	 int j;
+	 fprintf(stderr, "re-emit vertex %d to %p\n", i, vb.dmaptr);
+	 if (RADEON_DEBUG & DEBUG_VERBOSE)
+	    for (j = 0 ; j < vb.vertex_size; j++) 
+	       fprintf(stderr, "\t%08x/%f\n", *(int*)&tmp[i][j], tmp[i][j]);
+      }
 
-static void radeon_init_vert_funcs( void )
-{
-   radeon_init_vert_NORMAL();
-   radeon_init_vert_NORMAL_TEX0();
-   radeon_init_vert_NORMAL_TEX0_TEX1();
-   radeon_init_vert_RGBA();
-   radeon_init_vert_RGBA_TEX0();
-   radeon_init_vert_RGBA_TEX0_TEX1();
+      memcpy( vb.dmaptr, tmp[i], vb.vertex_size * 4 );
+      vb.dmaptr += vb.vertex_size;
+      vb.counter--;
+   }
 }
 
 
 
+static GLboolean check_vtx_fmt( GLcontext *ctx )
+{  /* Pick a hardware vertex format from GL state and lay out vb.vertex; GL_FALSE means vtxfmt can't be used. */
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint ind = RADEON_CP_VC_FRMT_Z;
 
+   if (rmesa->TclFallback || rmesa->vb.fell_back || ctx->CompileFlag)
+      return GL_FALSE;
 
+   if (ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) 
+      ctx->Driver.FlushVertices( ctx, FLUSH_UPDATE_CURRENT );
+   
+   /* Make all this event-driven:
+    */
+   if (ctx->Light.Enabled) {
+      ind |= RADEON_CP_VC_FRMT_N0;
+
+      /* TODO: make this data driven: If we receive only ubytes, send
+       * color as ubytes.  Also check if converting (with free
+       * checking for overflow) is cheaper than sending floats
+       * directly.
+       */
+      if (ctx->Light.ColorMaterialEnabled) {
+	 ind |= RADEON_CP_VC_FRMT_FPCOLOR;
+         if (ctx->Color.AlphaEnabled) {
+	    ind |= RADEON_CP_VC_FRMT_FPALPHA;
+         }
+      }
+   }
+   else {
+      /* TODO: make this data driven?
+       */
+      ind |= RADEON_CP_VC_FRMT_PKCOLOR;
+	 
+      if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) {
+	 ind |= RADEON_CP_VC_FRMT_PKSPEC;
+      }
+   }
 
+   if (ctx->Texture.Unit[0]._ReallyEnabled) {
+      if (ctx->Texture.Unit[0].TexGenEnabled) {
+	 if (rmesa->TexGenNeedNormals[0]) {
+	    ind |= RADEON_CP_VC_FRMT_N0;
+	 }
+      } else {
+	 if (ctx->Current.Texcoord[0][2] != 0.0F ||
+	     ctx->Current.Texcoord[0][3] != 1.0) {   /* only 2-component texcoords are laid out below */
+	    if (RADEON_DEBUG & (DEBUG_VFMT|DEBUG_FALLBACKS))
+	       fprintf(stderr, "%s: rq0\n", __FUNCTION__);
+	    return GL_FALSE;
+	 }
+	 ind |= RADEON_CP_VC_FRMT_ST0;
+      }
+   }
 
-#define LOCAL_VARS							\
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
+   if (ctx->Texture.Unit[1]._ReallyEnabled) {
+      if (ctx->Texture.Unit[1].TexGenEnabled) {
+	 if (rmesa->TexGenNeedNormals[1]) {
+	    ind |= RADEON_CP_VC_FRMT_N0;
+	 }
+      } else {
+	 if (ctx->Current.Texcoord[1][2] != 0.0F ||
+	     ctx->Current.Texcoord[1][3] != 1.0) {
+	    if (RADEON_DEBUG & (DEBUG_VFMT|DEBUG_FALLBACKS))
+	       fprintf(stderr, "%s: rq1\n", __FUNCTION__);
+	    return GL_FALSE;
+	 }
+	 ind |= RADEON_CP_VC_FRMT_ST1;
+      }
+   }
 
-#define GET_INTERP_FUNC							\
-   radeon_interp_func interp = rmesa->imm.interp
+   if (RADEON_DEBUG & (DEBUG_VFMT|DEBUG_STATE))
+      fprintf(stderr, "%s: format: 0x%x\n", __FUNCTION__, ind );
 
-#define FLUSH_VERTEX			rmesa->imm.flush_vertex
+   RADEON_NEWPRIM(rmesa);
+   rmesa->vb.vertex_format = ind;
+   vb.vertex_size = 3;   /* position takes the first three slots of vb.vertex */
+   rmesa->vb.prim = &ctx->Driver.CurrentExecPrimitive;
 
-#define IMM_VERTEX( V )			rmesa->imm.V
-#define IMM_VERTICES( n )		rmesa->imm.vertices[n]
+   vb.normalptr = ctx->Current.Normal;   /* defaults: attributes not in the format point at ctx->Current */
+   vb.ubytecolorptr = 0;
+   vb.floatcolorptr = ctx->Current.Color;
+   vb.texcoordptr[0] = ctx->Current.Texcoord[0];
+   vb.texcoordptr[1] = ctx->Current.Texcoord[1];
+
+   /* Run through and initialize the vertex components in the order
+    * the hardware understands:
+    */
+   if (ind & RADEON_CP_VC_FRMT_N0) {
+      vb.normalptr = &vb.vertex[vb.vertex_size].f;
+      vb.vertex_size += 3;
+      vb.normalptr[0] = ctx->Current.Normal[0];
+      vb.normalptr[1] = ctx->Current.Normal[1];
+      vb.normalptr[2] = ctx->Current.Normal[2];
+   }
 
+   if (ind & RADEON_CP_VC_FRMT_PKCOLOR) {
+      vb.ubytecolorptr = &vb.vertex[vb.vertex_size].ub4[0];
+      vb.vertex_size += 1;
+      UNCLAMPED_FLOAT_TO_RGBA_CHAN( vb.ubytecolorptr, ctx->Current.Color );
+   }
 
-/* TINY_VERTEX_FORMAT:
- */
-#define GET_VERTEX_SPACE( n ) radeonAllocDmaLow( rmesa, n * 16, __FUNCTION__ )
-
-#define EMIT_VERTEX( vb, v )						\
-do {									\
-   vb[0] = *(GLuint *)&(v->clip[0]);					\
-   vb[1] = *(GLuint *)&(v->clip[1]);					\
-   vb[2] = *(GLuint *)&(v->clip[2]);					\
-   vb[3] = *(GLuint *)&(v->color);					\
-   vb += 4;								\
-} while (0)
+   if (ind & RADEON_CP_VC_FRMT_FPCOLOR) {
+      assert(!(ind & RADEON_CP_VC_FRMT_PKCOLOR));
+      vb.floatcolorptr = &vb.vertex[vb.vertex_size].f;
+      vb.vertex_size += 3;
+      vb.floatcolorptr[0] = ctx->Current.Color[0];
+      vb.floatcolorptr[1] = ctx->Current.Color[1];
+      vb.floatcolorptr[2] = ctx->Current.Color[2];
+
+      if (ind & RADEON_CP_VC_FRMT_FPALPHA) {
+	 vb.vertex_size += 1;
+	 vb.floatcolorptr[3] = ctx->Current.Color[3];
+      }
+   }
+   
+   if (ind & RADEON_CP_VC_FRMT_PKSPEC) {
+      vb.ubytespecptr = &vb.vertex[vb.vertex_size].ub4[0];
+      vb.vertex_size += 1;
+      UNCLAMPED_FLOAT_TO_RGB_CHAN( vb.ubytespecptr, 
+				   ctx->Current.SecondaryColor );
+   }
 
-#define TAG(x) radeon_##x##_tiny
-#define PRESERVE_PRIM_DEFS
-#include "tnl_dd/t_dd_imm_primtmp.h"
 
+   if (ind & RADEON_CP_VC_FRMT_ST0) {
+      vb.texcoordptr[0] = &vb.vertex[vb.vertex_size].f;
+      vb.vertex_size += 2;
+      vb.texcoordptr[0][0] = ctx->Current.Texcoord[0][0];
+      vb.texcoordptr[0][1] = ctx->Current.Texcoord[0][1];   
+   } 
+
+   if (ind & RADEON_CP_VC_FRMT_ST1) {
+      vb.texcoordptr[1] = &vb.vertex[vb.vertex_size].f;
+      vb.vertex_size += 2;
+      vb.texcoordptr[1][0] = ctx->Current.Texcoord[1][0];
+      vb.texcoordptr[1][1] = ctx->Current.Texcoord[1][1];
+   } 
+
+   if (rmesa->vb.installed_vertex_format != rmesa->vb.vertex_format) {
+      if (RADEON_DEBUG & DEBUG_VFMT)
+	 fprintf(stderr, "reinstall on vertex_format change\n");
+      _mesa_install_exec_vtxfmt( ctx, &rmesa->vb.vtxfmt );
+      rmesa->vb.installed_vertex_format = rmesa->vb.vertex_format;
+   }
+
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s -- success\n", __FUNCTION__);
+   
+   return GL_TRUE;
+}
 
-/* NOTEX_VERTEX_FORMAT:
- */
-#define GET_VERTEX_SPACE( n ) radeonAllocDmaLow( rmesa, n * 24, __FUNCTION__ )
-
-#define EMIT_VERTEX( vb, v )						\
-do {									\
-   vb[0] = *(GLuint *)&(v->clip[0]);					\
-   vb[1] = *(GLuint *)&(v->clip[1]);					\
-   vb[2] = *(GLuint *)&(v->clip[2]);					\
-   vb[3] = *(GLuint *)&(v->clip[3]);					\
-   vb[4] = *(GLuint *)&(v->color);					\
-   vb[5] = *(GLuint *)&(v->specular);					\
-   vb += 6;								\
-} while (0)
 
-#define TAG(x) radeon_##x##_notex
-#define PRESERVE_PRIM_DEFS
-#include "tnl_dd/t_dd_imm_primtmp.h"
+void radeonVtxfmtInvalidate( GLcontext *ctx )
+{  /* Request a recheck of the vertex format and clear the fell_back flag. */
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
 
+   rmesa->vb.recheck = GL_TRUE;
+   rmesa->vb.fell_back = GL_FALSE;
+}
 
-/* TEX0_VERTEX_FORMAT:
- */
-#define GET_VERTEX_SPACE( n ) radeonAllocDmaLow( rmesa, n * 32, __FUNCTION__ )
-
-#define EMIT_VERTEX( vb, v )						\
-do {									\
-   vb[0] = *(GLuint *)&(v->clip[0]);					\
-   vb[1] = *(GLuint *)&(v->clip[1]);					\
-   vb[2] = *(GLuint *)&(v->clip[2]);					\
-   vb[3] = *(GLuint *)&(v->clip[3]);					\
-   vb[4] = *(GLuint *)&(v->color);					\
-   vb[5] = *(GLuint *)&(v->specular);					\
-   vb[6] = *(GLuint *)&(v->texture[0][0]);				\
-   vb[7] = *(GLuint *)&(v->texture[0][1]);				\
-   vb += 8;								\
-} while (0)
 
-#define TAG(x) radeon_##x##_tex0
-#define PRESERVE_PRIM_DEFS
-#include "tnl_dd/t_dd_imm_primtmp.h"
+static void radeonNewList( GLcontext *ctx, GLuint list, GLenum mode )  /* hooked as ctx->Driver.NewList by radeonVtxfmtValidate */
+{  /* ctx, list and mode are not used by this body */
+   VFMT_FALLBACK_OUTSIDE_BEGIN_END( __FUNCTION__ );  /* helper not visible here; presumably leaves the vtxfmt path -- TODO confirm */
+}
 
 
-/* TEX1_VERTEX_FORMAT:
- */
-#define GET_VERTEX_SPACE( n ) radeonAllocDmaLow( rmesa, n * 40, __FUNCTION__ )
-
-#define EMIT_VERTEX( vb, v )						\
-do {									\
-   vb[0] = *(GLuint *)&(v->clip[0]);					\
-   vb[1] = *(GLuint *)&(v->clip[1]);					\
-   vb[2] = *(GLuint *)&(v->clip[2]);					\
-   vb[3] = *(GLuint *)&(v->clip[3]);					\
-   vb[4] = *(GLuint *)&(v->color);					\
-   vb[5] = *(GLuint *)&(v->specular);					\
-   vb[6] = *(GLuint *)&(v->texture[0][0]);				\
-   vb[7] = *(GLuint *)&(v->texture[0][1]);				\
-   vb[8] = *(GLuint *)&(v->texture[1][0]);				\
-   vb[9] = *(GLuint *)&(v->texture[1][1]);				\
-   vb += 10;								\
-} while (0)
+static void radeonVtxfmtValidate( GLcontext *ctx )  /* re-check the vertex format; install or remove the radeon vtxfmt */
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
 
-#define TAG(x) radeon_##x##_tex1
-#define PRESERVE_PRIM_DEFS
-#include "tnl_dd/t_dd_imm_primtmp.h"
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
 
+   if (ctx->Driver.NeedFlush)  /* flush through whichever FlushVertices hook is current */
+      ctx->Driver.FlushVertices( ctx, ctx->Driver.NeedFlush );
 
+   rmesa->vb.recheck = GL_FALSE;  /* re-armed by radeonVtxfmtInvalidate */
 
+   if (check_vtx_fmt( ctx )) {  /* true: the current state can use the vtxfmt */
+      if (!rmesa->vb.installed) {
+	 if (RADEON_DEBUG & DEBUG_VFMT)
+	    fprintf(stderr, "reinstall (new install)\n");
 
+	 _mesa_install_exec_vtxfmt( ctx, &rmesa->vb.vtxfmt );
+	 ctx->Driver.FlushVertices = radeonFlushVertices;  /* take over the driver hooks */
+	 ctx->Driver.NewList = radeonNewList;
+	 rmesa->vb.installed = GL_TRUE;
+	 vb.context = ctx;  /* the entrypoints read the context from the global vb */
+      }
+      else if (RADEON_DEBUG & DEBUG_VFMT)
+	 fprintf(stderr, "%s: already installed\n", __FUNCTION__);  /* newline added: it was missing from this message only */
+   } 
+   else {
+      if (RADEON_DEBUG & DEBUG_VFMT)
+	 fprintf(stderr, "%s: failed\n", __FUNCTION__);
+
+      if (rmesa->vb.installed) {  /* was installed but is no longer usable: tear down */
+	 if (rmesa->dma.flush)
+	    rmesa->dma.flush( rmesa );
+	 _tnl_wakeup_exec( ctx );  /* hand the exec vtxfmt back to tnl */
+	 rmesa->vb.installed = GL_FALSE;
+	 vb.context = 0;
+      }
+   }      
+}
 
 
 
-/* Bzzt: Material changes are lost on fallback.
+/* Materials: handled directly outside Begin/End, fallback inside:
  */
-static void radeon_Materialfv( GLenum face, GLenum pname,
+static void radeon_Materialfv( GLenum face, GLenum pname, 
 			       const GLfloat *params )
 {
-   GET_CURRENT_CONTEXT(ctx);
+   GLcontext *ctx = vb.context;  /* context comes from the global vb, not the current-context lookup */
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
 
+   if (rmesa->vb.prim[0] != GL_POLYGON+1) {  /* inside Begin/End: radeon_End resets prim[0] to GL_POLYGON+1 */
+      VFMT_FALLBACK( __FUNCTION__ );
+      glMaterialfv( face, pname, params );  /* re-issue the call after the fallback */
+      return;
+   }
    _mesa_noop_Materialfv( face, pname, params );
-   radeon_recalc_base_color( ctx );
+   radeonUpdateMaterial( vb.context );  /* vb.context is the same pointer as ctx above */
 }
 
 
-
-
-
-/* ================================================================
- * Fallback functions:
+/* Begin/End.  radeon_Begin checks mode and nesting, revalidates state, then sets up vb.dmaptr/vb.counter and starts the primitive:
  */
-
-static void radeon_do_fallback( GLcontext *ctx )
+static void radeon_Begin( GLenum mode )
 {
+   GLcontext *ctx = vb.context;
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   struct radeon_current_state *current = &rmesa->state.current;
+   
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
 
-   /* Tell tnl to restore its exec vtxfmt, rehook its driver callbacks
-    * and revive internal state that depended on those callbacks:
-    */
-   _tnl_wakeup_exec( ctx );
+   if (mode > GL_POLYGON) {  /* modes above GL_POLYGON are rejected */
+      _mesa_error( ctx, GL_INVALID_ENUM, "glBegin" );
+      return;
+   }
 
-   /* Replay enough vertices that the current primitive is continued
-    * correctly:
-    */
-   if ( rmesa->imm.prim != PRIM_OUTSIDE_BEGIN_END ) {
-      glBegin( rmesa->imm.prim );
-      /*rmesa->fire_on_fallback( ctx );*/
+   if (rmesa->vb.prim[0] != GL_POLYGON+1) {  /* already inside Begin/End; GL_POLYGON+1 marks "outside" */
+      _mesa_error( ctx, GL_INVALID_OPERATION, "glBegin" );
+      return;
    }
+   
+   if (ctx->NewState) 
+      _mesa_update_state( ctx );
 
-   /* Replay the current, partially complete vertex:
-    */
-   if ( current->texture[0][3] == 1.0 ) {
-      glMultiTexCoord3fvARB( GL_TEXTURE0_ARB, current->texture[0] );
-   } else {
-      glMultiTexCoord4fvARB( GL_TEXTURE0_ARB, current->texture[0] );
+   if (rmesa->NewGLState)
+      radeonValidateState( ctx );
+
+   if (rmesa->vb.recheck)  /* set by radeonVtxfmtInvalidate */
+      radeonVtxfmtValidate( ctx );
+
+   if (!rmesa->vb.installed) {  /* validation removed (or never installed) the vtxfmt */
+      glBegin( mode );  /* re-issue through the public entrypoint */
+      return;
    }
 
-   if ( current->texture[1][3] == 1.0 ) {
-      glMultiTexCoord3fvARB( GL_TEXTURE1_ARB, current->texture[1] );
-   } else {
-      glMultiTexCoord4fvARB( GL_TEXTURE1_ARB, current->texture[1] );
+
+   if (rmesa->dma.flush && vb.counter < 12) {  /* a buffer is open but has room for fewer than 12 vertices */
+      if (RADEON_DEBUG & DEBUG_VFMT)
+	 fprintf(stderr, "%s: flush almost-empty buffers\n", __FUNCTION__);
+      flush_prims( rmesa );
    }
 
-   /* FIXME: Secondary color, fog coord...
+   /* Need to arrange to save vertices here?  Or always copy from dma (yuk)?
     */
+   if (!rmesa->dma.flush) {  /* nothing pending: set up a fresh vertex run */
+      if (rmesa->dma.current.ptr + 12*vb.vertex_size*4 > 
+	  rmesa->dma.current.end) {  /* presumably vertex_size counts 32-bit words, hence *4 -- TODO confirm */
+	 RADEON_NEWPRIM( rmesa );
+	 radeonRefillCurrentDmaRegion( rmesa );
+      }
 
-   if ( ctx->Light.Enabled ) {
-      glColor4fv( ctx->Current.Color );	/* Catch ColorMaterial */
-      glNormal3fv( current->normal );
-   } else {
-      glColor4ubv( current->color );
+      vb.dmaptr = (int *)(rmesa->dma.current.address + rmesa->dma.current.ptr);
+      vb.counter = (rmesa->dma.current.end - rmesa->dma.current.ptr) / 
+	 (vb.vertex_size * 4);
+      vb.counter--;  /* NOTE(review): one slot is held back; the reason is not visible here */
+      vb.initial_counter = vb.counter;
+      vb.notify = wrap_buffer;
+      rmesa->dma.flush = flush_prims;  /* marks the run as pending for the checks above */
+      vb.context->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
    }
+   
+   
+   rmesa->vb.prim[0] = mode;  /* now inside Begin/End */
+   start_prim( rmesa, mode | PRIM_BEGIN );
 }
 
-#define PRE_LOOPBACK( FUNC ) do {					\
-   GET_CURRENT_CONTEXT(ctx);						\
-   radeon_do_fallback( ctx );						\
-} while (0)
 
-#define TAG(x) radeon_fallback_##x
-#include "vtxfmt_tmp.h"
 
+static void radeon_End( void )  /* glEnd for the radeon vtxfmt */
+{
+   GLcontext *ctx = vb.context;
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
 
+   if (rmesa->vb.prim[0] != GL_POLYGON+1) {  /* a primitive is open: close it */
+      note_last_prim( rmesa, PRIM_END );
+      rmesa->vb.prim[0] = GL_POLYGON+1;  /* back to the "outside Begin/End" marker */
+   }
+   else {  /* glEnd without a matching glBegin */
+      _mesa_error( ctx, GL_INVALID_OPERATION, "glEnd" );
+   }
+}
+
+
+/* Fallback on difficult entrypoints: vtxfmt_tmp.h presumably expands PRE_LOOPBACK at the top of each
+ * radeon_fallback_<name> wrapper (TODO confirm); the FUNC argument is not used by this definition. */
+#define PRE_LOOPBACK( FUNC )			\
+do {						\
+   if (RADEON_DEBUG & DEBUG_VFMT) 		\
+      fprintf(stderr, "%s\n", __FUNCTION__);	\
+   VFMT_FALLBACK( __FUNCTION__ );		\
+} while (0)
+#define TAG(x) radeon_fallback_##x
+#include "vtxfmt_tmp.h"
 
 
 
-static void radeon_Begin( GLenum prim )
+static GLboolean radeonNotifyBegin( GLcontext *ctx, GLenum p )  /* tnl Driver.NotifyBegin hook: GL_TRUE if radeon_Begin took over primitive p */
 {
-   GET_CURRENT_CONTEXT(ctx);
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
 
-   if ( prim > GL_POLYGON ) {
-      _mesa_error( ctx, GL_INVALID_ENUM, "glBegin" );
-      return;
-   }
-   if ( rmesa->imm.prim != PRIM_OUTSIDE_BEGIN_END ) {
-      _mesa_error( ctx, GL_INVALID_OPERATION, "glBegin" );
-      return;
-   }
+   assert(!rmesa->vb.installed);  /* expected to be reached only while the radeon vtxfmt is not installed */
 
-   ctx->Driver.NeedFlush |= (FLUSH_STORED_VERTICES |
-			     FLUSH_UPDATE_CURRENT);
+   if (ctx->NewState) 
+      _mesa_update_state( ctx );
 
+   if (rmesa->NewGLState)
+      radeonValidateState( ctx );
 
-   radeonChooseVertexState( ctx );
+   if (ctx->Driver.NeedFlush)  /* flush through the currently hooked FlushVertices */
+      ctx->Driver.FlushVertices( ctx, ctx->Driver.NeedFlush );
 
+   if (rmesa->vb.recheck)  /* may install the vtxfmt via radeonVtxfmtValidate */
+      radeonVtxfmtValidate( ctx );
 
-   rmesa->imm.prim = prim;
-   rmesa->imm.v0 = &rmesa->imm.vertices[0];
+   if (!rmesa->vb.installed) {  /* still not installed: report failure to the caller */
+      if (RADEON_DEBUG & DEBUG_VFMT)
+	 fprintf(stderr, "%s -- failed\n", __FUNCTION__);
+      return GL_FALSE;
+   }
 
-   rmesa->imm.save_vertex = radeon_save_vertex_RGBA;
-   rmesa->imm.flush_vertex = rmesa->imm.flush_tab[prim];
+   radeon_Begin( p );  /* installed now: start the primitive on the radeon path */
+   return GL_TRUE;
 }
 
-static void radeon_End( void )
+static void radeonFlushVertices( GLcontext *ctx, GLuint flags )  /* Driver.FlushVertices hook while the radeon vtxfmt is installed */
 {
-   GET_CURRENT_CONTEXT(ctx);
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
 
-   if ( rmesa->imm.prim == PRIM_OUTSIDE_BEGIN_END ) {
-      _mesa_error( ctx, GL_INVALID_OPERATION, "glEnd" );
-      return;
-   }
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
 
-   rmesa->imm.prim = PRIM_OUTSIDE_BEGIN_END;
+   assert(rmesa->vb.installed);
+   assert(vb.context == ctx);
 
-   ctx->Driver.NeedFlush &= ~(FLUSH_STORED_VERTICES |
-			      FLUSH_UPDATE_CURRENT);
-}
+   if (flags & FLUSH_UPDATE_CURRENT) {  /* copy current values back into ctx, then reinstall the vtxfmt */
+      radeon_copy_to_current( ctx );
+      if (RADEON_DEBUG & DEBUG_VFMT)
+	 fprintf(stderr, "reinstall on update_current\n");
+      _mesa_install_exec_vtxfmt( ctx, &rmesa->vb.vtxfmt );
+      ctx->Driver.NeedFlush &= ~FLUSH_UPDATE_CURRENT;
+   }
 
+   if (flags & FLUSH_STORED_VERTICES) {
+      /* Uses the function-level rmesa; the former inner redeclaration only shadowed it. */
+      assert (rmesa->dma.flush == 0 ||
+	      rmesa->dma.flush == flush_prims);
+      if (rmesa->dma.flush == flush_prims)
+	 flush_prims( rmesa );
+      ctx->Driver.NeedFlush &= ~FLUSH_STORED_VERTICES;
+   }
+}
 
 
 
+/* At this point, don't expect very many versions of each function to
+ * be generated, so not concerned about freeing them?
+ */
 
 
-void radeonInitTnlModule( GLcontext *ctx )
+void radeonVtxfmtInit( GLcontext *ctx )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLvertexformat *vfmt = &(rmesa->imm.vtxfmt);
-
-   return;
-
-   radeon_init_norm_funcs();
-   radeon_init_vert_funcs();
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   GLvertexformat *vfmt = &(rmesa->vb.vtxfmt);
 
    MEMSET( vfmt, 0, sizeof(GLvertexformat) );
 
-   /* Handled fully in supported states:
+   /* Hook in chooser functions for codegen, etc:
     */
-   vfmt->ArrayElement = NULL;				/* FIXME: ... */
-   vfmt->Color3f = radeon_choose_Color3f;
-   vfmt->Color3fv = radeon_choose_Color3fv;
-   vfmt->Color3ub = radeon_choose_Color3ub;
-   vfmt->Color3ubv = radeon_choose_Color3ubv;
-   vfmt->Color4f = radeon_choose_Color4f;
-   vfmt->Color4fv = radeon_choose_Color4fv;
-   vfmt->Color4ub = radeon_choose_Color4ub;
-   vfmt->Color4ubv = radeon_choose_Color4ubv;
-   vfmt->FogCoordfvEXT = radeon_FogCoordfvEXT;
-   vfmt->FogCoordfEXT = radeon_FogCoordfEXT;
-   vfmt->Materialfv = radeon_Materialfv;
-   vfmt->MultiTexCoord1fARB = radeon_MultiTexCoord1fARB;
-   vfmt->MultiTexCoord1fvARB = radeon_MultiTexCoord1fvARB;
-   vfmt->MultiTexCoord2fARB = radeon_MultiTexCoord2fARB;
-   vfmt->MultiTexCoord2fvARB = radeon_MultiTexCoord2fvARB;
-   vfmt->MultiTexCoord3fARB = radeon_MultiTexCoord3fARB;
-   vfmt->MultiTexCoord3fvARB = radeon_MultiTexCoord3fvARB;
-   vfmt->MultiTexCoord4fARB = radeon_MultiTexCoord4fARB;
-   vfmt->MultiTexCoord4fvARB = radeon_MultiTexCoord4fvARB;
-   vfmt->Normal3f = radeon_choose_Normal3f;
-   vfmt->Normal3fv = radeon_choose_Normal3fv;
-   vfmt->SecondaryColor3ubEXT = radeon_SecondaryColor3ubEXT;
-   vfmt->SecondaryColor3ubvEXT = radeon_SecondaryColor3ubvEXT;
-   vfmt->SecondaryColor3fEXT = radeon_SecondaryColor3fEXT;
-   vfmt->SecondaryColor3fvEXT = radeon_SecondaryColor3fvEXT;
-   vfmt->TexCoord1f = radeon_TexCoord1f;
-   vfmt->TexCoord1fv = radeon_TexCoord1fv;
-   vfmt->TexCoord2f = radeon_TexCoord2f;
-   vfmt->TexCoord2fv = radeon_TexCoord2fv;
-   vfmt->TexCoord3f = radeon_TexCoord3f;
-   vfmt->TexCoord3fv = radeon_TexCoord3fv;
-   vfmt->TexCoord4f = radeon_TexCoord4f;
-   vfmt->TexCoord4fv = radeon_TexCoord4fv;
-   vfmt->Vertex2f = radeon_Vertex2f;
-   vfmt->Vertex2fv = radeon_Vertex2fv;
-   vfmt->Vertex3f = radeon_Vertex3f;
-   vfmt->Vertex3fv = radeon_Vertex3fv;
-   vfmt->Vertex4f = radeon_Vertex4f;
-   vfmt->Vertex4fv = radeon_Vertex4fv;
+   radeonVtxfmtInitChoosers( vfmt );
 
+   /* Handled fully in supported states, but no codegen:
+    */
+   vfmt->Materialfv = radeon_Materialfv;
+   vfmt->ArrayElement = _ae_loopback_array_elt;	        /* generic helper */
+   vfmt->Rectf = _mesa_noop_Rectf;			/* generic helper */
    vfmt->Begin = radeon_Begin;
    vfmt->End = radeon_End;
 
-   vfmt->Rectf = _mesa_noop_Rectf;			/* generic helper */
-
-   vfmt->DrawArrays = NULL;
-   vfmt->DrawElements = NULL;
-   vfmt->DrawRangeElements = _mesa_noop_DrawRangeElements; /* discard range */
+   /* Fallback for performance reasons:  (Fix with cva/elt path here and
+    * dmatmp2.h style primitive-merging)
+    *
+    * These should call NotifyBegin(), as should _tnl_EvalMesh, to allow
+    * a driver-hook.
+    */
+   vfmt->DrawArrays = radeon_fallback_DrawArrays;
+   vfmt->DrawElements = radeon_fallback_DrawElements;
+   vfmt->DrawRangeElements = radeon_fallback_DrawRangeElements; 
 
 
    /* Not active in supported states; just keep ctx->Current uptodate:
     */
+   vfmt->FogCoordfvEXT = _mesa_noop_FogCoordfvEXT;
+   vfmt->FogCoordfEXT = _mesa_noop_FogCoordfEXT;
    vfmt->EdgeFlag = _mesa_noop_EdgeFlag;
    vfmt->EdgeFlagv = _mesa_noop_EdgeFlagv;
    vfmt->Indexi = _mesa_noop_Indexi;
@@ -640,10 +963,6 @@ void radeonInitTnlModule( GLcontext *ctx )
 
 
    /* Active but unsupported -- fallback if we receive these:
-    *
-    * All of these fallbacks can be fixed with additional code, except
-    * CallList, unless we build a play_immediate_noop() command which
-    * turns an immediate back into glBegin/glEnd commands...
     */
    vfmt->CallList = radeon_fallback_CallList;
    vfmt->EvalCoord1f = radeon_fallback_EvalCoord1f;
@@ -654,132 +973,137 @@ void radeonInitTnlModule( GLcontext *ctx )
    vfmt->EvalMesh2 = radeon_fallback_EvalMesh2;
    vfmt->EvalPoint1 = radeon_fallback_EvalPoint1;
    vfmt->EvalPoint2 = radeon_fallback_EvalPoint2;
-
-
-   rmesa->imm.prim = PRIM_OUTSIDE_BEGIN_END;
-
-   /* THIS IS A HACK!
-    */
-   _mesa_install_exec_vtxfmt( ctx, vfmt );
+   vfmt->TexCoord3f = radeon_fallback_TexCoord3f;
+   vfmt->TexCoord3fv = radeon_fallback_TexCoord3fv;
+   vfmt->TexCoord4f = radeon_fallback_TexCoord4f;
+   vfmt->TexCoord4fv = radeon_fallback_TexCoord4fv;
+   vfmt->MultiTexCoord3fARB = radeon_fallback_MultiTexCoord3fARB;
+   vfmt->MultiTexCoord3fvARB = radeon_fallback_MultiTexCoord3fvARB;
+   vfmt->MultiTexCoord4fARB = radeon_fallback_MultiTexCoord4fARB;
+   vfmt->MultiTexCoord4fvARB = radeon_fallback_MultiTexCoord4fvARB;
+   vfmt->Vertex4f = radeon_fallback_Vertex4f;
+   vfmt->Vertex4fv = radeon_fallback_Vertex4fv;
+
+   (void)radeon_fallback_vtxfmt;
+
+   TNL_CONTEXT(ctx)->Driver.NotifyBegin = radeonNotifyBegin;
+
+   vb.context = ctx;
+   rmesa->vb.enabled = 1;
+   rmesa->vb.prim = &ctx->Driver.CurrentExecPrimitive;
+   rmesa->vb.primflags = 0;
+
+   make_empty_list( &rmesa->vb.dfn_cache.Vertex2f );
+   make_empty_list( &rmesa->vb.dfn_cache.Vertex2fv );
+   make_empty_list( &rmesa->vb.dfn_cache.Vertex3f );
+   make_empty_list( &rmesa->vb.dfn_cache.Vertex3fv );
+   make_empty_list( &rmesa->vb.dfn_cache.Color4ub );
+   make_empty_list( &rmesa->vb.dfn_cache.Color4ubv );
+   make_empty_list( &rmesa->vb.dfn_cache.Color3ub );
+   make_empty_list( &rmesa->vb.dfn_cache.Color3ubv );
+   make_empty_list( &rmesa->vb.dfn_cache.Color4f );
+   make_empty_list( &rmesa->vb.dfn_cache.Color4fv );
+   make_empty_list( &rmesa->vb.dfn_cache.Color3f );
+   make_empty_list( &rmesa->vb.dfn_cache.Color3fv );
+   make_empty_list( &rmesa->vb.dfn_cache.SecondaryColor3fEXT );
+   make_empty_list( &rmesa->vb.dfn_cache.SecondaryColor3fvEXT );
+   make_empty_list( &rmesa->vb.dfn_cache.SecondaryColor3ubEXT );
+   make_empty_list( &rmesa->vb.dfn_cache.SecondaryColor3ubvEXT );
+   make_empty_list( &rmesa->vb.dfn_cache.Normal3f );
+   make_empty_list( &rmesa->vb.dfn_cache.Normal3fv );
+   make_empty_list( &rmesa->vb.dfn_cache.TexCoord2f );
+   make_empty_list( &rmesa->vb.dfn_cache.TexCoord2fv );
+   make_empty_list( &rmesa->vb.dfn_cache.TexCoord1f );
+   make_empty_list( &rmesa->vb.dfn_cache.TexCoord1fv );
+   make_empty_list( &rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+   make_empty_list( &rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+   make_empty_list( &rmesa->vb.dfn_cache.MultiTexCoord1fARB );
+   make_empty_list( &rmesa->vb.dfn_cache.MultiTexCoord1fvARB );
+
+   radeonInitCodegen( &rmesa->vb.codegen );
 }
 
-
-
-
-
-
-#if 0
-
-
-
-static void radeon_Begin( GLenum prim )
+static void free_funcs( struct dynfn *l )  /* unlink and free every entry on list l; l itself is not freed here */
 {
-   GET_CURRENT_CONTEXT(ctx);
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   radeon_prim *tab = &radeon_prim_tab[(int)prim];
-
-   if ( prim > GL_POLYGON ) {
-      gl_error( ctx, GL_INVALID_ENUM, "glBegin" );
-      return;
-   }
-
-   if ( rmesa->prim != PRIM_OUTSIDE_BEGIN_END ) {
-      gl_error( ctx, GL_INVALID_OPERATION, "glBegin" );
-      return;
-   }
-
-   if ( tab->fire_on_vertex ) {
-      rmesa->fire_on_vertex = tab->fire_on_vertex;
-      rmesa->fire_on_end = tab->fire_on_end;
-      rmesa->fire_on_fallback = tab->fire_on_fallback;
-      rmesa->vert = &(rmesa->cache[0]);
-      rmesa->prim = prim;
-      ctx->Driver.NeedFlush |= (FLUSH_INSIDE_BEGIN_END |
-				FLUSH_STORED_VERTICES);
-   } else {
-      radeon_fallback_vtxfmt( ctx );
+   struct dynfn *f, *tmp;
+   foreach_s (f, tmp, l) {  /* presumably the removal-safe iterator from simple_list.h, tmp holding the next node -- TODO confirm */
+      remove_from_list( f );
+      ALIGN_FREE( f->code );  /* code buffers come from ALIGN_MALLOC in the DFN macro */
+      FREE( f );
    }
 }
 
-static void radeon_End( void )
+void radeonVtxfmtUnbindContext( GLcontext *ctx )  /* undo the vtxfmt hooks for ctx */
 {
-   GET_CURRENT_CONTEXT(ctx);
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
-   if ( rmesa->prim == PRIM_OUTSIDE_BEGIN_END ) {
-      gl_error( ctx, GL_INVALID_OPERATION, "glEnd" );
-      return;
+   if (RADEON_CONTEXT(ctx)->vb.installed) {  /* the radeon vtxfmt is live for this context */
+      assert(vb.context == ctx);  /* the global vb must be tracking this same context */
+      VFMT_FALLBACK_OUTSIDE_BEGIN_END( __FUNCTION__ );  /* helper not visible here */
    }
 
-   rmesa->fire_on_end( ctx );
-   rmesa->prim = PRIM_OUTSIDE_BEGIN_END;
-
-   ctx->Exec->Vertex3fv = radeon_noop_Vertex3fv;
-   ctx->Exec->Vertex3f = radeon_noop_Vertex3f;
-   ctx->Exec->Vertex2f = radeon_noop_Vertex2f;
-
-   ctx->Driver.NeedFlush &= ~(FLUSH_INSIDE_BEGIN_END |
-			      FLUSH_STORED_VERTICES);
+   TNL_CONTEXT(ctx)->Driver.NotifyBegin = 0;  /* unhook; radeonVtxfmtMakeCurrent sets it again */
 }
 
 
-
-
-static GLboolean radeon_flush_vtxfmt( GLcontext *ctx, GLuint flags )
+void radeonVtxfmtMakeCurrent( GLcontext *ctx )  /* re-hook NotifyBegin for ctx unless a second thread has been seen */
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
-   if ( fxMesa->prim != PRIM_OUTSIDE_BEGIN_END )
-      return GL_FALSE;
-
-   /* Outside begin/end.  All vertices will already be flushed, just
-    * update ctx->Current.
-    */
-   if ( flags & FLUSH_UPDATE_CURRENT ) {
-      radeonClipVertexPtr v = &(RADEON_CONTEXT(ctx)->Current);
-      COPY_2FV( ctx->Current.Texcoord[0], v->texcoord[0] );
-      COPY_2FV( ctx->Current.Texcoord[1], v->texcoord[1] );
-      if ( rmesa->accel_light == ACCEL_LIGHT ) {
-	 COPY_3FV( ctx->Current.Normal, v->normal );
-      } else {
-	 ctx->Current.Color[RCOMP] = UBYTE_TO_CHAN( v->v.color.red );
-	 ctx->Current.Color[GCOMP] = UBYTE_TO_CHAN( v->v.color.green );
-	 ctx->Current.Color[BCOMP] = UBYTE_TO_CHAN( v->v.color.blue );
-	 ctx->Current.Color[ACOMP] = UBYTE_TO_CHAN( v->v.color.alpha );
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+
+#if defined(THREADS)
+   static GLboolean ThreadSafe = GL_FALSE;  /* In thread-safe mode? */
+   if (!ThreadSafe) {
+      static unsigned long knownID;  /* thread id recorded on the first call */
+      static GLboolean firstCall = GL_TRUE;
+      if (firstCall) {
+         knownID = _glthread_GetID();
+         firstCall = GL_FALSE;
+      }
+      else if (knownID != _glthread_GetID()) {  /* called from a different thread than the first call */
+         ThreadSafe = GL_TRUE;  /* static and never cleared in this function */
 
-	 if ( ctx->Light.ColorMaterialEnabled )
-	    _mesa_update_color_material( ctx, ctx->Current.Color );
+	 if (RADEON_DEBUG & (DEBUG_DRI|DEBUG_VFMT))
+	    fprintf(stderr, "**** Multithread situation!\n");
       }
    }
+   if (ThreadSafe)  /* multithreaded: leave NotifyBegin as it is */
+      return;
+#endif
 
-   /* Could clear this flag and set it from each 'choose' function,
-    * maybe, but there isn't much of a penalty for leaving it set:
-    */
-   ctx->Driver.NeedFlush = FLUSH_UPDATE_CURRENT;
-   return GL_TRUE;
+   if (rmesa->vb.enabled) {  /* vb.enabled is set to 1 in radeonVtxfmtInit */
+      TNL_CONTEXT(ctx)->Driver.NotifyBegin = radeonNotifyBegin;
+   }
 }
 
-void radeon_update_lighting( GLcontext *ctx )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-   if ( !ctx->Light.Enabled ) {
-      rmesa->accel_light = ACCEL_NO_LIGHT;
-   }
-   else if ( !ctx->Light._NeedVertices && !ctx->Light.Model.TwoSide ) {
-      rmesa->accel_light = ACCEL_LIGHT;
-      radeon_recalc_basecolor( ctx );
-   }
-   else {
-      radeon->accel_light = 0;
-   }
+void radeonVtxfmtDestroy( GLcontext *ctx )  /* release every cached generated function of this context */
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+
+   count_funcs( rmesa );
+   free_funcs( &rmesa->vb.dfn_cache.Vertex3f );  /* vertex position */
+   free_funcs( &rmesa->vb.dfn_cache.Vertex3fv );
+   free_funcs( &rmesa->vb.dfn_cache.Vertex2f );
+   free_funcs( &rmesa->vb.dfn_cache.Vertex2fv );
+   free_funcs( &rmesa->vb.dfn_cache.Normal3f );  /* normal */
+   free_funcs( &rmesa->vb.dfn_cache.Normal3fv );
+   free_funcs( &rmesa->vb.dfn_cache.Color4f );  /* primary colour */
+   free_funcs( &rmesa->vb.dfn_cache.Color4fv );
+   free_funcs( &rmesa->vb.dfn_cache.Color3f );
+   free_funcs( &rmesa->vb.dfn_cache.Color3fv );
+   free_funcs( &rmesa->vb.dfn_cache.Color4ub );
+   free_funcs( &rmesa->vb.dfn_cache.Color4ubv );
+   free_funcs( &rmesa->vb.dfn_cache.Color3ub );
+   free_funcs( &rmesa->vb.dfn_cache.Color3ubv );
+   free_funcs( &rmesa->vb.dfn_cache.SecondaryColor3fEXT );  /* secondary colour */
+   free_funcs( &rmesa->vb.dfn_cache.SecondaryColor3fvEXT );
+   free_funcs( &rmesa->vb.dfn_cache.SecondaryColor3ubEXT );
+   free_funcs( &rmesa->vb.dfn_cache.SecondaryColor3ubvEXT );
+   free_funcs( &rmesa->vb.dfn_cache.TexCoord1f );  /* texture coordinates */
+   free_funcs( &rmesa->vb.dfn_cache.TexCoord1fv );
+   free_funcs( &rmesa->vb.dfn_cache.TexCoord2f );
+   free_funcs( &rmesa->vb.dfn_cache.TexCoord2fv );
+   free_funcs( &rmesa->vb.dfn_cache.MultiTexCoord1fARB );
+   free_funcs( &rmesa->vb.dfn_cache.MultiTexCoord1fvARB );
+   free_funcs( &rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+   free_funcs( &rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
 }
 
-
-/* How to fallback:
- *   - install default vertex format
- *   - call glBegin
- *   - revive stalled vertices (may be reordered).
- *   - re-issue call that caused fallback.
- */
-#endif
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.h b/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.h
index d0bc1c04d..093c26d15 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.h
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.h
@@ -1,30 +1,36 @@
 /* $XFree86$ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
 /*
- * Copyright 2000, 2001 VA Linux Systems Inc., Fremont, California.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
  * Authors:
- *    Gareth Hughes <gareth@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
  */
 
 #ifndef __RADEON_VTXFMT_H__
@@ -34,13 +40,89 @@
 
 #include "radeon_context.h"
 
-extern void radeonInitTnlModule( GLcontext *ctx );
 
 
-extern radeon_flush_func radeon_flush_tab_tiny[GL_POLYGON+1];
-extern radeon_flush_func radeon_flush_tab_notex[GL_POLYGON+1];
-extern radeon_flush_func radeon_flush_tab_tex0[GL_POLYGON+1];
-extern radeon_flush_func radeon_flush_tab_tex1[GL_POLYGON+1];
+extern struct radeon_vb vb;
+
+
+extern void radeonVtxfmtUpdate( GLcontext *ctx );
+extern void radeonVtxfmtInit( GLcontext *ctx );
+extern void radeonVtxfmtInvalidate( GLcontext *ctx );
+extern void radeonVtxfmtDestroy( GLcontext *ctx );
+extern void radeonVtxfmtInitChoosers( GLvertexformat *vfmt );
+
+extern void radeonVtxfmtMakeCurrent( GLcontext *ctx );
+extern void radeonVtxfmtUnbindContext( GLcontext *ctx );
+
+extern void radeon_copy_to_current( GLcontext *ctx );
+
+#define DFN( FUNC, CACHE)	/* relies on 'dfn' and 'key' from the expanding scope */	\
+do {							\
+   char *start = (char *)&FUNC;	/* template code starts at FUNC ... */		\
+   char *end = (char *)&FUNC##_end;	/* ... and ends at the FUNC_end symbol */	\
+   insert_at_head( &CACHE, dfn );			\
+   dfn->key = key;					\
+   dfn->code = ALIGN_MALLOC( end - start, 16 );	/* NOTE(review): result is not checked before the memcpy */	\
+   memcpy (dfn->code, start, end - start);	/* private copy of the template bytes */	\
+}							\
+while ( 0 )
+
+#define FIXUP( CODE, OFFSET, CHECKVAL, NEWVAL )	/* patch the int at CODE+OFFSET: expect CHECKVAL, store NEWVAL */	\
+do {						\
+   int *icode = (int *)((CODE)+(OFFSET));	/* arguments parenthesized so compound expressions expand safely */	\
+   assert (*icode == (CHECKVAL));		\
+   *icode = (int)(NEWVAL);	/* cast covers the whole expression, as in FIXUP2 */	\
+} while (0)
+
+
+/* Useful for figuring out the offsets: scans forward from OFFSET (which must be an lvalue) for CHECKVAL,
+ * logs where it was found, stores NEWVAL there and advances OFFSET by 4. */
+#define FIXUP2( CODE, OFFSET, CHECKVAL, NEWVAL )		\
+do {								\
+   while (*(int *)((CODE)+(OFFSET)) != (CHECKVAL)) (OFFSET)++;	\
+   fprintf(stderr, "%s/%d CVAL %x OFFSET %d VAL %x\n", __FUNCTION__,	\
+	   __LINE__, (CHECKVAL), (OFFSET), (int)(NEWVAL));		\
+   *(int *)((CODE)+(OFFSET)) = (int)(NEWVAL);			\
+   (OFFSET) += 4;						\
+} while (0)
+
+/* Code generator setup:
+ */
+void radeonInitCodegen( struct dfn_generators *gen );
+void radeonInitX86Codegen( struct dfn_generators *gen );
+void radeonInitSSECodegen( struct dfn_generators *gen );
+
+
+
+/* Defined in radeon_vtxfmt_x86.c
+ */
+struct dynfn *radeon_makeX86Vertex2f( GLcontext *, int );
+struct dynfn *radeon_makeX86Vertex2fv( GLcontext *, int );
+struct dynfn *radeon_makeX86Vertex3f( GLcontext *, int );
+struct dynfn *radeon_makeX86Vertex3fv( GLcontext *, int );
+struct dynfn *radeon_makeX86Color4ub( GLcontext *, int );
+struct dynfn *radeon_makeX86Color4ubv( GLcontext *, int );
+struct dynfn *radeon_makeX86Color3ub( GLcontext *, int );
+struct dynfn *radeon_makeX86Color3ubv( GLcontext *, int );
+struct dynfn *radeon_makeX86Color4f( GLcontext *, int );
+struct dynfn *radeon_makeX86Color4fv( GLcontext *, int );
+struct dynfn *radeon_makeX86Color3f( GLcontext *, int );
+struct dynfn *radeon_makeX86Color3fv( GLcontext *, int );
+struct dynfn *radeon_makeX86SecondaryColor3ubEXT( GLcontext *, int );
+struct dynfn *radeon_makeX86SecondaryColor3ubvEXT( GLcontext *, int );
+struct dynfn *radeon_makeX86SecondaryColor3fEXT( GLcontext *, int );
+struct dynfn *radeon_makeX86SecondaryColor3fvEXT( GLcontext *, int );
+struct dynfn *radeon_makeX86Normal3f( GLcontext *, int );
+struct dynfn *radeon_makeX86Normal3fv( GLcontext *, int );
+struct dynfn *radeon_makeX86TexCoord2f( GLcontext *, int );
+struct dynfn *radeon_makeX86TexCoord2fv( GLcontext *, int );
+struct dynfn *radeon_makeX86TexCoord1f( GLcontext *, int );
+struct dynfn *radeon_makeX86TexCoord1fv( GLcontext *, int );
+struct dynfn *radeon_makeX86MultiTexCoord2fARB( GLcontext *, int );
+struct dynfn *radeon_makeX86MultiTexCoord2fvARB( GLcontext *, int );
+struct dynfn *radeon_makeX86MultiTexCoord1fARB( GLcontext *, int );
+struct dynfn *radeon_makeX86MultiTexCoord1fvARB( GLcontext *, int );
+
 
 #endif
 #endif
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_c.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_c.c
new file mode 100644
index 000000000..4d4ed6ca1
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_c.c
@@ -0,0 +1,728 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2002 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+#include "mtypes.h"
+#include "colormac.h"
+#include "simple_list.h"
+#include "api_noop.h"
+#include "vtxfmt.h"
+
+#include "radeon_vtxfmt.h"
+
+/* Fallback versions of all the entrypoints for situations where
+ * codegen isn't available.  This is still a lot faster than the
+ * vb/pipeline implementation in Mesa.
+ */
+static void radeon_Vertex3f( GLfloat x, GLfloat y, GLfloat z )
+{
+   int i;
+
+   *vb.dmaptr++ = *(int *)&x;
+   *vb.dmaptr++ = *(int *)&y;
+   *vb.dmaptr++ = *(int *)&z;
+
+   for (i = 3; i < vb.vertex_size; i++)
+      *vb.dmaptr++ = vb.vertex[i].i;
+   
+   if (--vb.counter == 0)
+      vb.notify();
+}
+
+
+static void radeon_Vertex3fv( const GLfloat *v )
+{
+   int i;
+
+   *vb.dmaptr++ = *(int *)&v[0];
+   *vb.dmaptr++ = *(int *)&v[1];
+   *vb.dmaptr++ = *(int *)&v[2];
+
+   for (i = 3; i < vb.vertex_size; i++)
+      *vb.dmaptr++ = vb.vertex[i].i;
+   
+   if (--vb.counter == 0)
+      vb.notify();
+}
+
+
+/* Emit a two-component vertex: x, y and a zero z word, then the rest of vb.vertex. */
+static void radeon_Vertex2f( GLfloat x, GLfloat y )
+{
+   int i;
+   *vb.dmaptr++ = *(int *)&x;
+   *vb.dmaptr++ = *(int *)&y;
+   *vb.dmaptr++ = 0;		/* z word written as zero */
+
+   /* Copy the remaining vertex words via the .i member, like the other Vertex* paths. */
+   for (i = 3; i < vb.vertex_size; i++)
+      *vb.dmaptr++ = vb.vertex[i].i;
+   if (--vb.counter == 0)
+      vb.notify();
+}
+
+
+static void radeon_Vertex2fv( const GLfloat *v )
+{
+   int i;
+
+   *vb.dmaptr++ = *(int *)&v[0];
+   *vb.dmaptr++ = *(int *)&v[1];
+   *vb.dmaptr++ = 0;
+
+   for (i = 3; i < vb.vertex_size; i++)
+      *vb.dmaptr++ = vb.vertex[i].i;
+   
+   if (--vb.counter == 0)
+      vb.notify();
+}
+
+
+
+/* Color for ubyte (packed) color formats:
+ */
+static void radeon_Color3ub_ub( GLubyte r, GLubyte g, GLubyte b )
+{
+   GLubyte *dest = vb.ubytecolorptr;
+   dest[0] = r;
+   dest[1] = g;
+   dest[2] = b;
+   dest[3] = 0xff;
+}
+
+static void radeon_Color3ubv_ub( const GLubyte *v )
+{
+   GLubyte *dest = vb.ubytecolorptr;
+   dest[0] = v[0];
+   dest[1] = v[1];
+   dest[2] = v[2];
+   dest[3] = 0xff;
+}
+
+static void radeon_Color4ub_ub( GLubyte r, GLubyte g, GLubyte b, GLubyte a )
+{
+   GLubyte *dest = vb.ubytecolorptr;
+   dest[0] = r;
+   dest[1] = g;
+   dest[2] = b;
+   dest[3] = a;
+}
+
+static void radeon_Color4ubv_ub( const GLubyte *v )
+{
+   GLubyte *dest = vb.ubytecolorptr;
+   *(int *)dest = *(int *)v;
+}
+
+
+static void radeon_Color3f_ub( GLfloat r, GLfloat g, GLfloat b )
+{
+   GLubyte *dest = vb.ubytecolorptr;
+   UNCLAMPED_FLOAT_TO_UBYTE( dest[0], r );
+   UNCLAMPED_FLOAT_TO_UBYTE( dest[1], g );
+   UNCLAMPED_FLOAT_TO_UBYTE( dest[2], b );
+   dest[3] = 255;
+}
+
+static void radeon_Color3fv_ub( const GLfloat *v )
+{
+   GLubyte *dest = vb.ubytecolorptr;
+   UNCLAMPED_FLOAT_TO_UBYTE( dest[0], v[0] );
+   UNCLAMPED_FLOAT_TO_UBYTE( dest[1], v[1] );
+   UNCLAMPED_FLOAT_TO_UBYTE( dest[2], v[2] );
+   dest[3] = 255;
+}
+
+static void radeon_Color4f_ub( GLfloat r, GLfloat g, GLfloat b, GLfloat a )
+{
+   GLubyte *dest = vb.ubytecolorptr;
+   UNCLAMPED_FLOAT_TO_UBYTE( dest[0], r );
+   UNCLAMPED_FLOAT_TO_UBYTE( dest[1], g );
+   UNCLAMPED_FLOAT_TO_UBYTE( dest[2], b );
+   UNCLAMPED_FLOAT_TO_UBYTE( dest[3], a );
+}
+
+static void radeon_Color4fv_ub( const GLfloat *v )
+{
+   GLubyte *dest = vb.ubytecolorptr;
+   UNCLAMPED_FLOAT_TO_UBYTE( dest[0], v[0] );
+   UNCLAMPED_FLOAT_TO_UBYTE( dest[1], v[1] );
+   UNCLAMPED_FLOAT_TO_UBYTE( dest[2], v[2] );
+   UNCLAMPED_FLOAT_TO_UBYTE( dest[3], v[3] );
+}
+
+
+/* Color for float color+alpha formats:
+ */
+static void radeon_Color3ub_4f( GLubyte r, GLubyte g, GLubyte b )
+{
+   GLfloat *dest = vb.floatcolorptr;
+   dest[0] = UBYTE_TO_FLOAT(r);
+   dest[1] = UBYTE_TO_FLOAT(g);
+   dest[2] = UBYTE_TO_FLOAT(b);
+   dest[3] = 1.0;
+}
+
+static void radeon_Color3ubv_4f( const GLubyte *v )
+{
+   GLfloat *dest = vb.floatcolorptr;
+   dest[0] = UBYTE_TO_FLOAT(v[0]);
+   dest[1] = UBYTE_TO_FLOAT(v[1]);
+   dest[2] = UBYTE_TO_FLOAT(v[2]);
+   dest[3] = 1.0;
+}
+
+static void radeon_Color4ub_4f( GLubyte r, GLubyte g, GLubyte b, GLubyte a )
+{
+   GLfloat *dest = vb.floatcolorptr;
+   dest[0] = UBYTE_TO_FLOAT(r);
+   dest[1] = UBYTE_TO_FLOAT(g);
+   dest[2] = UBYTE_TO_FLOAT(b);
+   dest[3] = UBYTE_TO_FLOAT(a);
+}
+
+static void radeon_Color4ubv_4f( const GLubyte *v )
+{
+   GLfloat *dest = vb.floatcolorptr;
+   dest[0] = UBYTE_TO_FLOAT(v[0]);
+   dest[1] = UBYTE_TO_FLOAT(v[1]);
+   dest[2] = UBYTE_TO_FLOAT(v[2]);
+   dest[3] = UBYTE_TO_FLOAT(v[3]);
+}
+
+
+static void radeon_Color3f_4f( GLfloat r, GLfloat g, GLfloat b )
+{
+   GLfloat *dest = vb.floatcolorptr;
+   dest[0] = r;
+   dest[1] = g;
+   dest[2] = b;
+   dest[3] = 1.0;		
+}
+
+static void radeon_Color3fv_4f( const GLfloat *v )
+{
+   GLfloat *dest = vb.floatcolorptr;
+   dest[0] = v[0];
+   dest[1] = v[1];
+   dest[2] = v[2];
+   dest[3] = 1.0;
+}
+
+static void radeon_Color4f_4f( GLfloat r, GLfloat g, GLfloat b, GLfloat a )
+{
+   GLfloat *dest = vb.floatcolorptr;
+   dest[0] = r;
+   dest[1] = g;
+   dest[2] = b;
+   dest[3] = a;
+}
+
+static void radeon_Color4fv_4f( const GLfloat *v )
+{
+   GLfloat *dest = vb.floatcolorptr;
+   dest[0] = v[0];
+   dest[1] = v[1];
+   dest[2] = v[2];
+   dest[3] = v[3];
+}
+
+
+/* Color for float color formats:
+ */
+static void radeon_Color3ub_3f( GLubyte r, GLubyte g, GLubyte b )
+{
+   GLfloat *dest = vb.floatcolorptr;
+   dest[0] = UBYTE_TO_FLOAT(r);
+   dest[1] = UBYTE_TO_FLOAT(g);
+   dest[2] = UBYTE_TO_FLOAT(b);
+}
+
+static void radeon_Color3ubv_3f( const GLubyte *v )
+{
+   GLfloat *dest = vb.floatcolorptr;
+   dest[0] = UBYTE_TO_FLOAT(v[0]);
+   dest[1] = UBYTE_TO_FLOAT(v[1]);
+   dest[2] = UBYTE_TO_FLOAT(v[2]);
+}
+
+static void radeon_Color4ub_3f( GLubyte r, GLubyte g, GLubyte b, GLubyte a )
+{
+   GLfloat *dest = vb.floatcolorptr;
+   dest[0] = UBYTE_TO_FLOAT(r);
+   dest[1] = UBYTE_TO_FLOAT(g);
+   dest[2] = UBYTE_TO_FLOAT(b);
+   vb.context->Current.Color[3] = UBYTE_TO_FLOAT(a);
+}
+
+static void radeon_Color4ubv_3f( const GLubyte *v )
+{
+   GLfloat *dest = vb.floatcolorptr;
+   dest[0] = UBYTE_TO_FLOAT(v[0]);
+   dest[1] = UBYTE_TO_FLOAT(v[1]);
+   dest[2] = UBYTE_TO_FLOAT(v[2]);
+   vb.context->Current.Color[3] = UBYTE_TO_FLOAT(v[3]);
+}
+
+
+static void radeon_Color3f_3f( GLfloat r, GLfloat g, GLfloat b )
+{
+   GLfloat *dest = vb.floatcolorptr;
+   dest[0] = r;
+   dest[1] = g;
+   dest[2] = b;
+}
+
+static void radeon_Color3fv_3f( const GLfloat *v )
+{
+   GLfloat *dest = vb.floatcolorptr;
+   dest[0] = v[0];
+   dest[1] = v[1];
+   dest[2] = v[2];
+}
+
+static void radeon_Color4f_3f( GLfloat r, GLfloat g, GLfloat b, GLfloat a )
+{
+   GLfloat *dest = vb.floatcolorptr;
+   dest[0] = r;
+   dest[1] = g;
+   dest[2] = b;
+   vb.context->Current.Color[3] = a;
+}
+
+static void radeon_Color4fv_3f( const GLfloat *v )
+{
+   GLfloat *dest = vb.floatcolorptr;
+   dest[0] = v[0];
+   dest[1] = v[1];
+   dest[2] = v[2];
+   vb.context->Current.Color[3] = v[3]; 
+}
+
+
+/* Secondary Color:
+ */
+static void radeon_SecondaryColor3ubEXT( GLubyte r, GLubyte g, GLubyte b )
+{
+   GLubyte *dest = vb.ubytespecptr;
+   dest[0] = r;
+   dest[1] = g;
+   dest[2] = b;
+   dest[3] = 0xff;
+}
+
+static void radeon_SecondaryColor3ubvEXT( const GLubyte *v )
+{
+   GLubyte *dest = vb.ubytespecptr;
+   dest[0] = v[0];
+   dest[1] = v[1];
+   dest[2] = v[2];
+   dest[3] = 0xff;
+}
+
+static void radeon_SecondaryColor3fEXT( GLfloat r, GLfloat g, GLfloat b )
+{
+   GLubyte *dest = vb.ubytespecptr;
+   UNCLAMPED_FLOAT_TO_UBYTE( dest[0], r );
+   UNCLAMPED_FLOAT_TO_UBYTE( dest[1], g );
+   UNCLAMPED_FLOAT_TO_UBYTE( dest[2], b );
+   dest[3] = 255;
+}
+
+static void radeon_SecondaryColor3fvEXT( const GLfloat *v )
+{
+   GLubyte *dest = vb.ubytespecptr;
+   UNCLAMPED_FLOAT_TO_UBYTE( dest[0], v[0] );
+   UNCLAMPED_FLOAT_TO_UBYTE( dest[1], v[1] );
+   UNCLAMPED_FLOAT_TO_UBYTE( dest[2], v[2] );
+   dest[3] = 255;
+}
+
+
+
+/* Normal
+ */
+static void radeon_Normal3f( GLfloat n0, GLfloat n1, GLfloat n2 )
+{
+   GLfloat *dest = vb.normalptr;
+   dest[0] = n0;
+   dest[1] = n1;
+   dest[2] = n2;
+}
+
+static void radeon_Normal3fv( const GLfloat *v )
+{
+   GLfloat *dest = vb.normalptr;
+   dest[0] = v[0];
+   dest[1] = v[1];
+   dest[2] = v[2];
+}
+
+
+/* TexCoord
+ */
+static void radeon_TexCoord1f( GLfloat s )
+{
+   GLfloat *dest = vb.texcoordptr[0];
+   dest[0] = s;
+   dest[1] = 0;
+}
+
+static void radeon_TexCoord1fv( const GLfloat *v )
+{
+   GLfloat *dest = vb.texcoordptr[0];
+   dest[0] = v[0];
+   dest[1] = 0;
+}
+
+static void radeon_TexCoord2f( GLfloat s, GLfloat t )
+{
+   GLfloat *dest = vb.texcoordptr[0];
+   dest[0] = s;
+   dest[1] = t;
+}
+
+static void radeon_TexCoord2fv( const GLfloat *v )
+{
+   GLfloat *dest = vb.texcoordptr[0];
+   dest[0] = v[0];
+   dest[1] = v[1];
+}
+
+
+/* MultiTexCoord
+ */
+static void radeon_MultiTexCoord1fARB( GLenum target, GLfloat s  )
+{
+   GLfloat *dest = vb.texcoordptr[(target - GL_TEXTURE0_ARB)&1];
+   dest[0] = s;
+   dest[1] = 0;
+}
+
+static void radeon_MultiTexCoord1fvARB( GLenum target, const GLfloat *v )
+{
+   GLfloat *dest = vb.texcoordptr[(target - GL_TEXTURE0_ARB)&1];
+   dest[0] = v[0];
+   dest[1] = 0;
+}
+
+static void radeon_MultiTexCoord2fARB( GLenum target, GLfloat s, GLfloat t )
+{
+   GLfloat *dest = vb.texcoordptr[(target - GL_TEXTURE0_ARB)&1];
+   dest[0] = s;
+   dest[1] = t;
+}
+
+static void radeon_MultiTexCoord2fvARB( GLenum target, const GLfloat *v )
+{
+   GLfloat *dest = vb.texcoordptr[(target - GL_TEXTURE0_ARB)&1];
+   dest[0] = v[0];
+   dest[1] = v[1];
+}
+
+static struct dynfn *lookup( struct dynfn *l, int key )
+{
+   struct dynfn *f;
+
+   foreach( f, l ) {
+      if (f->key == key) 
+	 return f;
+   }
+
+   return 0;
+}
+
+/* Can't use the loopback template for this:
+ */
+
+#define CHOOSE(FN, FNTYPE, MASK, ACTIVE, ARGS1, ARGS2 )			\
+static void choose_##FN ARGS1						\
+{									\
+   radeonContextPtr rmesa = RADEON_CONTEXT(vb.context);			\
+   int key = rmesa->vb.vertex_format & (MASK|ACTIVE);			\
+   struct dynfn *dfn = lookup( &rmesa->vb.dfn_cache.FN, key );		\
+									\
+   if (dfn == 0)							\
+      dfn = rmesa->vb.codegen.FN( vb.context, key );			\
+   else if (RADEON_DEBUG & DEBUG_CODEGEN)				\
+      fprintf(stderr, "%s -- cached codegen\n", __FUNCTION__ );		\
+									\
+   if (dfn)								\
+      vb.context->Exec->FN = (FNTYPE)(dfn->code);			\
+   else {								\
+      if (RADEON_DEBUG & DEBUG_CODEGEN)					\
+	 fprintf(stderr, "%s -- generic version\n", __FUNCTION__ );	\
+      vb.context->Exec->FN = radeon_##FN;				\
+   }									\
+									\
+   vb.context->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT;		\
+   vb.context->Exec->FN ARGS2;						\
+}
+
+
+
+/* For the _3f case, only allow one color function to be hooked in at
+ * a time.  Eventually, use a similar mechanism to allow selecting the
+ * color component of the vertex format based on client behaviour.  
+ *
+ * Note:  Perform these actions even if there is a codegen or cached 
+ * codegen version of the chosen function.
+ */
+#define CHOOSE_COLOR(FN, FNTYPE, NR, MASK, ACTIVE, ARGS1, ARGS2 )	\
+static void choose_##FN ARGS1						\
+{									\
+   GLcontext *ctx = vb.context;						\
+   radeonContextPtr rmesa = RADEON_CONTEXT(vb.context);			\
+   int key = rmesa->vb.vertex_format & (MASK|ACTIVE);			\
+   struct dynfn *dfn;							\
+									\
+   if (rmesa->vb.vertex_format & ACTIVE_PKCOLOR) {			\
+      ctx->Exec->FN = radeon_##FN##_ub;					\
+   }									\
+   else if ((rmesa->vb.vertex_format &					\
+            (ACTIVE_FPCOLOR|ACTIVE_FPALPHA)) == ACTIVE_FPCOLOR) {	\
+									\
+      if (rmesa->vb.installed_color_3f_sz != NR) {			\
+         rmesa->vb.installed_color_3f_sz = NR;				\
+         if (NR == 3) ctx->Current.Color[3] = 1.0;			\
+         if (ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) {		\
+            radeon_copy_to_current( ctx );				\
+            _mesa_install_exec_vtxfmt( ctx, &rmesa->vb.vtxfmt );	\
+            ctx->Exec->FN ARGS2;					\
+            return;							\
+         }								\
+      }									\
+									\
+      ctx->Exec->FN = radeon_##FN##_3f;					\
+   }									\
+   else {								\
+      ctx->Exec->FN = radeon_##FN##_4f;					\
+   }									\
+									\
+									\
+   dfn = lookup( &rmesa->vb.dfn_cache.FN, key );			\
+   if (!dfn) dfn = rmesa->vb.codegen.FN( ctx, key );			\
+									\
+   if (dfn) {								\
+      if (RADEON_DEBUG & DEBUG_CODEGEN)					\
+         fprintf(stderr, "%s -- codegen version\n", __FUNCTION__ );	\
+      ctx->Exec->FN = (FNTYPE)dfn->code;				\
+   }									\
+   else if (RADEON_DEBUG & DEBUG_CODEGEN)				\
+         fprintf(stderr, "%s -- 'c' version\n", __FUNCTION__ );		\
+									\
+   ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT;			\
+   ctx->Exec->FN ARGS2;							\
+}
+
+
+
+
+
+/* Shorthands
+ */
+#define ACTIVE_XYZW (RADEON_CP_VC_FRMT_W0|RADEON_CP_VC_FRMT_Z)
+#define ACTIVE_NORM RADEON_CP_VC_FRMT_N0
+#define ACTIVE_PKCOLOR RADEON_CP_VC_FRMT_PKCOLOR
+#define ACTIVE_FPCOLOR RADEON_CP_VC_FRMT_FPCOLOR
+#define ACTIVE_FPALPHA RADEON_CP_VC_FRMT_FPALPHA
+#define ACTIVE_COLOR (ACTIVE_FPCOLOR|ACTIVE_PKCOLOR)
+#define ACTIVE_SPEC RADEON_CP_VC_FRMT_PKSPEC
+#define ACTIVE_ST0 RADEON_CP_VC_FRMT_ST0
+#define ACTIVE_ST1 RADEON_CP_VC_FRMT_ST1
+#define ACTIVE_ST_ALL (RADEON_CP_VC_FRMT_ST1|RADEON_CP_VC_FRMT_ST0)
+
+/* Each codegen function should be able to be fully specified by a
+ * subsetted version of rmesa->vb.vertex_format.
+ */
+#define MASK_NORM    (ACTIVE_XYZW)
+#define MASK_COLOR   (MASK_NORM|ACTIVE_NORM)
+#define MASK_SPEC    (MASK_COLOR|ACTIVE_COLOR)
+#define MASK_ST0     (MASK_SPEC|ACTIVE_SPEC)
+#define MASK_ST1     (MASK_ST0|ACTIVE_ST0)
+#define MASK_ST_ALL  (MASK_ST1|ACTIVE_ST1)
+#define MASK_VERTEX  (MASK_ST_ALL|ACTIVE_FPALPHA) 
+
+
+typedef void (*p4f)( GLfloat, GLfloat, GLfloat, GLfloat );
+typedef void (*p3f)( GLfloat, GLfloat, GLfloat );
+typedef void (*p2f)( GLfloat, GLfloat );
+typedef void (*p1f)( GLfloat );
+typedef void (*pe2f)( GLenum, GLfloat, GLfloat );
+typedef void (*pe1f)( GLenum, GLfloat );
+typedef void (*p4ub)( GLubyte, GLubyte, GLubyte, GLubyte );
+typedef void (*p3ub)( GLubyte, GLubyte, GLubyte );
+typedef void (*pfv)( const GLfloat * );
+typedef void (*pefv)( GLenum, const GLfloat * );
+typedef void (*pubv)( const GLubyte * );
+
+
+CHOOSE(Normal3f, p3f, MASK_NORM, ACTIVE_NORM, 
+       (GLfloat a,GLfloat b,GLfloat c), (a,b,c))
+CHOOSE(Normal3fv, pfv, MASK_NORM, ACTIVE_NORM, 
+       (const GLfloat *v), (v))
+
+CHOOSE_COLOR(Color4ub, p4ub, 4, MASK_COLOR, ACTIVE_COLOR,
+	(GLubyte a,GLubyte b, GLubyte c, GLubyte d), (a,b,c,d))
+CHOOSE_COLOR(Color4ubv, pubv, 4, MASK_COLOR, ACTIVE_COLOR, 
+	(const GLubyte *v), (v))
+CHOOSE_COLOR(Color3ub, p3ub, 3, MASK_COLOR, ACTIVE_COLOR, 
+	(GLubyte a,GLubyte b, GLubyte c), (a,b,c))
+CHOOSE_COLOR(Color3ubv, pubv, 3, MASK_COLOR, ACTIVE_COLOR, 
+	(const GLubyte *v), (v))
+
+CHOOSE_COLOR(Color4f, p4f, 4, MASK_COLOR, ACTIVE_COLOR, 
+	(GLfloat a,GLfloat b, GLfloat c, GLfloat d), (a,b,c,d))
+CHOOSE_COLOR(Color4fv, pfv, 4, MASK_COLOR, ACTIVE_COLOR, 
+	(const GLfloat *v), (v))
+CHOOSE_COLOR(Color3f, p3f, 3, MASK_COLOR, ACTIVE_COLOR,
+	(GLfloat a,GLfloat b, GLfloat c), (a,b,c))
+CHOOSE_COLOR(Color3fv, pfv, 3, MASK_COLOR, ACTIVE_COLOR,
+	(const GLfloat *v), (v))
+
+
+CHOOSE(SecondaryColor3ubEXT, p3ub, MASK_SPEC, ACTIVE_SPEC, 
+	(GLubyte a,GLubyte b, GLubyte c), (a,b,c))
+CHOOSE(SecondaryColor3ubvEXT, pubv, MASK_SPEC, ACTIVE_SPEC, 
+	(const GLubyte *v), (v))
+CHOOSE(SecondaryColor3fEXT, p3f, MASK_SPEC, ACTIVE_SPEC,
+	(GLfloat a,GLfloat b, GLfloat c), (a,b,c))
+CHOOSE(SecondaryColor3fvEXT, pfv, MASK_SPEC, ACTIVE_SPEC,
+	(const GLfloat *v), (v))
+
+CHOOSE(TexCoord2f, p2f, MASK_ST0, ACTIVE_ST0, 
+       (GLfloat a,GLfloat b), (a,b))
+CHOOSE(TexCoord2fv, pfv, MASK_ST0, ACTIVE_ST0, 
+       (const GLfloat *v), (v))
+CHOOSE(TexCoord1f, p1f, MASK_ST0, ACTIVE_ST0, 
+       (GLfloat a), (a))
+CHOOSE(TexCoord1fv, pfv, MASK_ST0, ACTIVE_ST0, 
+       (const GLfloat *v), (v))
+
+CHOOSE(MultiTexCoord2fARB, pe2f, MASK_ST_ALL, ACTIVE_ST_ALL,
+	 (GLenum u,GLfloat a,GLfloat b), (u,a,b))
+CHOOSE(MultiTexCoord2fvARB, pefv, MASK_ST_ALL, ACTIVE_ST_ALL,
+	(GLenum u,const GLfloat *v), (u,v))
+CHOOSE(MultiTexCoord1fARB, pe1f, MASK_ST_ALL, ACTIVE_ST_ALL,
+	 (GLenum u,GLfloat a), (u,a))
+CHOOSE(MultiTexCoord1fvARB, pefv, MASK_ST_ALL, ACTIVE_ST_ALL,
+	(GLenum u,const GLfloat *v), (u,v))
+
+CHOOSE(Vertex3f, p3f, MASK_VERTEX, MASK_VERTEX, 
+       (GLfloat a,GLfloat b,GLfloat c), (a,b,c))
+CHOOSE(Vertex3fv, pfv, MASK_VERTEX, MASK_VERTEX, 
+       (const GLfloat *v), (v))
+CHOOSE(Vertex2f, p2f, MASK_VERTEX, MASK_VERTEX, 
+       (GLfloat a,GLfloat b), (a,b))
+CHOOSE(Vertex2fv, pfv, MASK_VERTEX, MASK_VERTEX, 
+       (const GLfloat *v), (v))
+
+
+
+
+
+void radeonVtxfmtInitChoosers( GLvertexformat *vfmt )
+{
+   vfmt->Color3f = choose_Color3f;
+   vfmt->Color3fv = choose_Color3fv;
+   vfmt->Color3ub = choose_Color3ub;
+   vfmt->Color3ubv = choose_Color3ubv;
+   vfmt->Color4f = choose_Color4f;
+   vfmt->Color4fv = choose_Color4fv;
+   vfmt->Color4ub = choose_Color4ub;
+   vfmt->Color4ubv = choose_Color4ubv;
+   vfmt->SecondaryColor3fEXT = choose_SecondaryColor3fEXT;
+   vfmt->SecondaryColor3fvEXT = choose_SecondaryColor3fvEXT;
+   vfmt->SecondaryColor3ubEXT = choose_SecondaryColor3ubEXT;
+   vfmt->SecondaryColor3ubvEXT = choose_SecondaryColor3ubvEXT;
+   vfmt->MultiTexCoord1fARB = choose_MultiTexCoord1fARB;
+   vfmt->MultiTexCoord1fvARB = choose_MultiTexCoord1fvARB;
+   vfmt->MultiTexCoord2fARB = choose_MultiTexCoord2fARB;
+   vfmt->MultiTexCoord2fvARB = choose_MultiTexCoord2fvARB;
+   vfmt->Normal3f = choose_Normal3f;
+   vfmt->Normal3fv = choose_Normal3fv;
+   vfmt->TexCoord1f = choose_TexCoord1f;
+   vfmt->TexCoord1fv = choose_TexCoord1fv;
+   vfmt->TexCoord2f = choose_TexCoord2f;
+   vfmt->TexCoord2fv = choose_TexCoord2fv;
+   vfmt->Vertex2f = choose_Vertex2f;
+   vfmt->Vertex2fv = choose_Vertex2fv;
+   vfmt->Vertex3f = choose_Vertex3f;
+   vfmt->Vertex3fv = choose_Vertex3fv;
+}
+
+
+static struct dynfn *codegen_noop( GLcontext *ctx, int key )
+{
+   (void) ctx; (void) key;
+   return 0;
+}
+
+void radeonInitCodegen( struct dfn_generators *gen )
+{
+   gen->Vertex3f = codegen_noop;
+   gen->Vertex3fv = codegen_noop;
+   gen->Color4ub = codegen_noop;
+   gen->Color4ubv = codegen_noop;
+   gen->Normal3f = codegen_noop;
+   gen->Normal3fv = codegen_noop;
+   gen->TexCoord2f = codegen_noop;
+   gen->TexCoord2fv = codegen_noop;
+   gen->MultiTexCoord2fARB = codegen_noop;
+   gen->MultiTexCoord2fvARB = codegen_noop;
+   gen->Vertex2f = codegen_noop;
+   gen->Vertex2fv = codegen_noop;
+   gen->Color3ub = codegen_noop;
+   gen->Color3ubv = codegen_noop;
+   gen->Color4f = codegen_noop;
+   gen->Color4fv = codegen_noop;
+   gen->Color3f = codegen_noop;
+   gen->Color3fv = codegen_noop;
+   gen->SecondaryColor3fEXT = codegen_noop;
+   gen->SecondaryColor3fvEXT = codegen_noop;
+   gen->SecondaryColor3ubEXT = codegen_noop;
+   gen->SecondaryColor3ubvEXT = codegen_noop;
+   gen->TexCoord1f = codegen_noop;
+   gen->TexCoord1fv = codegen_noop;
+   gen->MultiTexCoord1fARB = codegen_noop;
+   gen->MultiTexCoord1fvARB = codegen_noop;
+
+   if (!getenv("RADEON_NO_CODEGEN")) {
+#if defined(USE_X86_ASM)
+      radeonInitX86Codegen( gen );
+#endif
+
+#if defined(USE_SSE_ASM)
+      radeonInitSSECodegen( gen );
+#endif
+   }
+}
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_sse.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_sse.c
new file mode 100644
index 000000000..633990be7
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_sse.c
@@ -0,0 +1,95 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include <stdio.h>
+#include <assert.h>
+#include "mem.h" 
+#include "simple_list.h" 
+#include "radeon_vtxfmt.h"
+
+#if defined(USE_SSE_ASM)
+
+/* Build specialized versions of the immediate calls on the fly for
+ * the current state.  ???P4 SSE2 versions???
+ */
+
+
+static struct dynfn *makeSSENormal3fv( GLcontext *ctx, int key )
+{
+   /* Code template for Normal3fv.  Requires P4 (sse2?)
+    */
+   static unsigned char temp[] = {
+      0x8b, 0x44, 0x24, 0x04,          	/*  mov    0x4(%esp,1),%eax */
+      0xba, 0x78, 0x56, 0x34, 0x12,   	/*  mov    $0x12345678,%edx */
+      0xf3, 0x0f, 0x7e, 0x00,          	/*  movq   (%eax),%xmm0 */
+      0x66, 0x0f, 0x6e, 0x48, 0x08,    	/*  movd   0x8(%eax),%xmm1 */
+      0x66, 0x0f, 0xd6, 0x42, 0x0c,    	/*  movq   %xmm0,0xc(%edx) */
+      0x66, 0x0f, 0x7e, 0x4a, 0x14,    	/*  movd   %xmm1,0x14(%edx) */
+      0xc3,                   	        /*  ret     */
+   };
+
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   insert_at_head( &rmesa->vb.dfn_cache.Normal3fv, dfn );
+   dfn->key = key;
+   /* Copy the template, then patch the $0x12345678 immediate at byte 5 with
+    * vb.normalptr.  NOTE(review): assumes FIXUP verifies its third argument. */
+   dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
+   memcpy (dfn->code, temp, sizeof(temp));
+   FIXUP(dfn->code, 5, 0x12345678, (int)vb.normalptr); 
+   return dfn;
+}
+
+void radeonInitSSECodegen( struct dfn_generators *gen )
+{
+   /* Need to: 
+    *    - check kernel sse support
+    *    - check p4/sse2
+    */
+   (void) makeSSENormal3fv;
+}
+
+
+#else 
+
+void radeonInitSSECodegen( struct dfn_generators *gen )
+{
+   (void) gen;		/* USE_SSE_ASM not defined: leave the generators untouched */
+}
+
+#endif
+
+
+
+
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_x86.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_x86.c
new file mode 100644
index 000000000..6963e94c4
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_x86.c
@@ -0,0 +1,463 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include <stdio.h>
+#include <assert.h>
+#include "mem.h" 
+#include "mmath.h" 
+#include "simple_list.h" 
+#include "radeon_vtxfmt.h"
+
+#if defined(USE_X86_ASM)
+
+#define EXTERN( FUNC )		\
+extern const char *FUNC;	\
+extern const char *FUNC##_end
+
+EXTERN ( _x86_Normal3fv );
+EXTERN ( _x86_Normal3f );
+EXTERN ( _x86_Vertex3fv_6 );
+EXTERN ( _x86_Vertex3fv_8 );
+EXTERN ( _x86_Vertex3fv );
+EXTERN ( _x86_Vertex3f_4 );
+EXTERN ( _x86_Vertex3f_6 );
+EXTERN ( _x86_Vertex3f );
+EXTERN ( _x86_Color4ubv_ub );
+EXTERN ( _x86_Color4ubv_4f );
+EXTERN ( _x86_Color4ub_ub );
+EXTERN ( _x86_Color3fv_3f );
+EXTERN ( _x86_Color3f_3f );
+EXTERN ( _x86_TexCoord2fv );
+EXTERN ( _x86_TexCoord2f );
+EXTERN ( _x86_MultiTexCoord2fvARB );
+EXTERN ( _x86_MultiTexCoord2fvARB_2 );
+EXTERN ( _x86_MultiTexCoord2fARB );
+EXTERN ( _x86_MultiTexCoord2fARB_2 );
+
+
+/* Build specialized versions of the immediate calls on the fly for
+ * the current state.  Generic x86 versions.
+ */
+
+struct dynfn *radeon_makeX86Vertex3f( GLcontext *ctx, int key )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x %d\n", __FUNCTION__, key, vb.vertex_size );
+
+   switch (vb.vertex_size) {
+   case 4: {
+
+      DFN ( _x86_Vertex3f_4, rmesa->vb.dfn_cache.Vertex3f );
+      FIXUP(dfn->code, 2, 0x0, (int)&vb.dmaptr);
+      FIXUP(dfn->code, 25, 0x0, (int)&vb.vertex[3]);
+      FIXUP(dfn->code, 36, 0x0, (int)&vb.counter);
+      FIXUP(dfn->code, 46, 0x0, (int)&vb.dmaptr);
+      FIXUP(dfn->code, 51, 0x0, (int)&vb.counter);
+      FIXUP(dfn->code, 60, 0x0, (int)&vb.notify);
+      break;
+   }
+   case 6: {
+
+      DFN ( _x86_Vertex3f_6, rmesa->vb.dfn_cache.Vertex3f );
+      FIXUP(dfn->code, 3, 0x0, (int)&vb.dmaptr);
+      FIXUP(dfn->code, 28, 0x0, (int)&vb.vertex[3]);
+      FIXUP(dfn->code, 34, 0x0, (int)&vb.vertex[4]);
+      FIXUP(dfn->code, 40, 0x0, (int)&vb.vertex[5]);
+      FIXUP(dfn->code, 57, 0x0, (int)&vb.counter);
+      FIXUP(dfn->code, 63, 0x0, (int)&vb.dmaptr);
+      FIXUP(dfn->code, 70, 0x0, (int)&vb.counter);
+      FIXUP(dfn->code, 79, 0x0, (int)&vb.notify);
+      break;
+   }
+   default: {
+
+      DFN ( _x86_Vertex3f, rmesa->vb.dfn_cache.Vertex3f );
+      FIXUP(dfn->code, 3, 0x0, (int)&vb.vertex[3]);
+      FIXUP(dfn->code, 9, 0x0, (int)&vb.dmaptr);
+      FIXUP(dfn->code, 37, 0x0, vb.vertex_size-3);
+      FIXUP(dfn->code, 44, 0x0, (int)&vb.counter);
+      FIXUP(dfn->code, 50, 0x0, (int)&vb.dmaptr);
+      FIXUP(dfn->code, 56, 0x0, (int)&vb.counter);
+      FIXUP(dfn->code, 67, 0x0, (int)&vb.notify);
+   break;
+   }
+   }
+
+   return dfn;
+}
+
+
+
+struct dynfn *radeon_makeX86Vertex3fv( GLcontext *ctx, int key )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x %d\n", __FUNCTION__, key, vb.vertex_size );
+
+   switch (vb.vertex_size) {
+   case 6: {
+
+      DFN ( _x86_Vertex3fv_6, rmesa->vb.dfn_cache.Vertex3fv );
+      FIXUP(dfn->code, 1, 0x00000000, (int)&vb.dmaptr);
+      FIXUP(dfn->code, 27, 0x0000001c, (int)&vb.vertex[3]);
+      FIXUP(dfn->code, 33, 0x00000020, (int)&vb.vertex[4]);
+      FIXUP(dfn->code, 45, 0x00000024, (int)&vb.vertex[5]);
+      FIXUP(dfn->code, 56, 0x00000000, (int)&vb.dmaptr);
+      FIXUP(dfn->code, 61, 0x00000004, (int)&vb.counter);
+      FIXUP(dfn->code, 67, 0x00000004, (int)&vb.counter);
+      FIXUP(dfn->code, 76, 0x00000008, (int)&vb.notify);
+      break;
+   }
+   
+
+   case 8: {
+
+      DFN ( _x86_Vertex3fv_8, rmesa->vb.dfn_cache.Vertex3fv );
+      FIXUP(dfn->code, 1, 0x00000000, (int)&vb.dmaptr);
+      FIXUP(dfn->code, 27, 0x0000001c, (int)&vb.vertex[3]);
+      FIXUP(dfn->code, 33, 0x00000020, (int)&vb.vertex[4]);
+      FIXUP(dfn->code, 45, 0x0000001c, (int)&vb.vertex[5]);
+      FIXUP(dfn->code, 51, 0x00000020, (int)&vb.vertex[6]);
+      FIXUP(dfn->code, 63, 0x00000024, (int)&vb.vertex[7]);
+      FIXUP(dfn->code, 74, 0x00000000, (int)&vb.dmaptr);
+      FIXUP(dfn->code, 79, 0x00000004, (int)&vb.counter);
+      FIXUP(dfn->code, 85, 0x00000004, (int)&vb.counter);
+      FIXUP(dfn->code, 94, 0x00000008, (int)&vb.notify);
+      break;
+   }
+   
+
+
+   default: {
+
+      DFN ( _x86_Vertex3fv, rmesa->vb.dfn_cache.Vertex3fv );
+      FIXUP(dfn->code, 8, 0x01010101, (int)&vb.dmaptr);
+      FIXUP(dfn->code, 32, 0x00000006, vb.vertex_size-3);
+      FIXUP(dfn->code, 37, 0x00000058, (int)&vb.vertex[3]);
+      FIXUP(dfn->code, 45, 0x01010101, (int)&vb.dmaptr);
+      FIXUP(dfn->code, 50, 0x02020202, (int)&vb.counter);
+      FIXUP(dfn->code, 58, 0x02020202, (int)&vb.counter);
+      FIXUP(dfn->code, 67, 0x0, (int)&vb.notify);
+   break;
+   }
+   }
+
+   return dfn;
+}
+
+/* Build a Normal3fv entrypoint from the _x86_Normal3fv template. */
+struct dynfn *radeon_makeX86Normal3fv( GLcontext *ctx, int key )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   int i = 0;
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+   DFN ( _x86_Normal3fv, rmesa->vb.dfn_cache.Normal3fv );
+   FIXUP2(dfn->code, i, 0x0, (int)vb.normalptr); 
+   FIXUP2(dfn->code, i, 0x4, 4+(int)vb.normalptr); 
+   FIXUP2(dfn->code, i, 0x8, 8+(int)vb.normalptr); 
+   if (RADEON_DEBUG & DEBUG_CODEGEN)	/* trace only when codegen debugging is on */
+      fprintf(stderr, "%s done\n", __FUNCTION__);
+   return dfn;
+}
+
+struct dynfn *radeon_makeX86Normal3f( GLcontext *ctx, int key )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+   DFN ( _x86_Normal3f, rmesa->vb.dfn_cache.Normal3f );
+   FIXUP(dfn->code, 1, 0x12345678, (int)vb.normalptr); 
+   return dfn;
+}
+
+struct dynfn *radeon_makeX86Color4ubv( GLcontext *ctx, int key )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   /* Build a Color4ubv stub; the template depends on RADEON_CP_VC_FRMT_PKCOLOR in key. */
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+   if (key & RADEON_CP_VC_FRMT_PKCOLOR) {
+      DFN ( _x86_Color4ubv_ub, rmesa->vb.dfn_cache.Color4ubv);
+      FIXUP(dfn->code, 5, 0x12345678, (int)vb.ubytecolorptr); /* destination of the single dword store */
+      return dfn;
+   } 
+   else {
+      /* Float path: patch in the ubyte->float table, then four stores 4 bytes apart. */
+      DFN ( _x86_Color4ubv_4f, rmesa->vb.dfn_cache.Color4ubv);
+      FIXUP(dfn->code, 2, 0x00000000, (int)_mesa_ubyte_to_float_color_tab); 
+      FIXUP(dfn->code, 27, 0xdeadbeaf, (int)vb.floatcolorptr); 
+      FIXUP(dfn->code, 33, 0xdeadbeaf, (int)vb.floatcolorptr+4); 
+      FIXUP(dfn->code, 55, 0xdeadbeaf, (int)vb.floatcolorptr+8); 
+      FIXUP(dfn->code, 61, 0xdeadbeaf, (int)vb.floatcolorptr+12); 
+      return dfn;
+   }
+}
+
+struct dynfn *radeon_makeX86Color4ub( GLcontext *ctx, int key )
+{
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+   /* A stub is built only when key has RADEON_CP_VC_FRMT_PKCOLOR; otherwise 0 is returned. */
+   if (key & RADEON_CP_VC_FRMT_PKCOLOR) {
+      struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+      radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+      DFN ( _x86_Color4ub_ub, rmesa->vb.dfn_cache.Color4ub );
+      FIXUP(dfn->code, 18, 0x0, (int)vb.ubytecolorptr); 	/* four byte stores, one per argument, */
+      FIXUP(dfn->code, 24, 0x0, (int)vb.ubytecolorptr+1); 	/* to consecutive byte addresses       */
+      FIXUP(dfn->code, 30, 0x0, (int)vb.ubytecolorptr+2); 
+      FIXUP(dfn->code, 36, 0x0, (int)vb.ubytecolorptr+3); 
+      return dfn;
+   }
+   else
+      return 0;
+}
+
+
+struct dynfn *radeon_makeX86Color3fv( GLcontext *ctx, int key )
+{
+   if (key & (RADEON_CP_VC_FRMT_PKCOLOR|RADEON_CP_VC_FRMT_FPALPHA))
+      return 0;		/* no stub when either format bit is set in key */
+   else
+   {
+      struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+      radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+      /* Build a Color3fv stub whose three dword stores are based at vb.floatcolorptr. */
+      if (RADEON_DEBUG & DEBUG_CODEGEN)
+	 fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+      DFN ( _x86_Color3fv_3f, rmesa->vb.dfn_cache.Color3fv );
+      FIXUP(dfn->code, 5, 0x0, (int)vb.floatcolorptr); /* imm32 of the template's movl $0,%edx */
+      return dfn;
+   }
+}
+
+struct dynfn *radeon_makeX86Color3f( GLcontext *ctx, int key )
+{
+   if (key & (RADEON_CP_VC_FRMT_PKCOLOR|RADEON_CP_VC_FRMT_FPALPHA))
+      return 0;		/* no stub when either format bit is set in key */
+   else
+   {
+      struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+      radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+      /* Build a Color3f stub whose three dword stores are based at vb.floatcolorptr. */
+      if (RADEON_DEBUG & DEBUG_CODEGEN)
+	 fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+      DFN ( _x86_Color3f_3f, rmesa->vb.dfn_cache.Color3f );
+      FIXUP(dfn->code, 1, 0x12345678, (int)vb.floatcolorptr); /* imm32 of the template's leading movl */
+      return dfn;
+   }
+}
+
+
+
+struct dynfn *radeon_makeX86TexCoord2fv( GLcontext *ctx, int key )
+{
+   /* Build a TexCoord2fv stub whose two dword stores are based at vb.texcoordptr[0]. */
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+   DFN ( _x86_TexCoord2fv, rmesa->vb.dfn_cache.TexCoord2fv );
+   FIXUP(dfn->code, 5, 0x12345678, (int)vb.texcoordptr[0]); /* imm32 of movl $0x12345678,%edx */
+   return dfn;
+}
+
+struct dynfn *radeon_makeX86TexCoord2f( GLcontext *ctx, int key )
+{
+   /* Build a TexCoord2f stub whose two dword stores are based at vb.texcoordptr[0]. */
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+   DFN ( _x86_TexCoord2f, rmesa->vb.dfn_cache.TexCoord2f );
+   FIXUP(dfn->code, 1, 0x12345678, (int)vb.texcoordptr[0]); /* imm32 of the template's leading movl */
+   return dfn;
+}
+
+struct dynfn *radeon_makeX86MultiTexCoord2fvARB( GLcontext *ctx, int key )
+{
+#if 0
+   static  char temp[] = {
+      0x8b, 0x44, 0x24, 0x04,          	/* mov    0x4(%esp,1),%eax */
+      0x8b, 0x4c, 0x24, 0x08,          	/* mov    0x8(%esp,1),%ecx */
+      0x2d, 0xc0, 0x84, 0x00, 0x00,    	/* sub    $0x84c0,%eax */
+      0x83, 0xe0, 0x01,             	/* and    $0x1,%eax */
+      0x8b, 0x11,                	/* mov    (%ecx),%edx */
+      0xc1, 0xe0, 0x03,             	/* shl    $0x3,%eax */
+      0x8b, 0x49, 0x04,             	/* mov    0x4(%ecx),%ecx */
+      0x89, 0x90, 0, 0, 0, 0,/* mov    %edx,DEST(%eax) */
+      0x89, 0x88, 0, 0, 0, 0,/* mov    %ecx,DEST+4(%eax) -- the live FIXUP below uses +4 */
+      0xc3,                     	/* ret     */
+   };
+   static char temp2[] = {
+      0x8b, 0x44, 0x24, 0x04,          	/* mov    0x4(%esp,1),%eax */
+      0x8b, 0x4c, 0x24, 0x08,          	/* mov    0x8(%esp,1),%ecx */
+      0x2d, 0xc0, 0x84, 0x00, 0x00,    	/* sub    $0x84c0,%eax */
+      0x83, 0xe0, 0x01,             	/* and    $0x1,%eax */
+      0x8b, 0x14, 0x85, 0, 0, 0, 0, /* mov    DEST(,%eax,4),%edx */
+      0x8b, 0x01,                	/* mov    (%ecx),%eax */
+      0x89, 0x02,                	/* mov    %eax,(%edx) */
+      0x8b, 0x41, 0x04,             	/* mov    0x4(%ecx),%eax */
+      0x89, 0x42, 0x04,             	/* mov    %eax,0x4(%edx) */
+      0xc3,                     	/* ret     */
+   };
+#endif
+   /* The disabled arrays above are byte listings; the live code uses the _x86_* templates instead. */
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+   if ((key & (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1)) ==
+      (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1)) {
+      DFN ( _x86_MultiTexCoord2fvARB, rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+      FIXUP(dfn->code, 26, 0xdeadbeef, (int)vb.texcoordptr[0]);	/* s store; template adds 8*unit in %eax */
+      FIXUP(dfn->code, 32, 0xdeadbeef, (int)vb.texcoordptr[0]+4); /* t store, 4 bytes further on */
+   } else {
+      DFN ( _x86_MultiTexCoord2fvARB_2, rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+      FIXUP(dfn->code, 19, 0x0, (int)vb.texcoordptr); /* base of the pointer array the template indexes */
+   }
+   return dfn;
+}
+
+struct dynfn *radeon_makeX86MultiTexCoord2fARB( GLcontext *ctx, 
+						int key )
+{
+#if 0
+   static  char temp[] = {
+      0x8b, 0x44, 0x24, 0x04,          	/* mov    0x4(%esp,1),%eax */
+      0x8b, 0x54, 0x24, 0x08,          	/* mov    0x8(%esp,1),%edx */
+      0x2d, 0xc0, 0x84, 0x00, 0x00,    	/* sub    $0x84c0,%eax */
+      0x8b, 0x4c, 0x24, 0x0c,          	/* mov    0xc(%esp,1),%ecx */
+      0x83, 0xe0, 0x01,             	/* and    $0x1,%eax */
+      0xc1, 0xe0, 0x03,             	/* shl    $0x3,%eax */
+      0x89, 0x90, 0, 0, 0, 0,	/* mov    %edx,DEST(%eax) */
+      0x89, 0x88, 0, 0, 0, 0,	/* mov    %ecx,DEST+4(%eax) -- the live FIXUP below uses +4 */
+      0xc3,                     	/* ret     */
+   };
+
+   static char temp2[] = {
+      0x8b, 0x44, 0x24, 0x04,          	/* mov    0x4(%esp,1),%eax */
+      0x8b, 0x54, 0x24, 0x08,          	/* mov    0x8(%esp,1),%edx */
+      0x2d, 0xc0, 0x84, 0x00, 0x00,    	/* sub    $0x84c0,%eax */
+      0x8b, 0x4c, 0x24, 0x0c,          	/* mov    0xc(%esp,1),%ecx */
+      0x83, 0xe0, 0x01,             	/* and    $0x1,%eax */
+      0x8b, 0x04, 0x85, 0, 0, 0, 0,     /* mov    DEST(,%eax,4),%eax */
+      0x89, 0x10,                	/* mov    %edx,(%eax) */
+      0x89, 0x48, 0x04,             	/* mov    %ecx,0x4(%eax) */
+      0xc3,                   	        /* ret     */
+   };
+#endif
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   /* Build a MultiTexCoord2fARB stub; the template depends on whether key has both ST0 and ST1. */
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+   if ((key & (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1)) ==
+       (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1)) {
+      DFN ( _x86_MultiTexCoord2fARB, rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+      FIXUP(dfn->code, 25, 0xdeadbeef, (int)vb.texcoordptr[0]); /* s store; template adds 8*unit in %eax */
+      FIXUP(dfn->code, 31, 0xdeadbeef, (int)vb.texcoordptr[0]+4); /* t store, 4 bytes further on */
+   }
+   else {
+      /* Note: this might get generated multiple times, even though the
+       * actual emitted code is the same.
+       */
+      DFN ( _x86_MultiTexCoord2fARB_2, rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+      FIXUP(dfn->code, 23, 0x0, (int)vb.texcoordptr); /* base of the pointer array the template indexes */
+   }      
+   return dfn;
+}
+
+
+void radeonInitX86Codegen( struct dfn_generators *gen )
+{	/* Fill gen with the x86 stub generators assigned below. */
+   gen->Vertex3f = radeon_makeX86Vertex3f;
+   gen->Vertex3fv = radeon_makeX86Vertex3fv;
+   gen->Color4ub = radeon_makeX86Color4ub; /* PKCOLOR only */
+   gen->Color4ubv = radeon_makeX86Color4ubv; /* PKCOLOR or float color */
+   gen->Normal3f = radeon_makeX86Normal3f;
+   gen->Normal3fv = radeon_makeX86Normal3fv;
+   gen->TexCoord2f = radeon_makeX86TexCoord2f;
+   gen->TexCoord2fv = radeon_makeX86TexCoord2fv;
+   gen->MultiTexCoord2fARB = radeon_makeX86MultiTexCoord2fARB;
+   gen->MultiTexCoord2fvARB = radeon_makeX86MultiTexCoord2fvARB;
+   gen->Color3f = radeon_makeX86Color3f; /* returns 0 for PKCOLOR or FPALPHA keys */
+   gen->Color3fv = radeon_makeX86Color3fv; /* returns 0 for PKCOLOR or FPALPHA keys */
+
+   /* Not done:
+    */
+/*     gen->Vertex2f = radeon_makeX86Vertex2f; */
+/*     gen->Vertex2fv = radeon_makeX86Vertex2fv; */
+/*     gen->Color3ub = radeon_makeX86Color3ub; */
+/*     gen->Color3ubv = radeon_makeX86Color3ubv; */
+/*     gen->Color4f = radeon_makeX86Color4f; */
+/*     gen->Color4fv = radeon_makeX86Color4fv; */
+/*     gen->TexCoord1f = radeon_makeX86TexCoord1f; */
+/*     gen->TexCoord1fv = radeon_makeX86TexCoord1fv; */
+/*     gen->MultiTexCoord1fARB = radeon_makeX86MultiTexCoord1fARB; */
+/*     gen->MultiTexCoord1fvARB = radeon_makeX86MultiTexCoord1fvARB; */
+}
+
+
+#else 
+
+void radeonInitX86Codegen( struct dfn_generators *gen )
+{	/* #else branch of a guard opened outside this chunk: installs no generators */
+   (void) gen;	/* gen is left untouched; the cast only marks the parameter as used */
+}
+
+#endif
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxtmp_x86.S b/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxtmp_x86.S
new file mode 100644
index 000000000..cfb0ecbd8
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxtmp_x86.S
@@ -0,0 +1,408 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+#define GLOBL( x )	\
+.globl x;		\
+x##:
+
+.data
+.align 4
+GLOBL( _x86_Normal3fv)
+	movl 4(%esp), %eax      /* load 'v' off stack */
+	movl (%eax), %ecx       /* load v[0] */
+	movl 4(%eax), %edx      /* load v[1] */
+	movl 8(%eax), %eax      /* load v[2] */
+	movl %ecx, 0      	/* store v[0]; address 0 is a placeholder patched to normalptr */
+	movl %edx, 4      	/* store v[1]; address 4 is a placeholder patched to normalptr+4 */
+	movl %eax, 8      	/* store v[2]; address 8 is a placeholder patched to normalptr+8 */
+	ret
+GLOBL ( _x86_Normal3fv_end )
+
+/*
+	vertex 3f vertex size 4
+*/
+	
+GLOBL ( _x86_Vertex3f_4 )
+	movl	(0), %ecx		/* absolute (0) operands are placeholders; their patch table is outside this chunk */
+	movl	4(%esp), %eax		/* first stack argument */
+	movl	8(%esp), %edx		/* second stack argument */
+	movl	%eax, (%ecx)
+	movl	%edx, 4(%ecx)
+	movl	12(%esp), %eax		/* third stack argument */
+	movl	(0), %edx		/* fourth dword comes from memory, not from the caller */
+	movl	%eax, 8(%ecx)
+	movl	%edx, 12(%ecx)
+	movl	(0), %eax		/* value that is decremented below */
+	addl	$16, %ecx		/* step the write pointer past the 16 bytes just stored */
+	dec 	%eax
+	movl	%ecx, (0)
+	movl	%eax, (0)
+	je	.1 			/* tests ZF from dec; the movl stores do not change flags */
+	ret
+.1:	jmp	*0			/* taken when the decremented value reaches zero */
+	
+GLOBL ( _x86_Vertex3f_4_end )
+
+/*
+	vertex 3f vertex size 6
+*/
+GLOBL ( _x86_Vertex3f_6 )
+	push	%edi			/* stack arguments are 4 bytes further away after this push */
+	movl	(0), %edi		/* absolute (0) operands are placeholders; their patch table is outside this chunk */
+	movl	8(%esp), %eax		/* first stack argument */
+	movl	12(%esp), %edx		/* second stack argument */
+	movl	16(%esp), %ecx		/* third stack argument */
+	movl	%eax, (%edi)
+	movl	%edx, 4(%edi)
+	movl	%ecx, 8(%edi)
+	movl	(0), %eax		/* three further dwords come from memory */
+	movl	(0), %edx
+	movl	(0), %ecx
+	movl	%eax, 12(%edi)
+	movl	%edx, 16(%edi)
+	movl	%ecx, 20(%edi)
+	addl	$24, %edi		/* step the write pointer past the 24 bytes just stored */
+	movl	(0), %eax		/* value that is decremented below */
+	movl	%edi, (0)
+	dec 	%eax
+	pop 	%edi
+	movl	%eax, (0)
+	je	.2			/* tests ZF from dec; pop and movl do not change flags */
+	ret
+.2:	jmp	*0			/* taken when the decremented value reaches zero */
+GLOBL ( _x86_Vertex3f_6_end )
+/*
+	vertex 3f generic size
+*/
+GLOBL ( _x86_Vertex3f )
+	push	%edi
+	push	%esi			/* stack arguments are 8 bytes further away after the two pushes */
+	movl	$0, %esi		/* copy source; $0 is presumably a patched placeholder -- confirm */
+	movl	(0), %edi		/* absolute (0) operands are placeholders; their patch table is outside this chunk */
+	movl	12(%esp), %eax		/* first stack argument */
+	movl	16(%esp), %edx		/* second stack argument */
+	movl	20(%esp), %ecx		/* third stack argument */
+	movl	%eax, (%edi)
+	movl	%edx, 4(%edi)
+	movl	%ecx, 8(%edi)
+	addl	$12, %edi
+	movl	$0, %ecx		/* dword count for the copy; presumably a patched placeholder -- confirm */
+	repz	movsl %ds:(%esi), %es:(%edi)	/* copy %ecx dwords from (%esi) to (%edi) */
+	movl	(0), %eax		/* value that is decremented below */
+	movl	%edi, (0)
+	dec 	%eax
+	movl	%eax, (0)
+	pop 	%esi
+	pop 	%edi
+	je  	.3			/* tests ZF from dec; movl and pop do not change flags */
+	ret
+.3:	jmp	*0			/* taken when the decremented value reaches zero */
+
+GLOBL ( _x86_Vertex3f_end )
+
+/*
+	Vertex 3fv vertex size 6
+*/
+GLOBL ( _x86_Vertex3fv_6 )
+	movl	(0), %eax		/* small absolute addresses (0, 4, 8, 28, 32, 36) are placeholders; patch table not in this chunk */
+	movl	4(%esp), %ecx		/* pointer argument */
+	movl	(%ecx), %edx
+	movl	%edx, (%eax)
+	movl	4(%ecx), %edx
+	movl	8(%ecx), %ecx
+	movl	%edx, 4(%eax)
+	movl	%ecx, 8(%eax)
+	movl	(28), %edx		/* three further dwords come from memory */
+	movl	(32), %ecx
+	movl	%edx, 12(%eax)
+	movl	%ecx, 16(%eax)
+	movl	(36), %edx
+	movl	%edx, 20(%eax)
+	addl	$24, %eax		/* step the write pointer past the 24 bytes just stored */
+	movl	%eax, 0
+	movl	4, %eax			/* value that is decremented below */
+	dec 	%eax
+	movl	%eax, 4
+	je	.4			/* tests ZF from dec; movl does not change flags */
+	ret
+.4:	jmp    *8			/* taken when the decremented value reaches zero */
+	
+GLOBL ( _x86_Vertex3fv_6_end )
+
+/*
+	Vertex 3fv vertex size 8
+*/
+GLOBL ( _x86_Vertex3fv_8 )
+	movl	(0), %eax		/* small absolute addresses (0, 4, 8, 28, 32, 36) are placeholders; most of the patch table is not in this chunk */
+	movl	4(%esp), %ecx		/* pointer argument */
+	movl	(%ecx), %edx
+	movl	%edx ,(%eax)
+	movl	4(%ecx) ,%edx
+	movl	8(%ecx) ,%ecx
+	movl	%edx, 4(%eax)
+	movl	%ecx, 8(%eax)
+	movl	(28), %edx		/* five further dwords come from memory */
+	movl	(32), %ecx
+	movl	%edx, 12(%eax)
+	movl	%ecx, 16(%eax)
+	movl	(28), %edx		/* same placeholder values as the pair above; presumably patched to different addresses -- confirm */
+	movl	(32), %ecx
+	movl	%edx, 20(%eax)
+	movl	%ecx, 24(%eax)
+	movl	(36), %edx
+	movl	%edx, 28(%eax)
+	addl	$32, %eax		/* step the write pointer past the 32 bytes just stored */
+	movl	%eax, (0)
+	movl	4, %eax			/* value that is decremented below */
+	dec	%eax
+	movl    %eax, (4)
+	je	.5			/* tests ZF from dec; movl does not change flags */
+	ret
+.5:	jmp    *8			/* taken on zero; presumably the site a FIXUP at byte 94 points at &vb.notify -- confirm */
+	
+GLOBL ( _x86_Vertex3fv_8_end )
+
+/*
+	Vertex 3fv generic vertex size
+*/
+GLOBL ( _x86_Vertex3fv )
+	movl	4(%esp), %edx		/* pointer argument, read before the pushes move %esp */
+	push	%edi
+	push	%esi
+	movl	(0x1010101), %edi	/* placeholder patched to &vb.dmaptr */
+	movl	(%edx), %eax
+	movl	4(%edx), %ecx
+	movl	8(%edx), %esi
+	movl	%eax, (%edi)
+	movl	%ecx, 4(%edi)
+	movl	%esi, 8(%edi)
+	addl	$12, %edi
+	movl	$6, %ecx		/* placeholder patched to vb.vertex_size-3 */
+	movl	$0x58, %esi		/* placeholder patched to &vb.vertex[3] */
+	repz	movsl %ds:(%esi), %es:(%edi)	/* copy %ecx dwords from (%esi) to (%edi) */
+	movl	%edi, (0x1010101)	/* placeholder patched to &vb.dmaptr */
+	movl	(0x2020202), %eax	/* placeholder patched to &vb.counter */
+	pop	%esi
+	pop	%edi
+	dec	%eax
+	movl	%eax, (0x2020202)	/* placeholder patched to &vb.counter */
+	je	.6			/* tests ZF from dec; movl does not change flags */
+	ret
+.6:	jmp    *0			/* placeholder patched to &vb.notify; taken when the counter reaches zero */
+GLOBL ( _x86_Vertex3fv_end )
+
+/*
+	Normal 3f
+*/
+GLOBL ( _x86_Normal3f )
+	movl	$0x12345678, %edx	/* placeholder patched to vb.normalptr */
+	movl	4(%esp), %eax		/* first stack argument */
+	movl	%eax, (%edx)
+	movl	8(%esp), %eax		/* second stack argument */
+	movl	%eax, 4(%edx)
+	movl	12(%esp), %eax		/* third stack argument */
+	movl	%eax, 8(%edx)
+	ret
+GLOBL ( _x86_Normal3f_end )
+
+/*
+	Color 4ubv_ub
+*/
+GLOBL ( _x86_Color4ubv_ub )
+	movl 4(%esp), %eax		/* pointer argument */
+	movl $0x12345678, %edx		/* placeholder patched to vb.ubytecolorptr */
+	movl (%eax), %eax		/* read all four bytes as one dword */
+	movl %eax, (%edx)
+	ret
+GLOBL ( _x86_Color4ubv_ub_end )
+
+/*
+	Color 4ubv 4f
+*/
+GLOBL ( _x86_Color4ubv_4f )
+	push	%ebx
+	movl	$0, %edx		/* placeholder patched to _mesa_ubyte_to_float_color_tab */
+	xor	%eax, %eax
+	xor	%ecx, %ecx
+	movl	8(%esp), %ebx		/* pointer argument (8 because of the push above) */
+	movl	(%ebx), %ebx		/* read all four bytes as one dword */
+	mov	%bl, %al		/* byte 0 */
+	mov	%bh, %cl		/* byte 1 */
+	movl	(%edx,%eax,4),%eax	/* table lookup, 4-byte entries */
+	movl	(%edx,%ecx,4),%ecx
+	movl	%eax, (0xdeadbeaf)	/* placeholder patched to vb.floatcolorptr */
+	movl	%ecx, (0xdeadbeaf)	/* placeholder patched to vb.floatcolorptr+4 */
+	xor	%eax, %eax
+	xor	%ecx, %ecx
+	shr	$16, %ebx		/* bring bytes 2 and 3 down into %bl and %bh */
+	mov	%bl, %al
+	mov	%bh, %cl
+	movl	(%edx,%eax,4), %eax
+	movl	(%edx,%ecx,4), %ecx
+	movl	%eax, (0xdeadbeaf)	/* placeholder patched to vb.floatcolorptr+8 */
+	movl	%ecx, (0xdeadbeaf)	/* placeholder patched to vb.floatcolorptr+12 */
+	pop	%ebx
+	ret
+GLOBL ( _x86_Color4ubv_4f_end )
+
+/*
+
+	Color4ub_ub
+*/
+GLOBL( _x86_Color4ub_ub )
+	push	%ebx
+	movl	8(%esp), %eax		/* first stack argument (8 because of the push above) */
+	movl	12(%esp), %edx		/* second stack argument */
+	movl	16(%esp), %ecx		/* third stack argument */
+	movl	20(%esp), %ebx		/* fourth stack argument */
+	mov	%al, (0)		/* placeholder patched to vb.ubytecolorptr */
+	mov	%dl, (0)		/* placeholder patched to vb.ubytecolorptr+1 */
+	mov	%cl, (0)		/* placeholder patched to vb.ubytecolorptr+2 */
+	mov	%bl, (0)		/* placeholder patched to vb.ubytecolorptr+3 */
+	pop	%ebx
+	ret
+GLOBL( _x86_Color4ub_ub_end )
+
+/*
+	Color3fv_3f
+*/
+GLOBL( _x86_Color3fv_3f )
+	movl	4(%esp), %eax		/* pointer argument */
+	movl	$0, %edx		/* placeholder patched to vb.floatcolorptr */
+	movl	(%eax), %ecx
+	movl	%ecx, (%edx)
+	movl	4(%eax), %ecx
+	movl	%ecx, 4(%edx)
+	movl	8(%eax), %ecx
+	movl	%ecx, 8(%edx)
+	ret
+GLOBL( _x86_Color3fv_3f_end )
+
+/*
+	Color3f_3f
+*/
+GLOBL( _x86_Color3f_3f )
+	movl	$0x12345678, %edx	/* placeholder patched to vb.floatcolorptr */
+	movl	4(%esp), %eax		/* first stack argument */
+	movl	%eax, (%edx)
+	movl	8(%esp,1), %eax		/* second stack argument */
+	movl	%eax, 4(%edx)
+	movl	12(%esp), %eax		/* third stack argument */
+	movl	%eax, 8(%edx)
+	ret
+GLOBL( _x86_Color3f_3f_end )
+
+/*
+	TexCoord2fv
+*/
+
+GLOBL( _x86_TexCoord2fv )
+	movl	4(%esp), %eax		/* pointer argument */
+	movl	$0x12345678, %edx	/* placeholder patched to vb.texcoordptr[0] */
+	movl	(%eax), %ecx
+	movl	4(%eax), %eax
+	movl	%ecx, (%edx)
+	movl	%eax, 4(%edx)
+	ret
+
+GLOBL( _x86_TexCoord2fv_end )
+/*
+	TexCoord2f
+*/
+GLOBL( _x86_TexCoord2f )
+	movl	$0x12345678, %edx	/* placeholder patched to vb.texcoordptr[0] */
+	movl	4(%esp), %eax		/* first stack argument */
+	movl	8(%esp), %ecx		/* second stack argument */
+	movl	%eax, (%edx)
+	movl	%ecx, 4(%edx)
+	ret
+GLOBL( _x86_TexCoord2f_end )
+
+/*
+	MultiTexCoord2fvARB st0/st1
+*/
+GLOBL( _x86_MultiTexCoord2fvARB )
+
+	movl	4(%esp), %eax		/* first stack argument */
+	movl	8(%esp), %ecx		/* second stack argument: pointer to the two values */
+	sub	$0x84c0, %eax
+	and	$1, %eax		/* keep only the low bit: index 0 or 1 */
+	movl	(%ecx), %edx
+	shl	$3, %eax		/* index * 8 = byte offset added to both stores */
+	movl	4(%ecx), %ecx
+	movl	%edx, 0xdeadbeef(%eax)	/* placeholder patched to vb.texcoordptr[0] */
+	movl	%ecx, 0xdeadbeef(%eax)	/* placeholder patched to vb.texcoordptr[0]+4 */
+	ret
+GLOBL( _x86_MultiTexCoord2fvARB_end )
+/*
+	MultiTexCoord2fvARB
+*/
+
+GLOBL( _x86_MultiTexCoord2fvARB_2 )
+	movl	4(%esp,1), %eax		/* first stack argument */
+	movl	8(%esp,1), %ecx		/* second stack argument: pointer to the two values */
+	sub	$0x84c0, %eax
+	and	$0x1, %eax		/* keep only the low bit: index 0 or 1 */
+	movl	0(,%eax,4), %edx	/* displacement 0 is patched to vb.texcoordptr; loads the indexed destination pointer */
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movl	4(%ecx), %eax
+	movl	%eax, 4(%edx)
+	ret
+
+GLOBL( _x86_MultiTexCoord2fvARB_2_end )
+
+/*
+	MultiTexCoord2fARB st0/st1
+*/
+GLOBL( _x86_MultiTexCoord2fARB )
+	movl	4(%esp), %eax		/* first stack argument */
+	movl	8(%esp), %edx		/* second stack argument */
+	sub	$0x84c0, %eax
+	movl	12(%esp), %ecx		/* third stack argument */
+	and	$1, %eax		/* keep only the low bit: index 0 or 1 */
+	shl	$3, %eax		/* index * 8 = byte offset added to both stores */
+	movl	%edx, 0xdeadbeef(%eax)	/* placeholder patched to vb.texcoordptr[0] */
+	movl	%ecx, 0xdeadbeef(%eax)	/* placeholder patched to vb.texcoordptr[0]+4 */
+	ret
+GLOBL( _x86_MultiTexCoord2fARB_end )
+
+/*
+	MultiTexCoord2fARB
+*/
+GLOBL( _x86_MultiTexCoord2fARB_2 )
+	movl	4(%esp), %eax		/* first stack argument */
+	movl	8(%esp), %edx		/* second stack argument */
+	sub	$0x84c0, %eax
+	movl	12(%esp,1), %ecx	/* third stack argument */
+	and	$1,%eax			/* keep only the low bit: index 0 or 1 */
+	movl	0(,%eax,4), %eax	/* displacement 0 is patched to vb.texcoordptr; loads the indexed destination pointer */
+	movl	%edx, (%eax)
+	movl	%ecx, 4(%eax)
+	ret
+GLOBL( _x86_MultiTexCoord2fARB_2_end )
diff --git a/xc/lib/GL/mesa/src/drv/sis/sis_mesa.c b/xc/lib/GL/mesa/src/drv/sis/sis_mesa.c
index a631000e6..4d03db9fd 100644
--- a/xc/lib/GL/mesa/src/drv/sis/sis_mesa.c
+++ b/xc/lib/GL/mesa/src/drv/sis/sis_mesa.c
@@ -1404,8 +1404,9 @@ sis_update_drawable_state (GLcontext * ctx)
 }
 
 void
-sis_GetBufferSize (GLcontext * ctx, GLuint * width, GLuint * height)
+sis_GetBufferSize (GLframebuffer *buffer, GLuint * width, GLuint * height)
 {
+  GET_CURRENT_CONTEXT(ctx);
   XMesaContext xmesa = (XMesaContext) ctx->DriverCtx;
   __GLSiScontext *hwcx = (__GLSiScontext *) xmesa->private;
 
diff --git a/xc/lib/GL/mesa/src/drv/sis/sis_mesa.h b/xc/lib/GL/mesa/src/drv/sis/sis_mesa.h
index 309a1ba8f..271874cdb 100644
--- a/xc/lib/GL/mesa/src/drv/sis/sis_mesa.h
+++ b/xc/lib/GL/mesa/src/drv/sis/sis_mesa.h
@@ -44,7 +44,7 @@ GLbitfield sis_Clear (GLcontext * ctx, GLbitfield mask, GLboolean all,
 GLboolean sis_SetDrawBuffer (GLcontext * ctx, GLenum mode);
 void sis_SetReadBuffer (GLcontext *ctx, GLframebuffer *colorBuffer,
                         GLenum buffer);
-void sis_GetBufferSize (GLcontext * ctx, GLuint * width, GLuint * height);
+void sis_GetBufferSize (GLframebuffer * buffer, GLuint * width, GLuint * height);
 const char *sis_ExtensionString (GLcontext * ctx);
 const GLubyte *sis_GetString (GLcontext * ctx, GLenum name);
 void sis_Finish (GLcontext * ctx);
diff --git a/xc/lib/GL/mesa/src/drv/tdfx/tdfx_state.c b/xc/lib/GL/mesa/src/drv/tdfx/tdfx_state.c
index cd321720b..9469814d9 100644
--- a/xc/lib/GL/mesa/src/drv/tdfx/tdfx_state.c
+++ b/xc/lib/GL/mesa/src/drv/tdfx/tdfx_state.c
@@ -996,7 +996,7 @@ static void tdfxDDEnable( GLcontext *ctx, GLenum cap, GLboolean state )
 
 /* Set the buffer used for drawing */
 /* XXX support for separate read/draw buffers hasn't been tested */
-static GLboolean tdfxDDSetDrawBuffer( GLcontext *ctx, GLenum mode )
+static void tdfxDDSetDrawBuffer( GLcontext *ctx, GLenum mode )
 {
    tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
 
@@ -1011,22 +1011,22 @@ static GLboolean tdfxDDSetDrawBuffer( GLcontext *ctx, GLenum mode )
       fxMesa->DrawBuffer = GR_BUFFER_FRONTBUFFER;
       fxMesa->new_state |= TDFX_NEW_RENDER;
       FALLBACK( fxMesa, TDFX_FALLBACK_DRAW_BUFFER, GL_FALSE );
-      return GL_TRUE;
+      break;
 
    case GL_BACK_LEFT:
       fxMesa->DrawBuffer = GR_BUFFER_BACKBUFFER;
       fxMesa->new_state |= TDFX_NEW_RENDER;
       FALLBACK( fxMesa, TDFX_FALLBACK_DRAW_BUFFER, GL_FALSE );
-      return GL_TRUE;
+      break;
 
    case GL_NONE:
       FX_grColorMaskv( ctx, false4 );
       FALLBACK( fxMesa, TDFX_FALLBACK_DRAW_BUFFER, GL_FALSE );
-      return GL_TRUE;
+      break;
 
    default:
       FALLBACK( fxMesa, TDFX_FALLBACK_DRAW_BUFFER, GL_TRUE );
-      return GL_FALSE;
+      break;
    }
 }
 
diff --git a/xc/programs/Xserver/GL/dri/dri.c b/xc/programs/Xserver/GL/dri/dri.c
index 256a16986..e74aef391 100644
--- a/xc/programs/Xserver/GL/dri/dri.c
+++ b/xc/programs/Xserver/GL/dri/dri.c
@@ -844,14 +844,26 @@ DRIClipNotifyAllDrawables(ScreenPtr pScreen)
 static void
 DRITransitionToSharedBuffers(ScreenPtr pScreen)
 {
+    DRIScreenPrivPtr pDRIPriv = DRI_SCREEN_PRIV(pScreen);
+    DRIInfoPtr pDRIInfo = pDRIPriv->pDriverInfo;
+
     DRIClipNotifyAllDrawables( pScreen );
+
+    if (pDRIInfo->TransitionSingleToMulti3D)
+	pDRIInfo->TransitionSingleToMulti3D( pScreen );
 }
 
 
 static void
 DRITransitionToPrivateBuffers(ScreenPtr pScreen)
 {
+    DRIScreenPrivPtr pDRIPriv = DRI_SCREEN_PRIV(pScreen);
+    DRIInfoPtr pDRIInfo = pDRIPriv->pDriverInfo;
+
     DRIClipNotifyAllDrawables( pScreen );
+
+    if (pDRIInfo->TransitionMultiToSingle3D)
+	pDRIInfo->TransitionMultiToSingle3D( pScreen );
 }
 
 
@@ -861,6 +873,8 @@ DRITransitionTo3d(ScreenPtr pScreen)
     DRIScreenPrivPtr pDRIPriv = DRI_SCREEN_PRIV(pScreen);
     DRIInfoPtr pDRIInfo = pDRIPriv->pDriverInfo;
 
+    DRIClipNotifyAllDrawables( pScreen );
+
     if (pDRIInfo->TransitionTo3d)
 	pDRIInfo->TransitionTo3d( pScreen );
 }
@@ -871,6 +885,8 @@ DRITransitionTo2d(ScreenPtr pScreen)
     DRIScreenPrivPtr pDRIPriv = DRI_SCREEN_PRIV(pScreen);
     DRIInfoPtr pDRIInfo = pDRIPriv->pDriverInfo;
 
+    DRIClipNotifyAllDrawables( pScreen );
+
     if (pDRIInfo->TransitionTo2d)
 	pDRIInfo->TransitionTo2d( pScreen );
 }
diff --git a/xc/programs/Xserver/GL/dri/dri.h b/xc/programs/Xserver/GL/dri/dri.h
index db1be779d..223162ce6 100644
--- a/xc/programs/Xserver/GL/dri/dri.h
+++ b/xc/programs/Xserver/GL/dri/dri.h
@@ -121,6 +121,9 @@ typedef struct {
 			       CARD32 indx);
     void        (*TransitionTo3d)(ScreenPtr pScreen);
     void        (*TransitionTo2d)(ScreenPtr pScreen);
+    void        (*TransitionSingleToMulti3D)(ScreenPtr pScreen);
+    void        (*TransitionMultiToSingle3D)(ScreenPtr pScreen);
+
     void	(*SetDrawableIndex)(WindowPtr pWin, CARD32 indx);
     Bool        (*OpenFullScreen)(ScreenPtr pScreen);
     Bool        (*CloseFullScreen)(ScreenPtr pScreen);
diff --git a/xc/programs/Xserver/hw/xfree86/drivers/ati/Imakefile b/xc/programs/Xserver/hw/xfree86/drivers/ati/Imakefile
index 68694423a..078fdc18d 100644
--- a/xc/programs/Xserver/hw/xfree86/drivers/ati/Imakefile
+++ b/xc/programs/Xserver/hw/xfree86/drivers/ati/Imakefile
@@ -185,6 +185,7 @@ INCLUDES = -I. -I$(XF86COMSRC) -I$(XF86OSSRC) -I$(XF86SRC) \
            -I$(XF86SRC)/vgahw -I$(XF86SRC)/fbdevhw \
            -I$(SERVERSRC)/cfb -I$(SERVERSRC)/mfb \
            -I$(SERVERSRC)/fb -I$(SERVERSRC)/mi \
+	   -I$(SERVERSRC)/miext/shadow \
            -I$(SERVERSRC)/render -I$(SERVERSRC)/Xext -I$(SERVERSRC)/include \
            $(DRIINCLUDES) -I$(FONTINCSRC) -I$(EXTINCSRC) -I$(XINCLUDESRC)
 #endif
diff --git a/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon.h b/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon.h
index 08b5ee810..0e96df10e 100644
--- a/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon.h
+++ b/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon.h
@@ -369,6 +369,10 @@ typedef struct {
     unsigned char     *PCI;             /* Map */
 
     Bool              depthMoves;       /* Enable depth moves -- slow! */
+ 
+    Bool              allowPageFlip;    /* Enable 3d page flipping */
+    Bool              have3DWindows;    /* Are there any 3d clients? */
+    int               drmMinor;
 
     drmSize           agpSize;
     drmHandle         agpMemHandle;     /* Handle from drmAgpAlloc */
@@ -451,10 +455,6 @@ typedef struct {
     CARD32            re_width_height;
 
     CARD32            aux_sc_cntl;
-
-#ifdef PER_CONTEXT_SAREA
-    int 	      perctx_sarea_size;
-#endif
 #endif
 
     XF86VideoAdaptorPtr adaptor;
diff --git a/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_common.h b/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_common.h
index 89f92cc8f..ec789af39 100644
--- a/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_common.h
+++ b/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_common.h
@@ -38,6 +38,8 @@
 #ifndef _RADEON_COMMON_H_
 #define _RADEON_COMMON_H_
 
+#include "xf86drm.h"
+
 /* WARNING: If you change any of these defines, make sure to change
  * the kernel include file as well (radeon_drm.h)
  */
@@ -62,6 +64,7 @@
 #define DRM_RADEON_VERTEX2                0x0f
 #define DRM_RADEON_CMDBUF                 0x10
 #define DRM_RADEON_GETPARAM               0x11
+#define DRM_RADEON_FLIP                   0x12
 #define DRM_RADEON_MAX_DRM_COMMAND_INDEX  0x39
 
 
@@ -229,6 +232,15 @@ typedef struct {
 	unsigned int dirty;
 } drmRadeonState;
 
+/* 1.1 vertex ioctl.  Used in compatibility modes.
+ */
+typedef struct {
+	int prim;
+	int idx;			/* Index of vertex buffer */
+	int count;			/* Number of vertices in buffer */
+	int discard;			/* Client finished with buffer? */
+} drmRadeonVertex;
+
 typedef struct {
 	unsigned int start;
 	unsigned int finish;
@@ -250,4 +262,79 @@ typedef struct {
 #define RADEON_MAX_STATES 16
 #define RADEON_MAX_PRIMS  64
 
+/* Command buffer.  Replace with true dma stream?
+ */
+typedef struct {
+	int bufsz;			/* presumably the size of buf in bytes -- confirm */
+	char *buf;			/* presumably the command data -- confirm */
+	int nbox;			/* presumably the number of entries in boxes -- confirm */
+        drmClipRect *boxes;
+} drmRadeonCmdBuffer;
+
+/* New style per-packet identifiers for use in cmd_buffer ioctl with
+ * the RADEON_EMIT_PACKET command.  Comments relate new packets to old
+ * state bits and the packet size:
+ */
+#define RADEON_EMIT_PP_MISC                         0 /* context/7 */
+#define RADEON_EMIT_PP_CNTL                         1 /* context/3 */
+#define RADEON_EMIT_RB3D_COLORPITCH                 2 /* context/1 */
+#define RADEON_EMIT_RE_LINE_PATTERN                 3 /* line/2 */
+#define RADEON_EMIT_SE_LINE_WIDTH                   4 /* line/1 */
+#define RADEON_EMIT_PP_LUM_MATRIX                   5 /* bumpmap/1 */
+#define RADEON_EMIT_PP_ROT_MATRIX_0                 6 /* bumpmap/2 */
+#define RADEON_EMIT_RB3D_STENCILREFMASK             7 /* masks/3 */
+#define RADEON_EMIT_SE_VPORT_XSCALE                 8 /* viewport/6 */
+#define RADEON_EMIT_SE_CNTL                         9 /* setup/2 */
+#define RADEON_EMIT_SE_CNTL_STATUS                  10 /* setup/1 */
+#define RADEON_EMIT_RE_MISC                         11 /* misc/1 */
+#define RADEON_EMIT_PP_TXFILTER_0                   12 /* tex0/6 */
+#define RADEON_EMIT_PP_BORDER_COLOR_0               13 /* tex0/1 */
+#define RADEON_EMIT_PP_TXFILTER_1                   14 /* tex1/6 */
+#define RADEON_EMIT_PP_BORDER_COLOR_1               15 /* tex1/1 */
+#define RADEON_EMIT_PP_TXFILTER_2                   16 /* tex2/6 */
+#define RADEON_EMIT_PP_BORDER_COLOR_2               17 /* tex2/1 */
+#define RADEON_EMIT_SE_ZBIAS_FACTOR                 18 /* zbias/2 */
+#define RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT           19 /* tcl/11 */
+#define RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED   20 /* material/17 */
+#define RADEON_MAX_STATE_PACKETS                    21
+
+
+/* Commands understood by cmd_buffer ioctl.  More can be added but
+ * obviously these can't be removed or changed:
+ */
+#define RADEON_CMD_PACKET      1 /* emit one of the register packets above */
+#define RADEON_CMD_SCALARS     2 /* emit scalar data */
+#define RADEON_CMD_VECTORS     3 /* emit vector data */
+#define RADEON_CMD_DMA_DISCARD 4 /* discard current dma buf */
+#define RADEON_CMD_PACKET3     5 /* emit hw packet */
+#define RADEON_CMD_PACKET3_CLIP 6 /* emit hw packet wrapped in cliprects */
+
+typedef union {			/* one command header: an int overlaid with per-command byte fields */
+	int i;				/* the whole header as a single int */
+	struct { 
+		unsigned char cmd_type, pad0, pad1, pad2;	/* unsigned: plain char may be signed, so bytes > 127 would read back negative */
+	} header;
+	struct { 
+		unsigned char cmd_type, packet_id, pad0, pad1;
+	} packet;
+	struct { 
+		unsigned char cmd_type, offset, stride, count; 
+	} scalars;
+	struct { 
+		unsigned char cmd_type, offset, stride, count; 
+	} vectors;
+	struct { 
+		unsigned char cmd_type, buf_idx, pad0, pad1; 
+	} dma;
+} drmRadeonCmdHeader;	/* layout is unchanged by the signedness fix; keep in step with the kernel header */
+
+
+
+typedef struct drm_radeon_getparam {
+	int param;	/* presumably a RADEON_PARAM_* selector -- confirm */
+	int *value;	/* presumably where the result is written -- confirm */
+} drmRadeonGetParam;
+
+#define RADEON_PARAM_AGP_BUFFER_OFFSET 0x1
+
 #endif
diff --git a/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_dri.c b/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_dri.c
index aead524be..e16e79481 100644
--- a/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_dri.c
+++ b/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_dri.c
@@ -46,6 +46,8 @@
 #include "xf86.h"
 #include "windowstr.h"
 
+
+#include "shadow.h"
 				/* GLX/DRI/DRM definitions */
 #define _XF86DRI_SERVER_
 #include "GL/glxtokens.h"
@@ -68,6 +70,16 @@
 # define DRM_PAGE_SIZE 4096
 #endif
 
+
+static Bool RADEONDRICloseFullScreen(ScreenPtr pScreen);
+static Bool RADEONDRIOpenFullScreen(ScreenPtr pScreen);
+static void RADEONDRITransitionTo2d(ScreenPtr pScreen);
+static void RADEONDRITransitionTo3d(ScreenPtr pScreen);
+static void RADEONDRITransitionMultiToSingle3d(ScreenPtr pScreen);
+static void RADEONDRITransitionSingleToMulti3d(ScreenPtr pScreen);
+
+static void RADEONDRIShadowUpdate (ScreenPtr pScreen, shadowBufPtr pBuf);
+
 /* Initialize the visual configs that are supported by the hardware.
    These are combined with the visual configs that the indirect
    rendering core supports, and the intersection is exported to the
@@ -262,36 +274,6 @@ static Bool RADEONCreateContext(ScreenPtr pScreen, VisualPtr visual,
 				drmContext hwContext, void *pVisualConfigPriv,
 				DRIContextType contextStore)
 {
-#ifdef PER_CONTEXT_SAREA
-    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
-    RADEONInfoPtr info = RADEONPTR(pScrn);
-    RADEONDRIContextPtr ctx_info;
-
-    ctx_info = (RADEONDRIContextPtr)contextStore;
-    if (!ctx_info) return FALSE;
-
-    if (drmAddMap(info->drmFD, 0,
-		  info->perctx_sarea_size,
-		  DRM_SHM,
-		  DRM_REMOVABLE,
-		  &ctx_info->sarea_handle) < 0) {
-        xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-		   "[dri] could not create private sarea for ctx id (%d)\n",
-		   (int)hwContext);
-        return FALSE;
-    }
-
-    if (drmAddContextPrivateMapping(info->drmFD, hwContext,
-				    ctx_info->sarea_handle) < 0) {
-        xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-		   "[dri] could not associate private sarea to ctx id (%d)\n",
-		   (int)hwContext);
-        drmRmMap(info->drmFD, ctx_info->sarea_handle);
-        return FALSE;
-    }
-
-    ctx_info->ctx_id = hwContext;
-#endif
     return TRUE;
 }
 
@@ -299,20 +281,6 @@ static Bool RADEONCreateContext(ScreenPtr pScreen, VisualPtr visual,
 static void RADEONDestroyContext(ScreenPtr pScreen, drmContext hwContext,
 				 DRIContextType contextStore)
 {
-#ifdef PER_CONTEXT_SAREA
-    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
-    RADEONInfoPtr info = RADEONPTR(pScrn);
-    RADEONDRIContextPtr ctx_info;
-
-    ctx_info = (RADEONDRIContextPtr) contextStore;
-    if (!ctx_info) return;
-
-    if (drmRmMap(info->drmFD, ctx_info->sarea_handle) < 0) {
-        xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-		   "[dri] could not remove private sarea for ctx id (%d)\n",
-		   (int)hwContext);
-    }
-#endif
 }
 
 /* Called when the X server is woken up to allow the last client's
@@ -720,6 +688,9 @@ static void RADEONDRIMoveBuffers(WindowPtr pParent, DDXPointRec ptOldOrg,
     info->accel->NeedToSync = TRUE;
 }
 
+
+
+
 /* Initialize the AGP state.  Request memory for use in AGP space, and
    initialize the Radeon registers to point to that memory. */
 static Bool RADEONDRIAgpInit(RADEONInfoPtr info, ScreenPtr pScreen)
@@ -1179,8 +1150,7 @@ static void RADEONDRISAREAInit(ScreenPtr pScreen,
 		    RADEON_ANTI_ALIAS_NONE);
 
     ctx->rb3d_cntl = (RADEON_PLANE_MASK_ENABLE |
-		      color_fmt |
-		      RADEON_ZBLOCK16);
+		      color_fmt | (1<<15));
 
     ctx->rb3d_coloroffset = (info->backOffset & RADEON_COLOROFFSET_MASK);
 
@@ -1237,43 +1207,14 @@ static void RADEONDRISAREAInit(ScreenPtr pScreen,
     ctx->se_vport_zscale  = 0x00000000;
     ctx->se_vport_zoffset = 0x00000000;
 
-    ctx->se_cntl_status = (RADEON_VC_NO_SWAP |
-			   RADEON_TCL_BYPASS);
-
-#ifdef TCL_ENABLE
-   /* FIXME: Obviously these need to be properly initialized */
-    ctx->se_tcl_material_emmissive.red   = 0x00000000;
-    ctx->se_tcl_material_emmissive.green = 0x00000000;
-    ctx->se_tcl_material_emmissive.blue  = 0x00000000;
-    ctx->se_tcl_material_emmissive.alpha = 0x00000000;
-
-    ctx->se_tcl_material_ambient.red     = 0x00000000;
-    ctx->se_tcl_material_ambient.green   = 0x00000000;
-    ctx->se_tcl_material_ambient.blue    = 0x00000000;
-    ctx->se_tcl_material_ambient.alpha   = 0x00000000;
-
-    ctx->se_tcl_material_diffuse.red     = 0x00000000;
-    ctx->se_tcl_material_diffuse.green   = 0x00000000;
-    ctx->se_tcl_material_diffuse.blue    = 0x00000000;
-    ctx->se_tcl_material_diffuse.alpha   = 0x00000000;
-
-    ctx->se_tcl_material_specular.red    = 0x00000000;
-    ctx->se_tcl_material_specular.green  = 0x00000000;
-    ctx->se_tcl_material_specular.blue   = 0x00000000;
-    ctx->se_tcl_material_specular.alpha  = 0x00000000;
-
-    ctx->se_tcl_shininess                = 0x00000000;
-    ctx->se_tcl_output_vtx_fmt           = 0x00000000;
-    ctx->se_tcl_output_vtx_sel           = 0x00000000;
-    ctx->se_tcl_matrix_select_0          = 0x00000000;
-    ctx->se_tcl_matrix_select_1          = 0x00000000;
-    ctx->se_tcl_ucp_vert_blend_ctl       = 0x00000000;
-    ctx->se_tcl_texture_proc_ctl         = 0x00000000;
-    ctx->se_tcl_light_model_ctl          = 0x00000000;
-    for ( i = 0 ; i < 4 ; i++ ) {
-	ctx->se_tcl_per_light_ctl[i]     = 0x00000000;
+    if (info->IsM6) {
+       ctx->se_cntl_status = (RADEON_VC_NO_SWAP |
+			      RADEON_TCL_BYPASS);
     }
-#endif
+    else {
+       ctx->se_cntl_status = (RADEON_VC_NO_SWAP);
+    }
+
 
     ctx->re_top_left = ((0 << RADEON_RE_LEFT_SHIFT) |
 			(0 << RADEON_RE_TOP_SHIFT) );
@@ -1378,29 +1319,12 @@ Bool RADEONDRIScreenInit(ScreenPtr pScreen)
 					    < RADEON_MAX_DRAWABLES
 					    ? SAREA_MAX_DRAWABLES
 					    : RADEON_MAX_DRAWABLES);
-#ifdef PER_CONTEXT_SAREA
-    /* This is only here for testing per-context SAREAs.  When used, the
-       magic number below would be properly defined in a header file. */
-    info->perctx_sarea_size = 64 * 1024;
-#endif
 
-#ifdef NOT_DONE
-    /* FIXME: Need to extend DRI protocol to pass this size back to
-     * client for SAREA mapping that includes a device private record
-     */
-    pDRIInfo->SAREASize =
-	((sizeof(XF86DRISAREARec) + 0xfff) & 0x1000); /* round to page */
-    /* + shared memory device private rec */
-#else
-    /* For now the mapping works by using a fixed size defined
-     * in the SAREA header
-     */
     if (sizeof(XF86DRISAREARec)+sizeof(RADEONSAREAPriv)>SAREA_MAX) {
 	ErrorF("Data does not fit in SAREA\n");
 	return FALSE;
     }
     pDRIInfo->SAREASize = SAREA_MAX;
-#endif
 
     if (!(pRADEONDRI = (RADEONDRIPtr)xcalloc(sizeof(RADEONDRIRec),1))) {
 	DRIDestroyInfoRec(info->pDRIInfo);
@@ -1417,6 +1341,12 @@ Bool RADEONDRIScreenInit(ScreenPtr pScreen)
     pDRIInfo->InitBuffers    = RADEONDRIInitBuffers;
     pDRIInfo->MoveBuffers    = RADEONDRIMoveBuffers;
     pDRIInfo->bufferRequests = DRI_ALL_WINDOWS;
+    pDRIInfo->OpenFullScreen = RADEONDRIOpenFullScreen;
+    pDRIInfo->CloseFullScreen = RADEONDRICloseFullScreen;
+    pDRIInfo->TransitionTo2d = RADEONDRITransitionTo2d;
+    pDRIInfo->TransitionTo3d = RADEONDRITransitionTo3d;
+    pDRIInfo->TransitionSingleToMulti3D = RADEONDRITransitionSingleToMulti3d;
+    pDRIInfo->TransitionMultiToSingle3D = RADEONDRITransitionMultiToSingle3d;
 
     pDRIInfo->createDummyCtx     = TRUE;
     pDRIInfo->createDummyCtxPriv = FALSE;
@@ -1493,6 +1423,15 @@ Bool RADEONDRIScreenInit(ScreenPtr pScreen)
             RADEONDRICloseScreen(pScreen);
             return FALSE;
 	}
+	/* Record the DRM minor for every kernel module, not only old ones. */
+	info->drmMinor = version->version_minor;
+	if (version->version_minor < 3) {
+            xf86DrvMsg(pScreen->myNum, X_WARNING,
+                "[dri] Some DRI features disabled because of version mismatch.\n"
+                "[dri] radeon.o kernel module version is %d.%d.%d but 1.3.0 or later is preferred.\n",
+                version->version_major, version->version_minor,
+                version->version_patchlevel);
+	}
 	drmFreeVersion(version);
     }
 
@@ -1622,10 +1561,15 @@ Bool RADEONDRIFinishScreenInit(ScreenPtr pScreen)
 
     pRADEONDRI->sarea_priv_offset = sizeof(XF86DRISAREARec);
 
-#ifdef PER_CONTEXT_SAREA
-    /* Set per-context SAREA size */
-    pRADEONDRI->perctx_sarea_size = info->perctx_sarea_size;
-#endif
+
+   /* Have shadow run only while there is 3d active.
+    */
+    if (info->allowPageFlip /*  && info->drmMinor >= 3 */) {
+       shadowSetup (pScreen);   
+       shadowAdd( pScreen, 0, RADEONDRIShadowUpdate, 0, 0, 0 );
+    }
+    else
+       info->allowPageFlip = 0;
 
     return TRUE;
 }
@@ -1715,3 +1659,166 @@ void RADEONDRICloseScreen(ScreenPtr pScreen)
 	info->pVisualConfigsPriv = NULL;
     }
 }
+
+
+
+/* Fullscreen hooks.  The DRI fullscreen mode can probably be removed
+ * as it adds little or nothing above the mechanism below.  (and isn't
+ * widely used)
+ */
+static Bool RADEONDRIOpenFullScreen(ScreenPtr pScreen)
+{
+  /* No driver work on entering DRI fullscreen mode; always succeeds. */
+  return TRUE;
+}
+
+static Bool RADEONDRICloseFullScreen(ScreenPtr pScreen)
+{
+  /* No driver work on leaving DRI fullscreen mode; always succeeds. */
+  return TRUE;
+}
+
+
+
+/* Use callbacks from dri.c to support pageflipping mode for a single
+ * 3d context without need for any specific full-screen extension.
+ *
+ * Also see tdfx driver for example of using these callbacks to
+ * allocate and free 3d-specific memory on demand.  
+ */
+
+
+/* Use the miext/shadow module to maintain a list of dirty rectangles.
+ * These are blitted to the back buffer to keep both buffers up to date
+ * during page-flipping when the 3d application isn't fullscreen.
+ *
+ * Unlike most uses of the shadow code, both buffers are in video
+ * memory.
+ *
+ * An alternative to this would be to arrange for all on-screen
+ * drawing operations to be duplicated for the two buffers.  That
+ * might be faster, but seems like a lot more work...
+ */
+
+
+/* This should be done *before* XAA syncs or fires its buffer.
+ * Otherwise the buffer may have to be fired again (open question).
+ */
+static void
+RADEONDRIShadowUpdate (ScreenPtr pScreen, shadowBufPtr pBuf)
+{
+   /* Shadow update hook: copies every damaged rectangle of pBuf from
+    * its on-screen position to the same position displaced by
+    * (info->backX, info->backY) with the accelerator's blit hooks.
+    */
+   ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+   RADEONInfoPtr info = RADEONPTR(pScrn);
+   RADEONSAREAPrivPtr sarea = DRIGetSAREAPrivate(pScreen);
+   RegionPtr dirty = &pBuf->damage;
+   BoxPtr box = REGION_RECTS(dirty);
+   int remaining = REGION_NUM_RECTS(dirty);
+
+   /* Skip when flipping is off, or no 3d is active and page 0 is shown. */
+   if (!info->allowPageFlip)
+      return;
+   if (!info->have3DWindows && sarea->pfCurrentPage == 0)
+      return;
+
+   (*info->accel->SetupForScreenToScreenCopy)(pScrn, 1, 1, GXcopy,
+                                              (CARD32)(-1), -1);
+
+   for ( ; remaining > 0; remaining--, box++) {
+      int width = box->x2 - box->x1;
+      int height = box->y2 - box->y1;
+
+      (*info->accel->SubsequentScreenToScreenCopy)(pScrn, box->x1, box->y1,
+                                                   box->x1 + info->backX, box->y1 + info->backY,
+                                                   width, height);
+   }
+}
+
+
+static void RADEONDRITransitionSingleToMulti3d(ScreenPtr pScreen)
+{
+   /* Withdraw permission to page flip by clearing the SAREA field the
+    * client reads.  Still open: bumping the window counters, and DRM
+    * support for front-to-back swapbuffers.
+    */
+   RADEONSAREAPrivPtr sarea = DRIGetSAREAPrivate(pScreen);
+
+   sarea->pfAllowPageFlip = 0;
+}
+
+static void RADEONDRITransitionMultiToSingle3d(ScreenPtr pScreen)
+{
+   /* Let the one remaining 3d app page flip again, but only when page
+    * flipping is enabled for this screen at all.
+    */
+   ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+   RADEONInfoPtr info = RADEONPTR(pScrn);
+   RADEONSAREAPrivPtr sarea = DRIGetSAREAPrivate(pScreen);
+   if (!info->allowPageFlip)
+      return;
+   sarea->pfAllowPageFlip = 1;
+}
+
+
+static void
+RADEONDRITransitionTo3d(ScreenPtr pScreen)
+{
+   /* Installed as pDRIInfo->TransitionTo3d.  If page flipping is enabled
+    * for the screen but not yet advertised in the SAREA, copy the whole
+    * visible front buffer to the back buffer and then advertise it.
+    * Always records that 3d windows are present.
+    */
+   ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+   RADEONInfoPtr info = RADEONPTR(pScrn);
+   RADEONSAREAPrivPtr sarea = DRIGetSAREAPrivate(pScreen);
+   int repairBack = info->allowPageFlip && !sarea->pfAllowPageFlip;
+
+   if (repairBack) {
+      (*info->accel->SetupForScreenToScreenCopy)(pScrn, 1, 1, GXcopy,
+                                                 (CARD32)(-1), -1);
+
+      /* Source origin (0,0), destination origin (backX,backY). */
+      (*info->accel->SubsequentScreenToScreenCopy)(pScrn, 0, 0,
+                                                   info->backX, info->backY,
+                                                   pScrn->virtualX,
+                                                   pScrn->virtualY);
+
+      /* The back buffer has been refreshed; let the client flip. */
+      sarea->pfAllowPageFlip = 1;
+   }
+
+   info->have3DWindows = 1;
+}
+
+
+static void
+RADEONDRITransitionTo2d(ScreenPtr pScreen)
+{
+   ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+   RADEONInfoPtr info = RADEONPTR(pScrn);
+   RADEONSAREAPrivPtr pSAREAPriv = DRIGetSAREAPrivate(pScreen);
+
+   /* Installed as pDRIInfo->TransitionTo2d.  Intended first step: flip
+    * back to the front buffer if 3d left the display on another page. */
+   if (pSAREAPriv->pfCurrentPage != 0) {
+      /* Currently a no-op: the flip below is disabled because the lock
+       * is not held at this point, so it would not work.
+       *    drmRadeonFlipBuffers( info->drmFD );  */
+   }
+
+   /* Shut down shadowing only if the front page is the current page;
+    * otherwise page flipping stays allowed so shadow updates go on. */
+   if (pSAREAPriv->pfCurrentPage == 0) {
+      pSAREAPriv->pfAllowPageFlip = 0;
+   }
+/* Disabled warning for the still-flipped case:                      */
+/*    else xf86DrvMsg(pScreen->myNum, X_WARNING,                      */
+/*       "[dri] RADEONDRITransitionTo2d failed to unflip buffers.\n"); */
+
+   info->have3DWindows = 0;
+}
+
+
diff --git a/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_dri.h b/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_dri.h
index 4a4f53724..35c34693b 100644
--- a/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_dri.h
+++ b/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_dri.h
@@ -98,9 +98,6 @@ typedef struct {
     int           agpTexOffset;
     unsigned int  sarea_priv_offset;
 
-#ifdef PER_CONTEXT_SAREA
-    drmSize	  perctx_sarea_size;
-#endif
 } RADEONDRIRec, *RADEONDRIPtr;
 
 #endif
diff --git a/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_dripriv.h b/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_dripriv.h
index 530fd4abb..68f139a42 100644
--- a/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_dripriv.h
+++ b/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_dripriv.h
@@ -52,13 +52,8 @@ typedef struct {
 } RADEONConfigPrivRec, *RADEONConfigPrivPtr;
 
 typedef struct {
-#ifdef PER_CONTEXT_SAREA
-    drmContext ctx_id;
-    drmHandle sarea_handle;
-#else
     /* Nothing here yet */
     int dummy;
-#endif
 } RADEONDRIContextRec, *RADEONDRIContextPtr;
 
 #endif
diff --git a/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_driver.c b/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_driver.c
index e60f081ce..63f405701 100644
--- a/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_driver.c
+++ b/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_driver.c
@@ -52,7 +52,7 @@
  * This server does not yet support these XFree86 4.0 features:
  * !!!! FIXME !!!!
  *   DDC1 & DDC2
- *   shadowfb
+ *   shadowfb (Note: dri uses shadow for another purpose in radeon_dri.c)
  *   overlay planes
  *
  * Modified by Marc Aurele La France (tsi@xfree86.org) for ATI driver merge.
@@ -130,6 +130,7 @@ typedef enum {
     OPTION_RING_SIZE,
     OPTION_BUFFER_SIZE,
     OPTION_DEPTH_MOVE,
+    OPTION_PAGE_FLIP,
 #endif
     OPTION_CRT_SCREEN,
     OPTION_PANEL_SIZE,
@@ -150,6 +151,7 @@ const OptionInfoRec RADEONOptions[] = {
     { OPTION_RING_SIZE,    "RingSize",         OPTV_INTEGER, {0}, FALSE },
     { OPTION_BUFFER_SIZE,  "BufferSize",       OPTV_INTEGER, {0}, FALSE },
     { OPTION_DEPTH_MOVE,   "EnableDepthMoves", OPTV_BOOLEAN, {0}, FALSE },
+    { OPTION_PAGE_FLIP, "EnablePageFlip",  OPTV_BOOLEAN, {0}, FALSE },
 #endif
     { OPTION_CRT_SCREEN,   "CrtScreen",        OPTV_BOOLEAN, {0}, FALSE},
     { OPTION_PANEL_SIZE,   "PanelSize",        OPTV_ANYSTR,  {0}, FALSE },
@@ -290,6 +292,11 @@ static const char *driSymbols[] = {
     "GlxSetVisualConfigs",
     NULL
 };
+
+static const char *driShadowSymbols[] = {  /* "shadow" submodule entry points */
+    "shadowInit", "shadowSetup", "shadowAdd", /* last two: radeon_dri.c */
+    NULL
+};
 #endif
 
 static const char *vbeSymbols[] = {
@@ -1977,6 +1984,24 @@ static Bool RADEONPreInitDRI(ScrnInfoPtr pScrn)
 		   "Depth moves disabled by default\n");
     }
 
+
+    if (!xf86LoadSubModule(pScrn, "shadow")) {
+       info->allowPageFlip = 0;
+       xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Disabling page flipping\n");
+    }
+    else {
+       xf86LoaderReqSymLists(driShadowSymbols, NULL);
+
+       if ((info->allowPageFlip = xf86ReturnOptValBool(info->Options,
+						       OPTION_PAGE_FLIP, 
+						       FALSE))) {
+	  xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Page flipping enabled\n");
+       } else {
+	  xf86DrvMsg(pScrn->scrnIndex, X_CONFIG, "Disabling page flipping\n");
+       }
+    }
+
+
     return TRUE;
 }
 #endif
@@ -2634,7 +2659,7 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, int argc, char **argv)
 	 * pixmap cache.  Should be enough for a fullscreen background
 	 * image plus some leftovers.
 	 */
-	info->textureSize = info->FbMapSize - 6 * bufferSize;
+	info->textureSize = info->FbMapSize - 4 * bufferSize;
 
 	/* If that gives us less than half the available memory, let's
 	 * be greedy and grab some more.  Sorry, I care more about 3D
@@ -2698,6 +2723,9 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, int argc, char **argv)
 	scanlines = info->backOffset / width_bytes - 1;
 	if (scanlines > 8191) scanlines = 8191;
 
+	info->backY = scanlines;
+	info->backX = (info->backOffset - (scanlines * width_bytes - 1)) / cpp;
+
 	MemBox.x1 = 0;
 	MemBox.y1 = 0;
 	MemBox.x2 = pScrn->displayWidth;
diff --git a/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_reg.h b/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_reg.h
index c447364e4..1d56204e9 100644
--- a/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_reg.h
+++ b/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_reg.h
@@ -327,7 +327,6 @@
 #define RADEON_CRC_CMDFIFO_ADDR             0x0740
 #define RADEON_CRC_CMDFIFO_DOUT             0x0744
 #define RADEON_CRTC_CRNT_FRAME              0x0214
-#define RADEON_CRTC_DEBUG                   0x021c
 #define RADEON_CRTC_EXT_CNTL                0x0054
 #       define RADEON_CRTC_VGA_XOVERSCAN    (1 <<  0)
 #       define RADEON_VGA_ATI_LINEAR        (1 <<  3)
@@ -426,7 +425,6 @@
 #define RADEON_CRTC_VLINE_CRNT_VLINE        0x0210
 #       define RADEON_CRTC_CRNT_VLINE_MASK  (0x7ff << 16)
 #define RADEON_CRTC2_CRNT_FRAME             0x0314
-#define RADEON_CRTC2_DEBUG                  0x031c
 #define RADEON_CRTC2_GUI_TRIG_VLINE         0x0318
 #define RADEON_CRTC2_STATUS                 0x03fc
 #define RADEON_CRTC2_VLINE_CRNT_VLINE       0x0310
@@ -739,13 +737,6 @@
 
 #define RADEON_GRPH8_DATA                   0x03cf /* VGA */
 #define RADEON_GRPH8_IDX                    0x03ce /* VGA */
-#define RADEON_GUI_DEBUG0                   0x16a0
-#define RADEON_GUI_DEBUG1                   0x16a4
-#define RADEON_GUI_DEBUG2                   0x16a8
-#define RADEON_GUI_DEBUG3                   0x16ac
-#define RADEON_GUI_DEBUG4                   0x16b0
-#define RADEON_GUI_DEBUG5                   0x16b4
-#define RADEON_GUI_DEBUG6                   0x16b8
 #define RADEON_GUI_SCRATCH_REG0             0x15e0
 #define RADEON_GUI_SCRATCH_REG1             0x15e4
 #define RADEON_GUI_SCRATCH_REG2             0x15e8
@@ -766,8 +757,6 @@
 #       define RADEON_HDP_SOFT_RESET        (1 << 26)
 #define RADEON_HTOTAL_CNTL                  0x0009 /* PLL */
 #define RADEON_HTOTAL2_CNTL                 0x002e /* PLL */
-#define RADEON_HW_DEBUG                     0x0128
-#define RADEON_HW_DEBUG2                    0x011c
 
 #define RADEON_I2C_CNTL_1                   0x0094 /* ? */
 #define RADEON_DVI_I2C_CNTL_1               0x02e4 /* ? */
@@ -1065,9 +1054,6 @@
 #define RADEON_SURFACE7_UPPER_BOUND         0x0b78
 #define RADEON_SW_SEMAPHORE                 0x013c
 
-#define RADEON_TEST_DEBUG_CNTL              0x0120
-#define RADEON_TEST_DEBUG_MUX               0x0124
-#define RADEON_TEST_DEBUG_OUT               0x012c
 #define RADEON_TMDS_CRC                     0x02a0
 #define RADEON_TRAIL_BRES_DEC               0x1614
 #define RADEON_TRAIL_BRES_ERR               0x160c
@@ -1191,7 +1177,7 @@
 #       define RADEON_MAX_ANISO_8_TO_1                     (3  <<  5)
 #       define RADEON_MAX_ANISO_16_TO_1                    (4  <<  5)
 #       define RADEON_MAX_ANISO_MASK                       (7  <<  5)
-#       define RADEON_LOD_BIAS_MASK                        (0xffff <<  8)
+#       define RADEON_LOD_BIAS_MASK                        (0xff <<  8)
 #       define RADEON_LOD_BIAS_SHIFT                       8
 #       define RADEON_MAX_MIP_LEVEL_MASK                   (0x0f << 16)
 #       define RADEON_MAX_MIP_LEVEL_SHIFT                  16
@@ -1438,8 +1424,6 @@
 #       define RADEON_COLOR_FORMAT_aYUV444     (14 << 10)
 #       define RADEON_COLOR_FORMAT_ARGB4444    (15 << 10)
 #       define RADEON_CLRCMP_FLIP_ENABLE       (1  << 14)
-#       define RADEON_ZBLOCK8                  (0  << 15)
-#       define RADEON_ZBLOCK16                 (1  << 15)
 #define RADEON_RB3D_COLOROFFSET             0x1c40
 #       define RADEON_COLOROFFSET_MASK      0xfffffff0
 #define RADEON_RB3D_COLORPITCH              0x1c48
@@ -1500,7 +1484,6 @@
 #       define RADEON_Z_TEST_NEQUAL              (6  <<  4)
 #       define RADEON_Z_TEST_ALWAYS              (7  <<  4)
 #       define RADEON_Z_TEST_MASK                (7  <<  4)
-#       define RADEON_HIERARCHICAL_Z_ENABLE      (1  <<  8)
 #       define RADEON_STENCIL_TEST_NEVER         (0  << 12)
 #       define RADEON_STENCIL_TEST_LESS          (1  << 12)
 #       define RADEON_STENCIL_TEST_LEQUAL        (2  << 12)
@@ -1531,10 +1514,7 @@
 #       define RADEON_STENCIL_ZFAIL_DEC          (4  << 24)
 #       define RADEON_STENCIL_ZFAIL_INVERT       (5  << 24)
 #       define RADEON_STENCIL_ZFAIL_MASK         (0x7 << 24)
-#       define RADEON_Z_COMPRESSION_ENABLE       (1  << 28)
-#       define RADEON_FORCE_Z_DIRTY              (1  << 29)
 #       define RADEON_Z_WRITE_ENABLE             (1  << 30)
-#       define RADEON_Z_DECOMPRESSION_ENABLE     (1  << 31)
 #define RADEON_RE_LINE_PATTERN              0x1cd0
 #       define RADEON_LINE_PATTERN_MASK             0x0000ffff
 #       define RADEON_LINE_REPEAT_COUNT_SHIFT       16
@@ -1613,7 +1593,7 @@
 #       define RADEON_VC_32BIT_SWAP         (2 << 0)
 #       define RADEON_VC_HALF_DWORD_SWAP    (3 << 0)
 #       define RADEON_TCL_BYPASS            (1 << 8)
-#define RADEON_SE_COORD_FMT                 0x15c0
+#define RADEON_SE_COORD_FMT                 0x1c50
 #       define RADEON_VTX_XY_PRE_MULT_1_OVER_W0  (1 <<  0)
 #       define RADEON_VTX_Z_PRE_MULT_1_OVER_W0   (1 <<  1)
 #       define RADEON_VTX_ST0_NONPARAMETRIC      (1 <<  8)
@@ -1629,7 +1609,6 @@
 #       define RADEON_TEX1_W_ROUTING_USE_W0      (0 << 26)
 #       define RADEON_TEX1_W_ROUTING_USE_Q1      (1 << 26)
 #define RADEON_SE_LINE_WIDTH                0x1db8
-#define RADEON_SE_TCL_LIGHT_MODEL_CTL       0x226c
 #define RADEON_SE_TCL_MATERIAL_AMBIENT_RED     0x2220
 #define RADEON_SE_TCL_MATERIAL_AMBIENT_GREEN   0x2224
 #define RADEON_SE_TCL_MATERIAL_AMBIENT_BLUE    0x2228
@@ -1646,17 +1625,185 @@
 #define RADEON_SE_TCL_MATERIAL_SPECULAR_GREEN  0x2244
 #define RADEON_SE_TCL_MATERIAL_SPECULAR_BLUE   0x2248
 #define RADEON_SE_TCL_MATERIAL_SPECULAR_ALPHA  0x224c
-#define RADEON_SE_TCL_MATRIX_SELECT_0       0x225c
-#define RADEON_SE_TCL_MATRIX_SELECT_1       0x2260
+#define RADEON_SE_TCL_SHININESS             0x2250
+
 #define RADEON_SE_TCL_OUTPUT_VTX_FMT        0x2254
+#define     RADEON_TCL_VTX_W0                     (1<<0)
+#define     RADEON_TCL_VTX_FP_DIFFUSE             (1<<1)
+#define     RADEON_TCL_VTX_FP_ALPHA               (1<<2)
+#define     RADEON_TCL_VTX_PK_DIFFUSE             (1<<3)
+#define     RADEON_TCL_VTX_FP_SPEC                (1<<4)
+#define     RADEON_TCL_VTX_FP_FOG                 (1<<5)
+#define     RADEON_TCL_VTX_PK_SPEC                (1<<6)
+#define     RADEON_TCL_VTX_ST0                    (1<<7)
+#define     RADEON_TCL_VTX_ST1                    (1<<8)
+#define     RADEON_TCL_VTX_Q1                     (1<<9)
+#define     RADEON_TCL_VTX_ST2                    (1<<10)
+#define     RADEON_TCL_VTX_Q2                     (1<<11)
+#define     RADEON_TCL_VTX_ST3                    (1<<12)
+#define     RADEON_TCL_VTX_Q3                     (1<<13)
+#define     RADEON_TCL_VTX_Q0                     (1<<14)
+#define     RADEON_TCL_VTX_WEIGHT_COUNT_SHIFT     (15)
+#define     RADEON_TCL_VTX_NORM0                  (1<<18)
+#define     RADEON_TCL_VTX_XY1                    (1<<27)
+#define     RADEON_TCL_VTX_Z1                     (1<<28)
+#define     RADEON_TCL_VTX_W1                     (1<<29)
+#define     RADEON_TCL_VTX_NORM1                  (1<<30)
+#define     RADEON_TCL_VTX_Z0                     (1<<31)
+
+
 #define RADEON_SE_TCL_OUTPUT_VTX_SEL        0x2258
+#define     RADEON_TCL_COMPUTE_XYZW              (1<<0)
+#define     RADEON_TCL_COMPUTE_DIFFUSE           (1<<1)
+#define     RADEON_TCL_COMPUTE_SPECULAR          (1<<2)
+#define     RADEON_TCL_FORCE_NAN_IF_COLOR_NAN    (1<<3)
+#define     RADEON_TCL_FORCE_INORDER_PROC        (1<<4)
+#define     RADEON_TCL_TEX_INPUT_TEX_0           (0)
+#define     RADEON_TCL_TEX_INPUT_TEX_1           (1)
+#define     RADEON_TCL_TEX_INPUT_TEX_2           (2)
+#define     RADEON_TCL_TEX_INPUT_TEX_3           (3)
+#define     RADEON_TCL_TEX_COMPUTED_TEX_0        (8)
+#define     RADEON_TCL_TEX_COMPUTED_TEX_1        (9)
+#define     RADEON_TCL_TEX_COMPUTED_TEX_2        (10)
+#define     RADEON_TCL_TEX_COMPUTED_TEX_3        (11)
+#define     RADEON_TCL_TEX_0_OUTPUT_SHIFT        (16)
+#define     RADEON_TCL_TEX_1_OUTPUT_SHIFT        (20)
+#define     RADEON_TCL_TEX_2_OUTPUT_SHIFT        (24)
+#define     RADEON_TCL_TEX_3_OUTPUT_SHIFT        (28)
+
+#define RADEON_SE_TCL_MATRIX_SELECT_0       0x225c
+#define       RADEON_MODELVIEW_0_SHIFT           (0)
+#define       RADEON_MODELVIEW_1_SHIFT           (4)
+#define       RADEON_MODELVIEW_2_SHIFT           (8)
+#define       RADEON_MODELVIEW_3_SHIFT           (12)
+#define       RADEON_IT_MODELVIEW_0_SHIFT        (16)
+#define       RADEON_IT_MODELVIEW_1_SHIFT        (20)
+#define       RADEON_IT_MODELVIEW_2_SHIFT        (24)
+#define       RADEON_IT_MODELVIEW_3_SHIFT        (28)
+
+#define RADEON_SE_TCL_MATRIX_SELECT_1       0x2260
+#define       RADEON_MODELPROJECT_0_SHIFT           (0)
+#define       RADEON_MODELPROJECT_1_SHIFT           (4)
+#define       RADEON_MODELPROJECT_2_SHIFT           (8)
+#define       RADEON_MODELPROJECT_3_SHIFT           (12)
+#define       RADEON_TEXMAT_0_SHIFT                 (16)
+#define       RADEON_TEXMAT_1_SHIFT                 (20)
+#define       RADEON_TEXMAT_2_SHIFT                 (24)
+#define       RADEON_TEXMAT_3_SHIFT                 (28)
+
+#define RADEON_SE_TCL_UCP_VERT_BLEND_CTL    0x2264 /* indented defines below: presumably this register's bit fields */
+#define       RADEON_UCP_IN_CLIP_SPACE              (1<<0)
+#define       RADEON_UCP_IN_MODEL_SPACE             (1<<1)
+#define       RADEON_UCP_ENABLE_0                   (1<<2)
+#define       RADEON_UCP_ENABLE_1                   (1<<3)
+#define       RADEON_UCP_ENABLE_2                   (1<<4)
+#define       RADEON_UCP_ENABLE_3                   (1<<5)
+#define       RADEON_UCP_ENABLE_4                   (1<<6)
+#define       RADEON_UCP_ENABLE_5                   (1<<7)
+#define       RADEON_TCL_FOG_MASK                   (3<<8) /* covers the four RADEON_TCL_FOG_* values below */
+#define       RADEON_TCL_FOG_DISABLE                (0<<8)
+#define       RADEON_TCL_FOG_EXP                    (1<<8)
+#define       RADEON_TCL_FOG_EXP2                   (2<<8)
+#define       RADEON_TCL_FOG_LINEAR                 (3<<8)
+#define       RADEON_RNG_BASED_FOG                  (1<<10)
+#define       RADEON_LIGHT_TWOSIDE                  (1<<11)
+#define       RADEON_BLEND_OP_COUNT_MASK            (7<<12) /* 3-bit field at RADEON_BLEND_OP_COUNT_SHIFT */
+#define       RADEON_BLEND_OP_COUNT_SHIFT           (12)
+#define       RADEON_POSITION_BLEND_OP_ENABLE       (1<<16)
+#define       RADEON_NORMAL_BLEND_OP_ENABLE         (1<<17)
+#define       RADEON_VERTEX_BLEND_SRC_0_PRIMARY     (1<<18)
+#define       RADEON_VERTEX_BLEND_SRC_0_SECONDARY   (1<<18) /* NOTE(review): same value as _PRIMARY; one of the pair is presumably 0 -- confirm against hardware docs */
+#define       RADEON_VERTEX_BLEND_SRC_1_PRIMARY     (1<<19)
+#define       RADEON_VERTEX_BLEND_SRC_1_SECONDARY   (1<<19) /* NOTE(review): same value as _PRIMARY -- confirm */
+#define       RADEON_VERTEX_BLEND_SRC_2_PRIMARY     (1<<20)
+#define       RADEON_VERTEX_BLEND_SRC_2_SECONDARY   (1<<20) /* NOTE(review): same value as _PRIMARY -- confirm */
+#define       RADEON_VERTEX_BLEND_SRC_3_PRIMARY     (1<<21)
+#define       RADEON_VERTEX_BLEND_SRC_3_SECONDARY   (1<<21) /* NOTE(review): same value as _PRIMARY -- confirm */
+#define       RADEON_VERTEX_BLEND_WGT_MINUS_ONE     (1<<22)
+#define       RADEON_CULL_FRONT_IS_CW               (0<<28)
+#define       RADEON_CULL_FRONT_IS_CCW              (1<<28)
+#define       RADEON_CULL_FRONT                     (1<<29)
+#define       RADEON_CULL_BACK                      (1<<30)
+#define       RADEON_FORCE_W_TO_ONE                 (1<<31)
+
+#define RADEON_SE_TCL_TEXTURE_PROC_CTL      0x2268
+#define       RADEON_TEXGEN_TEXMAT_0_ENABLE         (1<<0)
+#define       RADEON_TEXGEN_TEXMAT_1_ENABLE         (1<<1)
+#define       RADEON_TEXGEN_TEXMAT_2_ENABLE         (1<<2)
+#define       RADEON_TEXGEN_TEXMAT_3_ENABLE         (1<<3)
+#define       RADEON_TEXMAT_0_ENABLE                (1<<4)
+#define       RADEON_TEXMAT_1_ENABLE                (1<<5)
+#define       RADEON_TEXMAT_2_ENABLE                (1<<6)
+#define       RADEON_TEXMAT_3_ENABLE                (1<<7)
+#define       RADEON_TEXGEN_INPUT_MASK           (0xf)
+#define       RADEON_TEXGEN_INPUT_TEXCOORD_0     (0)
+#define       RADEON_TEXGEN_INPUT_TEXCOORD_1     (1)
+#define       RADEON_TEXGEN_INPUT_TEXCOORD_2     (2)
+#define       RADEON_TEXGEN_INPUT_TEXCOORD_3     (3)
+#define       RADEON_TEXGEN_INPUT_OBJ            (4)
+#define       RADEON_TEXGEN_INPUT_EYE            (5)
+#define       RADEON_TEXGEN_INPUT_EYE_NORMAL     (6)
+#define       RADEON_TEXGEN_INPUT_EYE_REFLECT    (7)
+#define       RADEON_TEXGEN_INPUT_EYE_NORMALIZED (8)
+#define       RADEON_TEXGEN_0_INPUT_SHIFT        (16)
+#define       RADEON_TEXGEN_1_INPUT_SHIFT        (20)
+#define       RADEON_TEXGEN_2_INPUT_SHIFT        (24)
+#define       RADEON_TEXGEN_3_INPUT_SHIFT        (28)
+
+
+#define RADEON_SE_TCL_LIGHT_MODEL_CTL       0x226c
+#define       RADEON_LIGHTING_ENABLE                (1<<0)
+#define       RADEON_LIGHT_IN_MODELSPACE            (1<<1)
+#define       RADEON_LOCAL_VIEWER                   (1<<2)
+#define       RADEON_NORMALIZE_NORMALS              (1<<3)
+#define       RADEON_RESCALE_NORMALS                (1<<4)
+#define       RADEON_SPECULAR_LIGHTS                (1<<5)
+#define       RADEON_DIFFUSE_SPECULAR_COMBINE       (1<<6)
+#define       RADEON_LIGHT_ALPHA                    (1<<7)
+#define       RADEON_LOCAL_LIGHT_VEC_GL             (1<<8)
+#define       RADEON_LIGHT_NO_NORMAL_AMBIENT_ONLY   (1<<9)
+#define       RADEON_LM_SOURCE_STATE_PREMULT        (0)
+#define       RADEON_LM_SOURCE_STATE_MULT           (1)
+#define       RADEON_LM_SOURCE_VERTEX_DIFFUSE       (2)
+#define       RADEON_LM_SOURCE_VERTEX_SPECULAR      (3)
+#define       RADEON_EMISSIVE_SOURCE_SHIFT         (16)
+#define       RADEON_AMBIENT_SOURCE_SHIFT          (18)
+#define       RADEON_DIFFUSE_SOURCE_SHIFT          (20)
+#define       RADEON_SPECULAR_SOURCE_SHIFT         (22)
+
+
 #define RADEON_SE_TCL_PER_LIGHT_CTL_0       0x2270
+#define       RADEON_LIGHT_0_SHIFT                   (0)
+#define       RADEON_LIGHT_1_SHIFT                   (16)
 #define RADEON_SE_TCL_PER_LIGHT_CTL_1       0x2274
+#define       RADEON_LIGHT_2_SHIFT                   (0)
+#define       RADEON_LIGHT_3_SHIFT                   (16)
 #define RADEON_SE_TCL_PER_LIGHT_CTL_2       0x2278
+#define       RADEON_LIGHT_4_SHIFT                   (0)
+#define       RADEON_LIGHT_5_SHIFT                   (16)
 #define RADEON_SE_TCL_PER_LIGHT_CTL_3       0x227c
-#define RADEON_SE_TCL_SHININESS             0x2250
-#define RADEON_SE_TCL_TEXTURE_PROC_CTL      0x2268
-#define RADEON_SE_TCL_UCP_VERT_BLEND_CTL    0x2264
+#define       RADEON_LIGHT_6_SHIFT                   (0)
+#define       RADEON_LIGHT_7_SHIFT                   (16)
+
+#define       RADEON_LIGHT_0_ENABLE                    (1<<0)
+#define       RADEON_LIGHT_0_ENABLE_AMBIENT            (1<<1)
+#define       RADEON_LIGHT_0_ENABLE_SPECULAR           (1<<2)
+#define       RADEON_LIGHT_0_IS_LOCAL                  (1<<3)
+#define       RADEON_LIGHT_0_IS_SPOT                   (1<<4)
+#define       RADEON_LIGHT_0_DUAL_CONE                 (1<<5)
+#define       RADEON_LIGHT_0_ENABLE_RANGE_ATTEN        (1<<6)
+#define       RADEON_LIGHT_0_CONSTANT_RANGE_ATTEN      (1<<7)
+
+#define       RADEON_LIGHT_1_ENABLE                    (1<<16)
+#define       RADEON_LIGHT_1_ENABLE_AMBIENT            (1<<17)
+#define       RADEON_LIGHT_1_ENABLE_SPECULAR           (1<<18)
+#define       RADEON_LIGHT_1_IS_LOCAL                  (1<<19)
+#define       RADEON_LIGHT_1_IS_SPOT                   (1<<20)
+#define       RADEON_LIGHT_1_DUAL_CONE                 (1<<21)
+#define       RADEON_LIGHT_1_ENABLE_RANGE_ATTEN        (1<<22)
+#define       RADEON_LIGHT_1_CONSTANT_RANGE_ATTEN      (1<<23)
+
+
 #define RADEON_SE_VPORT_XSCALE              0x1d98
 #define RADEON_SE_VPORT_XOFFSET             0x1d9c
 #define RADEON_SE_VPORT_YSCALE              0x1da0
@@ -1732,14 +1879,12 @@
 #define RADEON_CP_PACKET3_SET_SCISSORS              0xC0001E00
 #define RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM     0xC0002300
 #define RADEON_CP_PACKET3_LOAD_MICROCODE            0xC0002400
-#define RADEON_CP_PACKET3_3D_RNDR_GEN_PRIM          0xC0002500
 #define RADEON_CP_PACKET3_WAIT_FOR_IDLE             0xC0002600
 #define RADEON_CP_PACKET3_3D_DRAW_VBUF              0xC0002800
 #define RADEON_CP_PACKET3_3D_DRAW_IMMD              0xC0002900
 #define RADEON_CP_PACKET3_3D_DRAW_INDX              0xC0002A00
 #define RADEON_CP_PACKET3_LOAD_PALETTE              0xC0002C00
 #define RADEON_CP_PACKET3_3D_LOAD_VBPNTR            0xC0002F00
-#define RADEON_CP_PACKET3_3D_CLEAR_ZMASK            0xC0003200
 #define RADEON_CP_PACKET3_CNTL_PAINT                0xC0009100
 #define RADEON_CP_PACKET3_CNTL_BITBLT               0xC0009200
 #define RADEON_CP_PACKET3_CNTL_SMALLTEXT            0xC0009300
@@ -1794,7 +1939,48 @@
 #define RADEON_CP_VC_CNTL_MAOS_ENABLE               0x00000080
 #define RADEON_CP_VC_CNTL_VTX_FMT_NON_RADEON_MODE   0x00000000
 #define RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE       0x00000100
+#define RADEON_CP_VC_CNTL_TCL_DISABLE               0x00000000
+#define RADEON_CP_VC_CNTL_TCL_ENABLE                0x00000200
 #define RADEON_CP_VC_CNTL_NUM_SHIFT                 16
 
+#define RADEON_VS_MATRIX_0_ADDR        0
+#define RADEON_VS_MATRIX_1_ADDR        4
+#define RADEON_VS_MATRIX_2_ADDR        8
+#define RADEON_VS_MATRIX_3_ADDR        12
+#define RADEON_VS_MATRIX_4_ADDR        16
+#define RADEON_VS_MATRIX_5_ADDR        20
+#define RADEON_VS_MATRIX_6_ADDR        24
+#define RADEON_VS_MATRIX_7_ADDR        28
+#define RADEON_VS_MATRIX_8_ADDR        32
+#define RADEON_VS_MATRIX_9_ADDR        36
+#define RADEON_VS_MATRIX_10_ADDR       40
+#define RADEON_VS_MATRIX_11_ADDR       44
+#define RADEON_VS_MATRIX_12_ADDR       48
+#define RADEON_VS_MATRIX_13_ADDR       52
+#define RADEON_VS_MATRIX_14_ADDR       56
+#define RADEON_VS_MATRIX_15_ADDR       60
+#define RADEON_VS_LIGHT_AMBIENT_ADDR             64
+#define RADEON_VS_LIGHT_DIFFUSE_ADDR             72
+#define RADEON_VS_LIGHT_SPECULAR_ADDR            80
+#define RADEON_VS_LIGHT_DIRPOS_ADDR              88
+#define RADEON_VS_LIGHT_HWVSPOT_ADDR             96
+#define RADEON_VS_LIGHT_ATTENUATION_ADDR        104
+#define RADEON_VS_MATRIX_EYE2CLIP_ADDR          112
+#define RADEON_VS_UCP_ADDR                      116
+#define RADEON_VS_GLOBAL_AMBIENT_ADDR           122
+#define RADEON_VS_FOG_PARAM_ADDR                123
+#define RADEON_VS_EYE_VECTOR_ADDR               124
+
+#define RADEON_SS_LIGHT_DCD_ADDR                  0
+#define RADEON_SS_LIGHT_SPOT_EXPONENT_ADDR        8
+#define RADEON_SS_LIGHT_SPOT_CUTOFF_ADDR         16
+#define RADEON_SS_LIGHT_SPECULAR_THRESH_ADDR     24
+#define RADEON_SS_LIGHT_RANGE_CUTOFF_ADDR        32
+#define RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR       48
+#define RADEON_SS_VERT_GUARD_DISCARD_ADJ_ADDR    49
+#define RADEON_SS_HORZ_GUARD_CLIP_ADJ_ADDR       50
+#define RADEON_SS_HORZ_GUARD_DISCARD_ADJ_ADDR    51
+#define RADEON_SS_SHININESS                      60
+
 #endif
 
diff --git a/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_sarea.h b/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_sarea.h
index cdcdd5fa9..2f93e6be9 100644
--- a/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_sarea.h
+++ b/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_sarea.h
@@ -173,23 +173,6 @@ typedef struct {
     /* Setup state */
     unsigned int se_cntl_status;
 
-#ifdef TCL_ENABLE
-    /* TCL state */
-    radeon_color_regs_t se_tcl_material_emmissive;
-    radeon_color_regs_t se_tcl_material_ambient;
-    radeon_color_regs_t se_tcl_material_diffuse;
-    radeon_color_regs_t se_tcl_material_specular;
-    unsigned int se_tcl_shininess;
-    unsigned int se_tcl_output_vtx_fmt;
-    unsigned int se_tcl_output_vtx_sel;
-    unsigned int se_tcl_matrix_select_0;
-    unsigned int se_tcl_matrix_select_1;
-    unsigned int se_tcl_ucp_vert_blend_ctl;
-    unsigned int se_tcl_texture_proc_ctl;
-    unsigned int se_tcl_light_model_ctl;
-    unsigned int se_tcl_per_light_ctl[4];
-#endif
-
     /* Misc state */
     unsigned int re_top_left;
     unsigned int re_misc;
@@ -203,13 +186,7 @@ typedef struct {
     unsigned int pp_txcblend;
     unsigned int pp_txablend;
     unsigned int pp_tfactor;
-
     unsigned int pp_border_color;
-
-#ifdef CUBIC_ENABLE
-    unsigned int pp_cubic_faces;
-    unsigned int pp_cubic_offset[5];
-#endif
 } radeon_texture_regs_t;
 
 typedef struct {
@@ -259,6 +236,8 @@ typedef struct {
     int texAge[RADEON_NR_TEX_HEAPS];
 
     int ctxOwner;		/* last context to upload state */
+    int pfAllowPageFlip;        /* set by the 2d driver, read by the client */
+    int pfCurrentPage;		/* set by kernel, read by others */
 } RADEONSAREAPriv, *RADEONSAREAPrivPtr;
 
 #endif
diff --git a/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/Imakefile b/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/Imakefile
index cf6ba5724..cacdc7a11 100644
--- a/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/Imakefile
+++ b/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/Imakefile
@@ -17,3 +17,4 @@ all::
 
 clean::
 	$(MAKE) -f Makefile.linux clean
+
diff --git a/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/drm.h b/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/drm.h
index d116f3752..6ab295c48 100644
--- a/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/drm.h
+++ b/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/drm.h
@@ -84,6 +84,10 @@ typedef unsigned int  drm_magic_t;
 /* Warning: If you change this structure, make sure you change
  * XF86DRIClipRectRec in the server as well */
 
+/* KW: Actually it's illegal to change either for
+ * backwards-compatibility reasons.
+ */
+
 typedef struct drm_clip_rect {
 	unsigned short	x1;
 	unsigned short	y1;
diff --git a/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/radeon_cp.c b/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/radeon_cp.c
index 14901f59c..5486f1c13 100644
--- a/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/radeon_cp.c
+++ b/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/radeon_cp.c
@@ -461,6 +461,7 @@ int radeon_do_cp_idle( drm_radeon_private_t *dev_priv )
 	RADEON_WAIT_UNTIL_IDLE();
 
 	ADVANCE_RING();
+	COMMIT_RING();
 
 	return radeon_do_wait_for_idle( dev_priv );
 }
@@ -485,6 +486,7 @@ static void radeon_do_cp_start( drm_radeon_private_t *dev_priv )
 	RADEON_WAIT_UNTIL_IDLE();
 
 	ADVANCE_RING();
+	COMMIT_RING();
 }
 
 /* Reset the Command Processor.  This will not flush any pending
@@ -751,7 +753,7 @@ static int radeon_do_init_cp( drm_device_t *dev, drm_radeon_init_t *init )
 	 */
 	dev_priv->depth_clear.rb3d_cntl = (RADEON_PLANE_MASK_ENABLE |
 					   (dev_priv->color_fmt << 10) |
-					   RADEON_ZBLOCK16);
+					   (1<<15));
 
 	dev_priv->depth_clear.rb3d_zstencilcntl = 
 		(dev_priv->depth_fmt |
@@ -970,9 +972,7 @@ static int radeon_do_init_cp( drm_device_t *dev, drm_radeon_init_t *init )
 	radeon_cp_load_microcode( dev_priv );
 	radeon_cp_init_ring_buffer( dev, dev_priv );
 
-#if ROTATE_BUFS
 	dev_priv->last_buf = 0;
-#endif
 
 	dev->dev_private = (void *)dev_priv;
 
@@ -1152,116 +1152,27 @@ int radeon_engine_reset( struct inode *inode, struct file *filp,
  * Fullscreen mode
  */
 
-static int radeon_do_init_pageflip( drm_device_t *dev )
-{
-	drm_radeon_private_t *dev_priv = dev->dev_private;
-	DRM_DEBUG( "%s\n", __FUNCTION__ );
-
-	dev_priv->crtc_offset =      RADEON_READ( RADEON_CRTC_OFFSET );
-	dev_priv->crtc_offset_cntl = RADEON_READ( RADEON_CRTC_OFFSET_CNTL );
-
-	RADEON_WRITE( RADEON_CRTC_OFFSET, dev_priv->front_offset );
-	RADEON_WRITE( RADEON_CRTC_OFFSET_CNTL,
-		      dev_priv->crtc_offset_cntl |
-		      RADEON_CRTC_OFFSET_FLIP_CNTL );
-
-	dev_priv->page_flipping = 1;
-	dev_priv->current_page = 0;
-
-	return 0;
-}
-
-int radeon_do_cleanup_pageflip( drm_device_t *dev )
-{
-	drm_radeon_private_t *dev_priv = dev->dev_private;
-	DRM_DEBUG( "%s\n", __FUNCTION__ );
-
-	RADEON_WRITE( RADEON_CRTC_OFFSET,      dev_priv->crtc_offset );
-	RADEON_WRITE( RADEON_CRTC_OFFSET_CNTL, dev_priv->crtc_offset_cntl );
-
-	dev_priv->page_flipping = 0;
-	dev_priv->current_page = 0;
-
-	return 0;
-}
-
+/* KW: Deprecated to say the least:
+ */
 int radeon_fullscreen( struct inode *inode, struct file *filp,
 		       unsigned int cmd, unsigned long arg )
 {
-        drm_file_t *priv = filp->private_data;
-        drm_device_t *dev = priv->dev;
-	drm_radeon_fullscreen_t fs;
-
-	LOCK_TEST_WITH_RETURN( dev );
-
-	if ( copy_from_user( &fs, (drm_radeon_fullscreen_t *)arg,
-			     sizeof(fs) ) )
-		return -EFAULT;
-
-	switch ( fs.func ) {
-	case RADEON_INIT_FULLSCREEN:
-		return radeon_do_init_pageflip( dev );
-	case RADEON_CLEANUP_FULLSCREEN:
-		return radeon_do_cleanup_pageflip( dev );
-	}
-
-	return -EINVAL;
+	return 0;
 }
 
 
 /* ================================================================
  * Freelist management
  */
-#define RADEON_BUFFER_USED	0xffffffff
-#define RADEON_BUFFER_FREE	0
-
-#if 0
-static int radeon_freelist_init( drm_device_t *dev )
-{
-	drm_device_dma_t *dma = dev->dma;
-	drm_radeon_private_t *dev_priv = dev->dev_private;
-	drm_buf_t *buf;
-	drm_radeon_buf_priv_t *buf_priv;
-	drm_radeon_freelist_t *entry;
-	int i;
-
-	dev_priv->head = DRM(alloc)( sizeof(drm_radeon_freelist_t),
-				     DRM_MEM_DRIVER );
-	if ( dev_priv->head == NULL )
-		return -ENOMEM;
-
-	memset( dev_priv->head, 0, sizeof(drm_radeon_freelist_t) );
-	dev_priv->head->age = RADEON_BUFFER_USED;
-
-	for ( i = 0 ; i < dma->buf_count ; i++ ) {
-		buf = dma->buflist[i];
-		buf_priv = buf->dev_private;
-
-		entry = DRM(alloc)( sizeof(drm_radeon_freelist_t),
-				    DRM_MEM_DRIVER );
-		if ( !entry ) return -ENOMEM;
-
-		entry->age = RADEON_BUFFER_FREE;
-		entry->buf = buf;
-		entry->prev = dev_priv->head;
-		entry->next = dev_priv->head->next;
-		if ( !entry->next )
-			dev_priv->tail = entry;
 
-		buf_priv->discard = 0;
-		buf_priv->dispatched = 0;
-		buf_priv->list_entry = entry;
-
-		dev_priv->head->next = entry;
-
-		if ( dev_priv->head->next )
-			dev_priv->head->next->prev = entry;
-	}
-
-	return 0;
-
-}
-#endif
+/* Original comment: FIXME: ROTATE_BUFS is a hack to cycle through
+ *   bufs until freelist code is used.  Note this hides a problem with
+ *   the scratch register (used to keep track of last buffer
+ *   completed) being written to before the last buffer has actually
+ *   completed rendering.  
+ *
+ * KW:  It's also a good way to find free buffers quickly.
+ */
 
 drm_buf_t *radeon_freelist_get( drm_device_t *dev )
 {
@@ -1270,57 +1181,24 @@ drm_buf_t *radeon_freelist_get( drm_device_t *dev )
 	drm_radeon_buf_priv_t *buf_priv;
 	drm_buf_t *buf;
 	int i, t;
-#if ROTATE_BUFS
 	int start;
-#endif
-
-	/* FIXME: Optimize -- use freelist code */
 
-	for ( i = 0 ; i < dma->buf_count ; i++ ) {
-		buf = dma->buflist[i];
-		buf_priv = buf->dev_private;
-		if ( buf->pid == 0 ) {
-			DRM_DEBUG( "  ret buf=%d last=%d pid=0\n",
-				   buf->idx, dev_priv->last_buf );
-			return buf;
-		}
-		DRM_DEBUG( "    skipping buf=%d pid=%d\n",
-			   buf->idx, buf->pid );
-	}
-
-#if ROTATE_BUFS
 	if ( ++dev_priv->last_buf >= dma->buf_count )
 		dev_priv->last_buf = 0;
+
 	start = dev_priv->last_buf;
-#endif
+
 	for ( t = 0 ; t < dev_priv->usec_timeout ; t++ ) {
-#if 0
-		/* FIXME: Disable this for now */
-		u32 done_age = dev_priv->scratch[RADEON_LAST_DISPATCH];
-#else
 		u32 done_age = RADEON_READ( RADEON_LAST_DISPATCH_REG );
-#endif
-#if ROTATE_BUFS
 		for ( i = start ; i < dma->buf_count ; i++ ) {
-#else
-		for ( i = 0 ; i < dma->buf_count ; i++ ) {
-#endif
 			buf = dma->buflist[i];
 			buf_priv = buf->dev_private;
-			if ( buf->pending && buf_priv->age <= done_age ) {
-				/* The buffer has been processed, so it
-				 * can now be used.
-				 */
+			if ( buf->pid == 0 || (buf->pending && 
+					       buf_priv->age <= done_age) ) {
 				buf->pending = 0;
-				DRM_DEBUG( "  ret buf=%d last=%d age=%d done=%d\n", buf->idx, dev_priv->last_buf, buf_priv->age, done_age );
 				return buf;
 			}
-			DRM_DEBUG( "    skipping buf=%d age=%d done=%d\n",
-				   buf->idx, buf_priv->age,
-				   done_age );
-#if ROTATE_BUFS
 			start = 0;
-#endif
 		}
 		udelay( 1 );
 	}
@@ -1332,14 +1210,10 @@ drm_buf_t *radeon_freelist_get( drm_device_t *dev )
 void radeon_freelist_reset( drm_device_t *dev )
 {
 	drm_device_dma_t *dma = dev->dma;
-#if ROTATE_BUFS
 	drm_radeon_private_t *dev_priv = dev->dev_private;
-#endif
 	int i;
 
-#if ROTATE_BUFS
 	dev_priv->last_buf = 0;
-#endif
 	for ( i = 0 ; i < dma->buf_count ; i++ ) {
 		drm_buf_t *buf = dma->buflist[i];
 		drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
diff --git a/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/radeon_drm.h b/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/radeon_drm.h
index 6774b2bc0..dd24d4299 100644
--- a/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/radeon_drm.h
+++ b/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/radeon_drm.h
@@ -2,6 +2,7 @@
  *
  * Copyright 2000 Precision Insight, Inc., Cedar Park, Texas.
  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
+ * Copyright 2002 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -38,7 +39,8 @@
 #ifndef __RADEON_SAREA_DEFINES__
 #define __RADEON_SAREA_DEFINES__
 
-/* What needs to be changed for the current vertex buffer?
+/* Old style state flags, required for sarea interface (1.1 and 1.2
+ * clears) and 1.2 drm_vertex2 ioctl.
  */
 #define RADEON_UPLOAD_CONTEXT		0x00000001
 #define RADEON_UPLOAD_VERTFMT		0x00000002
@@ -58,8 +60,68 @@
 #define RADEON_UPLOAD_CLIPRECTS		0x00008000 /* handled client-side */
 #define RADEON_REQUIRE_QUIESCENCE	0x00010000
 #define RADEON_UPLOAD_ZBIAS		0x00020000 /* version 1.2 and newer */
-#define RADEON_UPLOAD_ALL		0x0002ffff
-#define RADEON_UPLOAD_CONTEXT_ALL       0x000201ff
+#define RADEON_UPLOAD_ALL		0x003effff
+#define RADEON_UPLOAD_CONTEXT_ALL       0x003e01ff
+
+
+/* New style per-packet identifiers for use in cmd_buffer ioctl with
+ * the RADEON_CMD_PACKET command.  Comments relate new packets to old
+ * state bits and the packet size:
+ */
+#define RADEON_EMIT_PP_MISC                         0 /* context/7 */
+#define RADEON_EMIT_PP_CNTL                         1 /* context/3 */
+#define RADEON_EMIT_RB3D_COLORPITCH                 2 /* context/1 */
+#define RADEON_EMIT_RE_LINE_PATTERN                 3 /* line/2 */
+#define RADEON_EMIT_SE_LINE_WIDTH                   4 /* line/1 */
+#define RADEON_EMIT_PP_LUM_MATRIX                   5 /* bumpmap/1 */
+#define RADEON_EMIT_PP_ROT_MATRIX_0                 6 /* bumpmap/2 */
+#define RADEON_EMIT_RB3D_STENCILREFMASK             7 /* masks/3 */
+#define RADEON_EMIT_SE_VPORT_XSCALE                 8 /* viewport/6 */
+#define RADEON_EMIT_SE_CNTL                         9 /* setup/2 */
+#define RADEON_EMIT_SE_CNTL_STATUS                  10 /* setup/1 */
+#define RADEON_EMIT_RE_MISC                         11 /* misc/1 */
+#define RADEON_EMIT_PP_TXFILTER_0                   12 /* tex0/6 */
+#define RADEON_EMIT_PP_BORDER_COLOR_0               13 /* tex0/1 */
+#define RADEON_EMIT_PP_TXFILTER_1                   14 /* tex1/6 */
+#define RADEON_EMIT_PP_BORDER_COLOR_1               15 /* tex1/1 */
+#define RADEON_EMIT_PP_TXFILTER_2                   16 /* tex2/6 */
+#define RADEON_EMIT_PP_BORDER_COLOR_2               17 /* tex2/1 */
+#define RADEON_EMIT_SE_ZBIAS_FACTOR                 18 /* zbias/2 */
+#define RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT           19 /* tcl/11 */
+#define RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED   20 /* material/17 */
+#define RADEON_MAX_STATE_PACKETS                    21
+
+
+/* Commands understood by cmd_buffer ioctl.  More can be added but
+ * obviously these can't be removed or changed:
+ */
+#define RADEON_CMD_PACKET      1 /* emit one of the register packets above */
+#define RADEON_CMD_SCALARS     2 /* emit scalar data */
+#define RADEON_CMD_VECTORS     3 /* emit vector data */
+#define RADEON_CMD_DMA_DISCARD 4 /* discard current dma buf */
+#define RADEON_CMD_PACKET3     5 /* emit hw packet */
+#define RADEON_CMD_PACKET3_CLIP 6 /* emit hw packet wrapped in cliprects */
+
+
+typedef union {
+	int i;
+	struct { 
+		char cmd_type, pad0, pad1, pad2;
+	} header;
+	struct { 
+		char cmd_type, packet_id, pad0, pad1;
+	} packet;
+	struct { 
+		char cmd_type, offset, stride, count; 
+	} scalars;
+	struct { 
+		char cmd_type, offset, stride, count; 
+	} vectors;
+	struct { 
+		char cmd_type, buf_idx, pad0, pad1; 
+	} dma;
+} drm_radeon_cmd_header_t;
+
 
 #define RADEON_FRONT			0x1
 #define RADEON_BACK			0x2
@@ -82,7 +144,6 @@
 /* Byte offsets for indirect buffer data
  */
 #define RADEON_INDEX_PRIM_OFFSET	20
-#define RADEON_HOSTDATA_BLIT_OFFSET	32
 
 #define RADEON_SCRATCH_REG_OFFSET	32
 
@@ -181,8 +242,6 @@ typedef struct {
 	unsigned int pp_border_color;
 } drm_radeon_texture_regs_t;
 
-/* Space is crucial; there is some redunancy here:
- */
 typedef struct {
 	unsigned int start;
 	unsigned int finish;
@@ -192,6 +251,7 @@ typedef struct {
         unsigned int vc_format;   /* vertex format */
 } drm_radeon_prim_t;
 
+
 typedef struct {
 	drm_radeon_context_regs_t context;
 	drm_radeon_texture_regs_t tex[RADEON_MAX_TEXTURE_UNITS];
@@ -231,6 +291,8 @@ typedef struct {
 	drm_radeon_tex_region_t tex_list[RADEON_NR_TEX_HEAPS][RADEON_NR_TEX_REGIONS+1];
 	int tex_age[RADEON_NR_TEX_HEAPS];
 	int ctx_owner;
+        int pfState;                /* number of 3d windows (0, 1, 2 or more) */
+        int pfCurrentPage;	    /* which buffer is being displayed? */
 } drm_radeon_sarea_t;
 
 
@@ -258,6 +320,9 @@ typedef struct {
 #define DRM_IOCTL_RADEON_INDIRECT   DRM_IOWR(0x4d, drm_radeon_indirect_t)
 #define DRM_IOCTL_RADEON_TEXTURE    DRM_IOWR(0x4e, drm_radeon_texture_t)
 #define DRM_IOCTL_RADEON_VERTEX2    DRM_IOW( 0x4f, drm_radeon_vertex_t)
+#define DRM_IOCTL_RADEON_CMDBUF     DRM_IOW( 0x50, drm_radeon_cmd_buffer_t)
+#define DRM_IOCTL_RADEON_GETPARAM   DRM_IOWR(0x51, drm_radeon_getparam_t)
+#define DRM_IOCTL_RADEON_FLIP	    DRM_IO(  0x52)
 
 typedef struct drm_radeon_init {
 	enum {
@@ -324,6 +389,18 @@ typedef struct drm_radeon_vertex {
 	int discard;			/* Client finished with buffer? */
 } drm_radeon_vertex_t;
 
+typedef struct drm_radeon_indices {
+	int prim;
+	int idx;
+	int start;
+	int end;
+	int discard;			/* Client finished with buffer? */
+} drm_radeon_indices_t;
+
+/* v1.2 - obsoletes drm_radeon_vertex and drm_radeon_indices
+ *      - allows multiple primitives and state changes in a single ioctl
+ *      - supports driver change to emit native primitives
+ */
 typedef struct drm_radeon_vertex2 {
 	int idx;			/* Index of vertex buffer */
 	int discard;			/* Client finished with buffer? */
@@ -333,13 +410,22 @@ typedef struct drm_radeon_vertex2 {
 	drm_radeon_prim_t *prim;
 } drm_radeon_vertex2_t;
 
-typedef struct drm_radeon_indices {
-	int prim;
-	int idx;
-	int start;
-	int end;
-	int discard;			/* Client finished with buffer? */
-} drm_radeon_indices_t;
+/* v1.3 - obsoletes drm_radeon_vertex2
+ *      - allows arbitrarily large cliprect list
+ *      - allows updating of tcl packet, vector and scalar state
+ *      - allows memory-efficient description of state updates
+ *      - allows state to be emitted without a primitive 
+ *           (for clears, ctx switches)
+ *      - allows more than one dma buffer to be referenced per ioctl
+ *      - supports tcl driver
+ *      - may be extended in future versions with new cmd types, packets
+ */
+typedef struct drm_radeon_cmd_buffer {
+	int bufsz;
+	char *buf;
+	int nbox;
+	drm_clip_rect_t *boxes;
+} drm_radeon_cmd_buffer_t;
 
 typedef struct drm_radeon_tex_image {
 	unsigned int x, y;		/* Blit coordinates */
@@ -367,4 +453,15 @@ typedef struct drm_radeon_indirect {
 	int discard;
 } drm_radeon_indirect_t;
 
+
+/* 1.3: An ioctl to get parameters that aren't available to the 3d
+ * client any other way.  
+ */
+#define RADEON_PARAM_AGP_BUFFER_OFFSET 0x1
+
+typedef struct drm_radeon_getparam {
+	int param;
+	int *value;
+} drm_radeon_getparam_t;
+
 #endif
diff --git a/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/radeon_drv.c b/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/radeon_drv.c
index 135dd184f..e4af560b8 100644
--- a/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/radeon_drv.c
+++ b/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/radeon_drv.c
@@ -39,10 +39,10 @@
 
 #define DRIVER_NAME		"radeon"
 #define DRIVER_DESC		"ATI Radeon"
-#define DRIVER_DATE		"20020602"
+#define DRIVER_DATE		"20020611"
 
 #define DRIVER_MAJOR		1
-#define DRIVER_MINOR		2
+#define DRIVER_MINOR		3
 #define DRIVER_PATCHLEVEL	1
 
 /* Interface history:
@@ -51,6 +51,10 @@
  * 1.2 - Add vertex2 ioctl (keith)
  *     - Add stencil capability to clear ioctl (gareth, keith)
  *     - Increase MAX_TEXTURE_LEVELS (brian)
+ * 1.3 - Add cmdbuf ioctl (keith)
+ *     - Add support for new radeon packets (keith)
+ *     - Add getparam ioctl (keith)
+ *     - Add flip-buffers ioctl, deprecate fullscreen foo (keith).
  */
 #define DRIVER_IOCTLS							     \
  [DRM_IOCTL_NR(DRM_IOCTL_DMA)]               = { radeon_cp_buffers,  1, 0 }, \
@@ -68,17 +72,10 @@
  [DRM_IOCTL_NR(DRM_IOCTL_RADEON_TEXTURE)]    = { radeon_cp_texture,  1, 0 }, \
  [DRM_IOCTL_NR(DRM_IOCTL_RADEON_STIPPLE)]    = { radeon_cp_stipple,  1, 0 }, \
  [DRM_IOCTL_NR(DRM_IOCTL_RADEON_INDIRECT)]   = { radeon_cp_indirect, 1, 1 }, \
- [DRM_IOCTL_NR(DRM_IOCTL_RADEON_VERTEX2)]    = { radeon_cp_vertex2,  1, 0 },
-
-
-#if 0
-/* GH: Count data sent to card via ring or vertex/indirect buffers.
- */
-#define __HAVE_COUNTERS         3
-#define __HAVE_COUNTER6         _DRM_STAT_IRQ
-#define __HAVE_COUNTER7         _DRM_STAT_PRIMARY
-#define __HAVE_COUNTER8         _DRM_STAT_SECONDARY
-#endif
+ [DRM_IOCTL_NR(DRM_IOCTL_RADEON_VERTEX2)]    = { radeon_cp_vertex2,  1, 0 }, \
+ [DRM_IOCTL_NR(DRM_IOCTL_RADEON_CMDBUF)]     = { radeon_cp_cmdbuf,   1, 0 }, \
+ [DRM_IOCTL_NR(DRM_IOCTL_RADEON_GETPARAM)]   = { radeon_cp_getparam, 1, 0 }, \
+ [DRM_IOCTL_NR(DRM_IOCTL_RADEON_FLIP)]       = { radeon_cp_flip,     1, 0 }, 
 
 
 #include "drm_agpsupport.h"
diff --git a/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/radeon_drv.h b/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/radeon_drv.h
index d6a900789..ba9f8de98 100644
--- a/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/radeon_drv.h
+++ b/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/radeon_drv.h
@@ -74,14 +74,7 @@ typedef struct drm_radeon_private {
 
    	drm_radeon_freelist_t *head;
    	drm_radeon_freelist_t *tail;
-/* FIXME: ROTATE_BUFS is a hask to cycle through bufs until freelist
-   code is used.  Note this hides a problem with the scratch register
-   (used to keep track of last buffer completed) being written to before
-   the last buffer has actually completed rendering. */
-#define ROTATE_BUFS 1
-#if ROTATE_BUFS
 	int last_buf;
-#endif
 	volatile u32 *scratch;
 
 	int usec_timeout;
@@ -123,10 +116,6 @@ typedef struct drm_radeon_private {
 
 typedef struct drm_radeon_buf_priv {
 	u32 age;
-	int prim;
-	int discard;
-	int dispatched;
-   	drm_radeon_freelist_t *list_entry;
 } drm_radeon_buf_priv_t;
 
 				/* radeon_cp.c */
@@ -181,6 +170,13 @@ extern int radeon_cp_indirect( struct inode *inode, struct file *filp,
 			       unsigned int cmd, unsigned long arg );
 extern int radeon_cp_vertex2( struct inode *inode, struct file *filp,
 			      unsigned int cmd, unsigned long arg );
+extern int radeon_cp_cmdbuf( struct inode *inode, struct file *filp,
+			      unsigned int cmd, unsigned long arg );
+extern int radeon_cp_getparam( struct inode *inode, struct file *filp,
+			      unsigned int cmd, unsigned long arg );
+extern int radeon_cp_flip( struct inode *inode, struct file *filp,
+			   unsigned int cmd, unsigned long arg );
+
 
 
 /* Register definitions, register access macros and drmAddMap constants
@@ -209,8 +205,6 @@ extern int radeon_cp_vertex2( struct inode *inode, struct file *filp,
 #	define RADEON_CRTC_OFFSET_FLIP_CNTL	(1 << 16)
 
 #define RADEON_RB3D_COLORPITCH		0x1c48
-#define RADEON_RB3D_DEPTHCLEARVALUE	0x1c30
-#define RADEON_RB3D_DEPTHXY_OFFSET	0x1c60
 
 #define RADEON_DP_GUI_MASTER_CNTL	0x146c
 #	define RADEON_GMC_SRC_PITCH_OFFSET_CNTL	(1 << 0)
@@ -301,9 +295,6 @@ extern int radeon_cp_vertex2( struct inode *inode, struct file *filp,
 #	define RADEON_ROP_ENABLE		(1 << 6)
 #	define RADEON_STENCIL_ENABLE		(1 << 7)
 #	define RADEON_Z_ENABLE			(1 << 8)
-#	define RADEON_DEPTH_XZ_OFFEST_ENABLE	(1 << 9)
-#	define RADEON_ZBLOCK8			(0 << 15)
-#	define RADEON_ZBLOCK16			(1 << 15)
 #define RADEON_RB3D_DEPTHOFFSET		0x1c24
 #define RADEON_RB3D_PLANEMASK		0x1d84
 #define RADEON_RB3D_STENCILREFMASK	0x1d7c
@@ -369,6 +360,15 @@ extern int radeon_cp_vertex2( struct inode *inode, struct file *filp,
 #define RADEON_SE_LINE_WIDTH		0x1db8
 #define RADEON_SE_VPORT_XSCALE		0x1d98
 #define RADEON_SE_ZBIAS_FACTOR		0x1db0
+#define RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED 0x2210
+#define RADEON_SE_TCL_OUTPUT_VTX_FMT         0x2254
+#define RADEON_SE_TCL_VECTOR_INDX_REG        0x2200
+#       define RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT  16
+#       define RADEON_VEC_INDX_DWORD_COUNT_SHIFT     28
+#define RADEON_SE_TCL_VECTOR_DATA_REG       0x2204
+#define RADEON_SE_TCL_SCALAR_INDX_REG       0x2208
+#       define RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT  16
+#define RADEON_SE_TCL_SCALAR_DATA_REG       0x220C
 #define RADEON_SURFACE_ACCESS_FLAGS	0x0bf8
 #define RADEON_SURFACE_ACCESS_CLR	0x0bfc
 #define RADEON_SURFACE_CNTL		0x0b00
@@ -470,8 +470,10 @@ extern int radeon_cp_vertex2( struct inode *inode, struct file *filp,
 #define RADEON_CP_PACKET3		0xC0000000
 #	define RADEON_3D_RNDR_GEN_INDX_PRIM	0x00002300
 #	define RADEON_WAIT_FOR_IDLE		0x00002600
+#	define RADEON_3D_DRAW_VBUF		0x00002800
 #	define RADEON_3D_DRAW_IMMD		0x00002900
-#	define RADEON_3D_CLEAR_ZMASK		0x00003200
+#	define RADEON_3D_DRAW_INDX		0x00002A00
+#	define RADEON_3D_LOAD_VBPNTR		0x00002F00
 #	define RADEON_CNTL_HOSTDATA_BLT		0x00009400
 #	define RADEON_CNTL_PAINT_MULTI		0x00009A00
 #	define RADEON_CNTL_BITBLT_MULTI		0x00009B00
@@ -483,6 +485,7 @@ extern int radeon_cp_vertex2( struct inode *inode, struct file *filp,
 #define RADEON_CP_PACKET1_REG1_MASK	0x003ff800
 
 #define RADEON_VTX_Z_PRESENT			(1 << 31)
+#define RADEON_VTX_PKCOLOR_PRESENT		(1 << 3)
 
 #define RADEON_PRIM_TYPE_NONE			(0 << 0)
 #define RADEON_PRIM_TYPE_POINT			(1 << 0)
@@ -696,7 +699,7 @@ do {									\
 
 #define RADEON_VERBOSE	0
 
-#define RING_LOCALS	int write; unsigned int mask; volatile u32 *ring;
+#define RING_LOCALS	int write, _nr; unsigned int mask; volatile u32 *ring;
 
 #define BEGIN_RING( n ) do {						\
 	if ( RADEON_VERBOSE ) {						\
@@ -704,9 +707,10 @@ do {									\
 			   n, __FUNCTION__ );				\
 	}								\
 	if ( dev_priv->ring.space <= (n) * sizeof(u32) ) {		\
+                COMMIT_RING();						\
 		radeon_wait_ring( dev_priv, (n) * sizeof(u32) );	\
 	}								\
-	dev_priv->ring.space -= (n) * sizeof(u32);			\
+	_nr = n; dev_priv->ring.space -= (n) * sizeof(u32);		\
 	ring = dev_priv->ring.start;					\
 	write = dev_priv->ring.tail;					\
 	mask = dev_priv->ring.tail_mask;				\
@@ -717,9 +721,17 @@ do {									\
 		DRM_INFO( "ADVANCE_RING() wr=0x%06x tail=0x%06x\n",	\
 			  write, dev_priv->ring.tail );			\
 	}								\
-	radeon_flush_write_combine();					\
-	dev_priv->ring.tail = write;					\
-	RADEON_WRITE( RADEON_CP_RB_WPTR, write );			\
+	if (((dev_priv->ring.tail + _nr) & mask) != write) {		\
+		DRM_ERROR( 						\
+			"ADVANCE_RING(): mismatch: nr: %x write: %x\n",	\
+			((dev_priv->ring.tail + _nr) & mask),		\
+			write);						\
+	} else								\
+		dev_priv->ring.tail = write;				\
+} while (0)
+
+#define COMMIT_RING() do {					    \
+	RADEON_WRITE( RADEON_CP_RB_WPTR, dev_priv->ring.tail );		    \
 } while (0)
 
 #define OUT_RING( x ) do {						\
@@ -736,6 +748,30 @@ do {									\
 	OUT_RING( val );						\
 } while (0)
 
+
+#define OUT_RING_USER_TABLE( tab, sz ) do {			\
+	int _size = (sz);					\
+	int *_tab = (tab);					\
+								\
+	if (write + _size > mask) {				\
+		int i = (mask+1) - write;			\
+		if (__copy_from_user( (int *)(ring+write),	\
+				      _tab, i*4 ))		\
+			return -EFAULT;				\
+		write = 0;					\
+		_size -= i;					\
+		_tab += i;					\
+	}							\
+								\
+	if (_size && __copy_from_user( (int *)(ring+write),	\
+			               _tab, _size*4 ))		\
+		return -EFAULT;					\
+								\
+	write += _size;						\
+	write &= mask;						\
+} while (0)
+
+
 #define RADEON_PERFORMANCE_BOXES	0
 
 #endif /* __RADEON_DRV_H__ */
diff --git a/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/radeon_state.c b/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/radeon_state.c
index cc518a0e1..08a8f8a83 100644
--- a/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/radeon_state.c
+++ b/xc/programs/Xserver/hw/xfree86/os-support/linux/drm/kernel/radeon_state.c
@@ -49,329 +49,210 @@ static inline void radeon_emit_clip_rect( drm_radeon_private_t *dev_priv,
 		   box->x1, box->y1, box->x2, box->y2 );
 
 	BEGIN_RING( 4 );
-
 	OUT_RING( CP_PACKET0( RADEON_RE_TOP_LEFT, 0 ) );
 	OUT_RING( (box->y1 << 16) | box->x1 );
-
 	OUT_RING( CP_PACKET0( RADEON_RE_WIDTH_HEIGHT, 0 ) );
-	OUT_RING( ((box->y2 - 1) << 16) | (box->x2 - 1) );
-
-	ADVANCE_RING();
-}
-
-static inline void radeon_emit_context( drm_radeon_private_t *dev_priv,
-					drm_radeon_context_regs_t *ctx )
-{
-	RING_LOCALS;
-	DRM_DEBUG( "    %s\n", __FUNCTION__ );
-
-	BEGIN_RING( 14 );
-
-	OUT_RING( CP_PACKET0( RADEON_PP_MISC, 6 ) );
-	OUT_RING( ctx->pp_misc );
-	OUT_RING( ctx->pp_fog_color );
-	OUT_RING( ctx->re_solid_color );
-	OUT_RING( ctx->rb3d_blendcntl );
-	OUT_RING( ctx->rb3d_depthoffset );
-	OUT_RING( ctx->rb3d_depthpitch );
-	OUT_RING( ctx->rb3d_zstencilcntl );
-
-	OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 2 ) );
-	OUT_RING( ctx->pp_cntl );
-	OUT_RING( ctx->rb3d_cntl );
-	OUT_RING( ctx->rb3d_coloroffset );
-
-	OUT_RING( CP_PACKET0( RADEON_RB3D_COLORPITCH, 0 ) );
-	OUT_RING( ctx->rb3d_colorpitch );
-
-	ADVANCE_RING();
-}
-
-static inline void radeon_emit_vertfmt( drm_radeon_private_t *dev_priv,
-					drm_radeon_context_regs_t *ctx )
-{
-	RING_LOCALS;
-	DRM_DEBUG( "    %s\n", __FUNCTION__ );
-
-	BEGIN_RING( 2 );
-
-	OUT_RING( CP_PACKET0( RADEON_SE_COORD_FMT, 0 ) );
-	OUT_RING( ctx->se_coord_fmt );
-
-	ADVANCE_RING();
-}
-
-static inline void radeon_emit_line( drm_radeon_private_t *dev_priv,
-					drm_radeon_context_regs_t *ctx )
-{
-	RING_LOCALS;
-/*  	printk( "    %s %x %x %x\n", __FUNCTION__,  */
-/*  		ctx->re_line_pattern, */
-/*  		ctx->re_line_state, */
-/*  		ctx->se_line_width); */
-
-	BEGIN_RING( 5 );
-
-	OUT_RING( CP_PACKET0( RADEON_RE_LINE_PATTERN, 1 ) );
-	OUT_RING( ctx->re_line_pattern );
-	OUT_RING( ctx->re_line_state );
-
-	OUT_RING( CP_PACKET0( RADEON_SE_LINE_WIDTH, 0 ) );
-	OUT_RING( ctx->se_line_width );
-
-	ADVANCE_RING();
-}
-
-static inline void radeon_emit_bumpmap( drm_radeon_private_t *dev_priv,
-					drm_radeon_context_regs_t *ctx )
-{
-	RING_LOCALS;
-	DRM_DEBUG( "    %s\n", __FUNCTION__ );
-
-	BEGIN_RING( 5 );
-
-	OUT_RING( CP_PACKET0( RADEON_PP_LUM_MATRIX, 0 ) );
-	OUT_RING( ctx->pp_lum_matrix );
-
-	OUT_RING( CP_PACKET0( RADEON_PP_ROT_MATRIX_0, 1 ) );
-	OUT_RING( ctx->pp_rot_matrix_0 );
-	OUT_RING( ctx->pp_rot_matrix_1 );
-
-	ADVANCE_RING();
-}
-
-static inline void radeon_emit_masks( drm_radeon_private_t *dev_priv,
-				      drm_radeon_context_regs_t *ctx )
-{
-	RING_LOCALS;
-	DRM_DEBUG( "    %s\n", __FUNCTION__ );
-
-	BEGIN_RING( 4 );
-
-	OUT_RING( CP_PACKET0( RADEON_RB3D_STENCILREFMASK, 2 ) );
-	OUT_RING( ctx->rb3d_stencilrefmask );
-	OUT_RING( ctx->rb3d_ropcntl );
-	OUT_RING( ctx->rb3d_planemask );
-
-	ADVANCE_RING();
-}
-
-static inline void radeon_emit_viewport( drm_radeon_private_t *dev_priv,
-					 drm_radeon_context_regs_t *ctx )
-{
-	RING_LOCALS;
-	DRM_DEBUG( "    %s\n", __FUNCTION__ );
-
-	BEGIN_RING( 7 );
-
-	OUT_RING( CP_PACKET0( RADEON_SE_VPORT_XSCALE, 5 ) );
-	OUT_RING( ctx->se_vport_xscale );
-	OUT_RING( ctx->se_vport_xoffset );
-	OUT_RING( ctx->se_vport_yscale );
-	OUT_RING( ctx->se_vport_yoffset );
-	OUT_RING( ctx->se_vport_zscale );
-	OUT_RING( ctx->se_vport_zoffset );
-	ADVANCE_RING();
-}
-
-static inline void radeon_emit_setup( drm_radeon_private_t *dev_priv,
-				      drm_radeon_context_regs_t *ctx )
-{
-	RING_LOCALS;
-	DRM_DEBUG( "    %s\n", __FUNCTION__ );
-
-	BEGIN_RING( 4 );
-
-	OUT_RING( CP_PACKET0( RADEON_SE_CNTL, 0 ) );
-	OUT_RING( ctx->se_cntl );
-	OUT_RING( CP_PACKET0( RADEON_SE_CNTL_STATUS, 0 ) );
-	OUT_RING( ctx->se_cntl_status );
-
-	ADVANCE_RING();
-}
-
-
-static inline void radeon_emit_misc( drm_radeon_private_t *dev_priv,
-				     drm_radeon_context_regs_t *ctx )
-{
-	RING_LOCALS;
-	DRM_DEBUG( "    %s\n", __FUNCTION__ );
-
-	BEGIN_RING( 2 );
-
-	OUT_RING( CP_PACKET0( RADEON_RE_MISC, 0 ) );
-	OUT_RING( ctx->re_misc );
-
-	ADVANCE_RING();
-}
-
-static inline void radeon_emit_tex0( drm_radeon_private_t *dev_priv,
-				     drm_radeon_texture_regs_t *tex )
-{
-	RING_LOCALS;
-	DRM_DEBUG( "    %s: offset=0x%x\n", __FUNCTION__, tex->pp_txoffset );
-
-	BEGIN_RING( 9 );
-
-	OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_0, 5 ) );
-	OUT_RING( tex->pp_txfilter );
-	OUT_RING( tex->pp_txformat );
-	OUT_RING( tex->pp_txoffset );
-	OUT_RING( tex->pp_txcblend );
-	OUT_RING( tex->pp_txablend );
-	OUT_RING( tex->pp_tfactor );
-
-	OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_0, 0 ) );
-	OUT_RING( tex->pp_border_color );
-
-	ADVANCE_RING();
-}
-
-static inline void radeon_emit_tex1( drm_radeon_private_t *dev_priv,
-				     drm_radeon_texture_regs_t *tex )
-{
-	RING_LOCALS;
-	DRM_DEBUG( "    %s: offset=0x%x\n", __FUNCTION__, tex->pp_txoffset );
-
-	BEGIN_RING( 9 );
-
-	OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_1, 5 ) );
-	OUT_RING( tex->pp_txfilter );
-	OUT_RING( tex->pp_txformat );
-	OUT_RING( tex->pp_txoffset );
-	OUT_RING( tex->pp_txcblend );
-	OUT_RING( tex->pp_txablend );
-	OUT_RING( tex->pp_tfactor );
-
-	OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_1, 0 ) );
-	OUT_RING( tex->pp_border_color );
-
+/*	OUT_RING( ((box->y2 - 1) << 16) | (box->x2 - 1) );*/
+	OUT_RING( (box->y2 << 16) | box->x2 );
 	ADVANCE_RING();
 }
 
-static inline void radeon_emit_tex2( drm_radeon_private_t *dev_priv,
-				     drm_radeon_texture_regs_t *tex )
+/* Emit 1.1 state
+ */
+static void radeon_emit_state( drm_radeon_private_t *dev_priv,
+			       drm_radeon_context_regs_t *ctx,
+			       drm_radeon_texture_regs_t *tex,
+			       unsigned int dirty )
 {
 	RING_LOCALS;
-	DRM_DEBUG( "    %s\n", __FUNCTION__ );
-
-	BEGIN_RING( 9 );
-
-	OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_2, 5 ) );
-	OUT_RING( tex->pp_txfilter );
-	OUT_RING( tex->pp_txformat );
-	OUT_RING( tex->pp_txoffset );
-	OUT_RING( tex->pp_txcblend );
-	OUT_RING( tex->pp_txablend );
-	OUT_RING( tex->pp_tfactor );
-
-	OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_2, 0 ) );
-	OUT_RING( tex->pp_border_color );
-
-	ADVANCE_RING();
-}
-
-#if 0
-static void radeon_print_dirty( const char *msg, unsigned int flags )
-{
-	DRM_DEBUG( "%s: (0x%x) %s%s%s%s%s%s%s%s%s%s%s%s%s\n",
-		   msg,
-		   flags,
-		   (flags & RADEON_UPLOAD_CONTEXT)     ? "context, " : "",
-		   (flags & RADEON_UPLOAD_VERTFMT)     ? "vertfmt, " : "",
-		   (flags & RADEON_UPLOAD_LINE)        ? "line, " : "",
-		   (flags & RADEON_UPLOAD_BUMPMAP)     ? "bumpmap, " : "",
-		   (flags & RADEON_UPLOAD_MASKS)       ? "masks, " : "",
-		   (flags & RADEON_UPLOAD_VIEWPORT)    ? "viewport, " : "",
-		   (flags & RADEON_UPLOAD_SETUP)       ? "setup, " : "",
-		   (flags & RADEON_UPLOAD_MISC)        ? "misc, " : "",
-		   (flags & RADEON_UPLOAD_TEX0)        ? "tex0, " : "",
-		   (flags & RADEON_UPLOAD_TEX1)        ? "tex1, " : "",
-		   (flags & RADEON_UPLOAD_TEX2)        ? "tex2, " : "",
-		   (flags & RADEON_UPLOAD_CLIPRECTS)   ? "cliprects, " : "",
-		   (flags & RADEON_REQUIRE_QUIESCENCE) ? "quiescence, " : "" );
-}
-#endif
-
-static inline void radeon_emit_state( drm_radeon_private_t *dev_priv,
-				      drm_radeon_context_regs_t *ctx,
-				      drm_radeon_texture_regs_t *tex,
-				      unsigned int dirty )
-{
 	DRM_DEBUG( "%s: dirty=0x%08x\n", __FUNCTION__, dirty );
 
 	if ( dirty & RADEON_UPLOAD_CONTEXT ) {
-		radeon_emit_context( dev_priv, ctx );
+		BEGIN_RING( 14 );
+		OUT_RING( CP_PACKET0( RADEON_PP_MISC, 6 ) );
+		OUT_RING( ctx->pp_misc );
+		OUT_RING( ctx->pp_fog_color );
+		OUT_RING( ctx->re_solid_color );
+		OUT_RING( ctx->rb3d_blendcntl );
+		OUT_RING( ctx->rb3d_depthoffset );
+		OUT_RING( ctx->rb3d_depthpitch );
+		OUT_RING( ctx->rb3d_zstencilcntl );
+		OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 2 ) );
+		OUT_RING( ctx->pp_cntl );
+		OUT_RING( ctx->rb3d_cntl );
+		OUT_RING( ctx->rb3d_coloroffset );
+		OUT_RING( CP_PACKET0( RADEON_RB3D_COLORPITCH, 0 ) );
+		OUT_RING( ctx->rb3d_colorpitch );
+		ADVANCE_RING();
 	}
 
 	if ( dirty & RADEON_UPLOAD_VERTFMT ) {
-		radeon_emit_vertfmt( dev_priv, ctx );
+		BEGIN_RING( 2 );
+		OUT_RING( CP_PACKET0( RADEON_SE_COORD_FMT, 0 ) );
+		OUT_RING( ctx->se_coord_fmt );
+		ADVANCE_RING();
 	}
 
 	if ( dirty & RADEON_UPLOAD_LINE ) {
-		radeon_emit_line( dev_priv, ctx );
+		BEGIN_RING( 5 );
+		OUT_RING( CP_PACKET0( RADEON_RE_LINE_PATTERN, 1 ) );
+		OUT_RING( ctx->re_line_pattern );
+		OUT_RING( ctx->re_line_state );
+		OUT_RING( CP_PACKET0( RADEON_SE_LINE_WIDTH, 0 ) );
+		OUT_RING( ctx->se_line_width );
+		ADVANCE_RING();
 	}
 
 	if ( dirty & RADEON_UPLOAD_BUMPMAP ) {
-		radeon_emit_bumpmap( dev_priv, ctx );
+		BEGIN_RING( 5 );
+		OUT_RING( CP_PACKET0( RADEON_PP_LUM_MATRIX, 0 ) );
+		OUT_RING( ctx->pp_lum_matrix );
+		OUT_RING( CP_PACKET0( RADEON_PP_ROT_MATRIX_0, 1 ) );
+		OUT_RING( ctx->pp_rot_matrix_0 );
+		OUT_RING( ctx->pp_rot_matrix_1 );
+		ADVANCE_RING();
 	}
 
 	if ( dirty & RADEON_UPLOAD_MASKS ) {
-		radeon_emit_masks( dev_priv, ctx );
+		BEGIN_RING( 4 );
+		OUT_RING( CP_PACKET0( RADEON_RB3D_STENCILREFMASK, 2 ) );
+		OUT_RING( ctx->rb3d_stencilrefmask );
+		OUT_RING( ctx->rb3d_ropcntl );
+		OUT_RING( ctx->rb3d_planemask );
+		ADVANCE_RING();
 	}
 
 	if ( dirty & RADEON_UPLOAD_VIEWPORT ) {
-		radeon_emit_viewport( dev_priv, ctx );
+		BEGIN_RING( 7 );
+		OUT_RING( CP_PACKET0( RADEON_SE_VPORT_XSCALE, 5 ) );
+		OUT_RING( ctx->se_vport_xscale );
+		OUT_RING( ctx->se_vport_xoffset );
+		OUT_RING( ctx->se_vport_yscale );
+		OUT_RING( ctx->se_vport_yoffset );
+		OUT_RING( ctx->se_vport_zscale );
+		OUT_RING( ctx->se_vport_zoffset );
+		ADVANCE_RING();
 	}
 
 	if ( dirty & RADEON_UPLOAD_SETUP ) {
-		radeon_emit_setup( dev_priv, ctx );
+		BEGIN_RING( 4 );
+		OUT_RING( CP_PACKET0( RADEON_SE_CNTL, 0 ) );
+		OUT_RING( ctx->se_cntl );
+		OUT_RING( CP_PACKET0( RADEON_SE_CNTL_STATUS, 0 ) );
+		OUT_RING( ctx->se_cntl_status );
+		ADVANCE_RING();
 	}
 
 	if ( dirty & RADEON_UPLOAD_MISC ) {
-		radeon_emit_misc( dev_priv, ctx );
+		BEGIN_RING( 2 );
+		OUT_RING( CP_PACKET0( RADEON_RE_MISC, 0 ) );
+		OUT_RING( ctx->re_misc );
+		ADVANCE_RING();
 	}
 
 	if ( dirty & RADEON_UPLOAD_TEX0 ) {
-		radeon_emit_tex0( dev_priv, &tex[0] );
+		BEGIN_RING( 9 );
+		OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_0, 5 ) );
+		OUT_RING( tex[0].pp_txfilter );
+		OUT_RING( tex[0].pp_txformat );
+		OUT_RING( tex[0].pp_txoffset );
+		OUT_RING( tex[0].pp_txcblend );
+		OUT_RING( tex[0].pp_txablend );
+		OUT_RING( tex[0].pp_tfactor );
+		OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_0, 0 ) );
+		OUT_RING( tex[0].pp_border_color );
+		ADVANCE_RING();
 	}
 
 	if ( dirty & RADEON_UPLOAD_TEX1 ) {
-		radeon_emit_tex1( dev_priv, &tex[1] );
+		BEGIN_RING( 9 );
+		OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_1, 5 ) );
+		OUT_RING( tex[1].pp_txfilter );
+		OUT_RING( tex[1].pp_txformat );
+		OUT_RING( tex[1].pp_txoffset );
+		OUT_RING( tex[1].pp_txcblend );
+		OUT_RING( tex[1].pp_txablend );
+		OUT_RING( tex[1].pp_tfactor );
+		OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_1, 0 ) );
+		OUT_RING( tex[1].pp_border_color );
+		ADVANCE_RING();
 	}
 
 	if ( dirty & RADEON_UPLOAD_TEX2 ) {
-		radeon_emit_tex2( dev_priv, &tex[2] );
+		BEGIN_RING( 9 );
+		OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_2, 5 ) );
+		OUT_RING( tex[2].pp_txfilter );
+		OUT_RING( tex[2].pp_txformat );
+		OUT_RING( tex[2].pp_txoffset );
+		OUT_RING( tex[2].pp_txcblend );
+		OUT_RING( tex[2].pp_txablend );
+		OUT_RING( tex[2].pp_tfactor );
+		OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_2, 0 ) );
+		OUT_RING( tex[2].pp_border_color );
+		ADVANCE_RING();
 	}
 }
 
-
-
-static inline void radeon_emit_zbias( drm_radeon_private_t *dev_priv,
-				      drm_radeon_context2_regs_t *ctx )
+/* Emit 1.2 state
+ */
+static void radeon_emit_state2( drm_radeon_private_t *dev_priv,
+				drm_radeon_state_t *state )
 {
 	RING_LOCALS;
-/*  	printk( "    %s %x %x\n", __FUNCTION__, */
-/*  		ctx->se_zbias_factor, */
-/*  		ctx->se_zbias_constant ); */
-
-	BEGIN_RING( 3 );
-	OUT_RING( CP_PACKET0( RADEON_SE_ZBIAS_FACTOR, 1 ) );
-  	OUT_RING( ctx->se_zbias_factor ); 
-  	OUT_RING( ctx->se_zbias_constant ); 
-	ADVANCE_RING();
-}
 
-static inline void radeon_emit_state2( drm_radeon_private_t *dev_priv,
-				       drm_radeon_state_t *state )
-{
-	if (state->dirty & RADEON_UPLOAD_ZBIAS)
-		radeon_emit_zbias( dev_priv, &state->context2 );
+	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
+		BEGIN_RING( 3 );
+		OUT_RING( CP_PACKET0( RADEON_SE_ZBIAS_FACTOR, 1 ) );
+		OUT_RING( state->context2.se_zbias_factor ); 
+		OUT_RING( state->context2.se_zbias_constant ); 
+		ADVANCE_RING();
+	}
 
 	radeon_emit_state( dev_priv, &state->context, 
 			   state->tex, state->dirty );
 }
 
+/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
+ * 1.3 cmdbuffers allow all previous state to be updated as well as
+ * the tcl scalar and vector areas.  
+ */
+static struct { 	/* One register range per 1.3 state packet; radeon_emit_packets() indexes this table by packet_id. */
+	int start; 	/* first register of the range (base passed to CP_PACKET0) */
+	int len; 	/* number of dwords of register data copied from the command buffer */
+	const char *name;	/* register name as text; not read by the code visible here -- presumably for debug output, confirm */
+} packet[RADEON_MAX_STATE_PACKETS] = {
+	{ RADEON_PP_MISC,7,"RADEON_PP_MISC" },
+	{ RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
+	{ RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
+	{ RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
+	{ RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
+	{ RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
+	{ RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
+	{ RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
+	{ RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
+	{ RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
+	{ RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
+	{ RADEON_RE_MISC,1,"RADEON_RE_MISC" },
+	{ RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
+	{ RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
+	{ RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
+	{ RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
+	{ RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
+	{ RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
+	{ RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
+	{ RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
+	{ RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
+};
+
+
+
+
+
+
+
+
+
+
 #if RADEON_PERFORMANCE_BOXES
 /* ================================================================
  * Performance monitoring functions
@@ -552,7 +433,7 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,
 			radeon_emit_clip_rect( dev_priv,
 					       &sarea_priv->boxes[i] );
 
-			BEGIN_RING( 25 );
+			BEGIN_RING( 28 );
 
 			RADEON_WAIT_UNTIL_2D_IDLE();
 
@@ -569,32 +450,32 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,
 			OUT_RING_REG( RADEON_SE_CNTL,
 				      depth_clear->se_cntl );
 
-			OUT_RING( CP_PACKET3( RADEON_3D_DRAW_IMMD, 10 ) );
-			OUT_RING( RADEON_VTX_Z_PRESENT );
+			/* Radeon 7500 doesn't like vertices without
+			 * color.
+			 */
+			OUT_RING( CP_PACKET3( RADEON_3D_DRAW_IMMD, 13 ) );
+			OUT_RING( RADEON_VTX_Z_PRESENT |
+				  RADEON_VTX_PKCOLOR_PRESENT);
 			OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST |
 				   RADEON_PRIM_WALK_RING |
 				   RADEON_MAOS_ENABLE |
 				   RADEON_VTX_FMT_RADEON_MODE |
 				   (3 << RADEON_NUM_VERTICES_SHIFT)) );
 
-/*  			printk( "depth box %d: %x %x %x %x\n",  */
-/*  				i, */
-/*  				depth_boxes[i].ui[CLEAR_X1], */
-/*  				depth_boxes[i].ui[CLEAR_Y1], */
-/*  				depth_boxes[i].ui[CLEAR_X2], */
-/*  				depth_boxes[i].ui[CLEAR_Y2]); */
-
 			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
 			OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );
 			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
+			OUT_RING( 0x0 );
 
 			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
 			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
 			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
+			OUT_RING( 0x0 );
 
 			OUT_RING( depth_boxes[i].ui[CLEAR_X2] );
 			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
 			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
+			OUT_RING( 0x0 );
 
 			ADVANCE_RING();
 
@@ -664,9 +545,17 @@ static void radeon_cp_dispatch_swap( drm_device_t *dev )
 			  RADEON_DP_SRC_SOURCE_MEMORY |
 			  RADEON_GMC_CLR_CMP_CNTL_DIS |
 			  RADEON_GMC_WR_MSK_DIS );
-
-		OUT_RING( dev_priv->back_pitch_offset );
-		OUT_RING( dev_priv->front_pitch_offset );
+		
+		/* Make this work even if front & back are flipped:
+		 */
+		if (dev_priv->current_page == 0) {
+			OUT_RING( dev_priv->back_pitch_offset );
+			OUT_RING( dev_priv->front_pitch_offset );
+		} 
+		else {
+			OUT_RING( dev_priv->front_pitch_offset );
+			OUT_RING( dev_priv->back_pitch_offset );
+		}
 
 		OUT_RING( (x << 16) | y );
 		OUT_RING( (x << 16) | y );
@@ -701,11 +590,12 @@ static void radeon_cp_dispatch_flip( drm_device_t *dev )
 	radeon_cp_performance_boxes( dev_priv );
 #endif
 
-	BEGIN_RING( 6 );
+	BEGIN_RING( 4 );
 
 	RADEON_WAIT_UNTIL_3D_IDLE();
+/*
 	RADEON_WAIT_UNTIL_PAGE_FLIPPED();
-
+*/
 	OUT_RING( CP_PACKET0( RADEON_CRTC_OFFSET, 0 ) );
 
 	if ( dev_priv->current_page == 0 ) {
@@ -723,6 +613,7 @@ static void radeon_cp_dispatch_flip( drm_device_t *dev )
 	 * performing the swapbuffer ioctl.
 	 */
 	dev_priv->sarea_priv->last_frame++;
+	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;
 
 	BEGIN_RING( 2 );
 
@@ -731,78 +622,75 @@ static void radeon_cp_dispatch_flip( drm_device_t *dev )
 	ADVANCE_RING();
 }
 
-
-static void radeon_cp_dispatch_vertex( drm_device_t *dev,
-				       drm_buf_t *buf,
-				       drm_radeon_prim_t *prim )
+static int bad_prim_vertex_nr( int primitive, int nr )
 {
-	drm_radeon_private_t *dev_priv = dev->dev_private;
-	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
-	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
-	int offset = dev_priv->agp_buffers_offset + buf->offset + prim->start;
-	int numverts = (int)prim->numverts;
-	int i = 0;
-	RING_LOCALS;
-
-	DRM_DEBUG( __FUNCTION__": nbox=%d %d..%d prim %x nvert %d\n",
-		   sarea_priv->nbox, prim->start, prim->finish,
-		   prim->prim, numverts );
-
-	switch (prim->prim & RADEON_PRIM_TYPE_MASK) {
+	switch (primitive & RADEON_PRIM_TYPE_MASK) {
 	case RADEON_PRIM_TYPE_NONE:
 	case RADEON_PRIM_TYPE_POINT:
-		if (prim->numverts < 1) {
-			DRM_ERROR( "Bad nr verts for line %d\n",
-				   prim->numverts);
-			return;
-		}
-		break;
+		return nr < 1;
 	case RADEON_PRIM_TYPE_LINE:
-		if ((prim->numverts & 1) || prim->numverts == 0) {
-			DRM_ERROR( "Bad nr verts for line %d\n",
-				   prim->numverts);
-			return;
-		}
-		break;
+		return (nr & 1) || nr == 0;
 	case RADEON_PRIM_TYPE_LINE_STRIP:
-		if (prim->numverts < 2) {
-			DRM_ERROR( "Bad nr verts for line_strip %d\n",
-				   prim->numverts);
-			return;
-		}
-		break;
+		return nr < 2;
 	case RADEON_PRIM_TYPE_TRI_LIST:
 	case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
 	case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
 	case RADEON_PRIM_TYPE_RECT_LIST:
-		if (prim->numverts % 3 || prim->numverts == 0) {
-			DRM_ERROR( "Bad nr verts for tri %d\n",
-				   prim->numverts);
-			return;
-		}
-		break;
+		return nr % 3 || nr == 0;
 	case RADEON_PRIM_TYPE_TRI_FAN:
 	case RADEON_PRIM_TYPE_TRI_STRIP:
-		if (prim->numverts < 3) {
-			DRM_ERROR( "Bad nr verts for strip/fan %d\n",
-				   prim->numverts);
-			return;
-		}
-		break;
+		return nr < 3;
 	default:
-		DRM_ERROR( "buffer prim %x start %x\n", 
-			   prim->prim, prim->start );
-		return;
+		return 1;
 	}	
+}
+
 
 
-	buf_priv->dispatched = 1;
+typedef struct {	/* Primitive descriptor handed to radeon_cp_dispatch_vertex() / radeon_cp_dispatch_indices(). */
+	unsigned int start;	/* byte offset of the primitive's data inside the DMA buffer */
+	unsigned int finish;	/* byte offset where that data ends; the indexed path derives its element count from finish - start */
+	unsigned int prim;	/* hardware primitive word: type under RADEON_PRIM_TYPE_MASK, plus flags such as RADEON_PRIM_WALK_IND */
+	unsigned int numverts;	/* vertex count; the indexed path stores it in data[2] of the packet it builds */
+	unsigned int offset;   	/* indexed path only: added to agp_buffers_offset to form data[1] */
+        unsigned int vc_format;	/* vertex format word (logged as "vfmt"); the indexed path stores it in data[3] */
+} drm_radeon_tcl_prim_t;
+
+static void radeon_cp_dispatch_vertex( drm_device_t *dev,
+				       drm_buf_t *buf,
+				       drm_radeon_tcl_prim_t *prim,
+				       drm_clip_rect_t *boxes,
+				       int nbox )
+
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	drm_clip_rect_t box;
+	int offset = dev_priv->agp_buffers_offset + buf->offset + prim->start;
+	int numverts = (int)prim->numverts;
+	int i = 0;
+	RING_LOCALS;
+
+	DRM_DEBUG("%s: hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
+		  __FUNCTION__,
+		  prim->prim,
+		  prim->vc_format,
+		  prim->start,
+		  prim->finish,
+		  prim->numverts);
+
+	if (bad_prim_vertex_nr( prim->prim, prim->numverts )) {
+		DRM_ERROR( "bad prim %x numverts %d\n", 
+			   prim->prim, prim->numverts );
+		return;
+	}
 
 	do {
 		/* Emit the next cliprect */
-		if ( i < sarea_priv->nbox ) {
-			radeon_emit_clip_rect( dev_priv,
-					       &sarea_priv->boxes[i] );
+		if ( i < nbox ) {
+			if (__copy_from_user( &box, &boxes[i], sizeof(box) ))
+				return;
+
+			radeon_emit_clip_rect( dev_priv, &box );
 		}
 
 		/* Emit the vertex buffer rendering commands */
@@ -820,19 +708,18 @@ static void radeon_cp_dispatch_vertex( drm_device_t *dev,
 		ADVANCE_RING();
 
 		i++;
-	} while ( i < sarea_priv->nbox );
-
-	dev_priv->sarea_priv->last_dispatch++;
+	} while ( i < nbox );
 }
 
 
+
 static void radeon_cp_discard_buffer( drm_device_t *dev, drm_buf_t *buf )
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
 	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
 	RING_LOCALS;
 
-	buf_priv->age = dev_priv->sarea_priv->last_dispatch;
+	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
 
 	/* Emit the vertex buffer age */
 	BEGIN_RING( 2 );
@@ -841,8 +728,6 @@ static void radeon_cp_discard_buffer( drm_device_t *dev, drm_buf_t *buf )
 
 	buf->pending = 1;
 	buf->used = 0;
-	/* FIXME: Check dispatched field */
-	buf_priv->dispatched = 0;
 }
 
 static void radeon_cp_dispatch_indirect( drm_device_t *dev,
@@ -850,7 +735,6 @@ static void radeon_cp_dispatch_indirect( drm_device_t *dev,
 					 int start, int end )
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
-	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
 	RING_LOCALS;
 	DRM_DEBUG( "indirect: buf=%d s=0x%x e=0x%x\n",
 		   buf->idx, start, end );
@@ -871,8 +755,6 @@ static void radeon_cp_dispatch_indirect( drm_device_t *dev,
 			data[dwords++] = RADEON_CP_PACKET2;
 		}
 
-		buf_priv->dispatched = 1;
-
 		/* Fire off the indirect buffer */
 		BEGIN_RING( 3 );
 
@@ -882,112 +764,76 @@ static void radeon_cp_dispatch_indirect( drm_device_t *dev,
 
 		ADVANCE_RING();
 	}
-
-	dev_priv->sarea_priv->last_dispatch++;
 }
 
+
 static void radeon_cp_dispatch_indices( drm_device_t *dev,
 					drm_buf_t *elt_buf,
-					drm_radeon_prim_t *prim )
+					drm_radeon_tcl_prim_t *prim, 
+					drm_clip_rect_t *boxes,
+					int nbox )
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
-	drm_radeon_buf_priv_t *buf_priv = elt_buf->dev_private;
-	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
-	int offset = dev_priv->agp_buffers_offset + prim->numverts * 64;
+	drm_clip_rect_t box;
+	int offset = dev_priv->agp_buffers_offset + prim->offset;
 	u32 *data;
 	int dwords;
 	int i = 0;
 	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
 	int count = (prim->finish - start) / sizeof(u16);
 
-  	DRM_DEBUG( "indices: start=%x/%x end=%x count=%d nv %d offset %x\n",
-		   prim->start, start, prim->finish,
-		   count, prim->numverts, offset );
-
-	switch (prim->prim & RADEON_PRIM_TYPE_MASK) {
-	case RADEON_PRIM_TYPE_NONE:
-	case RADEON_PRIM_TYPE_POINT:
-		if (count < 1) {
-			DRM_ERROR( "Bad nr verts %d\n",
-				   count);
-			return;
-		}
-		break;
-	case RADEON_PRIM_TYPE_LINE:
-		if ((count & 1) || count == 0) {
-			DRM_ERROR( "Bad nr verts for line %d\n",
-				   count);
-			return;
-		}
-		break;
-	case RADEON_PRIM_TYPE_LINE_STRIP:
-		if (count < 2) {
-			DRM_ERROR( "Bad nr verts for line_strip %d\n",
-				   count);
-			return;
-		}
-		break;
-	case RADEON_PRIM_TYPE_TRI_LIST:
-	case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
-	case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
-	case RADEON_PRIM_TYPE_RECT_LIST:
-		if (count % 3 || count == 0) {
-			DRM_ERROR( "Bad nr verts for tri %d\n", count);
-			return;
-		}
-		break;
-	case RADEON_PRIM_TYPE_TRI_FAN:
-	case RADEON_PRIM_TYPE_TRI_STRIP:
-		if (count < 3) {
-			DRM_ERROR( "Bad nr verts for strip/fan %d\n", count);
-			return;
-		}
-		break;
-	default:
-		DRM_ERROR( "buffer prim %x start %x\n", 
-			   prim->prim, prim->start );
+	DRM_DEBUG("%s: hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
+		  __FUNCTION__,
+		  prim->prim,
+		  prim->vc_format,
+		  prim->start,
+		  prim->finish,
+		  prim->offset,
+		  prim->numverts);
+
+	if (bad_prim_vertex_nr( prim->prim, count )) {
+		DRM_ERROR( "bad prim %x count %d\n", 
+			   prim->prim, count );
 		return;
-	}	
+	}
 
-	if ( start < prim->finish ) {
-		buf_priv->dispatched = 1;
 
-		dwords = (prim->finish - prim->start + 3) / sizeof(u32);
+	if ( start >= prim->finish ||
+	     (prim->start & 0x7) ) {
+		DRM_ERROR( "buffer prim %d\n", prim->prim );
+		return;
+	}
 
-		data = (u32 *)((char *)dev_priv->buffers->handle +
-			       elt_buf->offset + prim->start);
+	dwords = (prim->finish - prim->start + 3) / sizeof(u32);
 
-		data[0] = CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, dwords-2 );
-		data[1] = offset;
-		data[2] = RADEON_MAX_VB_VERTS;
-		data[3] = prim->vc_format;
-		data[4] = (prim->prim |
-			   RADEON_PRIM_WALK_IND |
-			   RADEON_COLOR_ORDER_RGBA |
-			   RADEON_VTX_FMT_RADEON_MODE |
-			   (count << RADEON_NUM_VERTICES_SHIFT) );
+	data = (u32 *)((char *)dev_priv->buffers->handle +
+		       elt_buf->offset + prim->start);
 
-		if ( count & 0x1 ) {
-			/* unnecessary? */
-			data[dwords-1] &= 0x0000ffff;
-		}
+	data[0] = CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, dwords-2 );
+	data[1] = offset;
+	data[2] = prim->numverts;
+	data[3] = prim->vc_format;
+	data[4] = (prim->prim |
+		   RADEON_PRIM_WALK_IND |
+		   RADEON_COLOR_ORDER_RGBA |
+		   RADEON_VTX_FMT_RADEON_MODE |
+		   (count << RADEON_NUM_VERTICES_SHIFT) );
 
-		do {
-			/* Emit the next set of up to three cliprects */
-			if ( i < sarea_priv->nbox ) {
-				radeon_emit_clip_rect( dev_priv,
-						       &sarea_priv->boxes[i] );
-			}
+	do {
+		if ( i < nbox ) {
+			if (__copy_from_user( &box, &boxes[i], sizeof(box) ))
+				return;
+			
+			radeon_emit_clip_rect( dev_priv, &box );
+		}
 
-			radeon_cp_dispatch_indirect( dev, elt_buf,
-						     prim->start,
-						     prim->finish );
+		radeon_cp_dispatch_indirect( dev, elt_buf,
+					     prim->start,
+					     prim->finish );
 
-			i++;
-		} while ( i < sarea_priv->nbox );
-	}
+		i++;
+	} while ( i < nbox );
 
-	sarea_priv->last_dispatch++;
 }
 
 #define RADEON_MAX_TEXTURE_SIZE (RADEON_BUFFER_SIZE - 8 * sizeof(u32))
@@ -998,7 +844,6 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev,
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
 	drm_buf_t *buf;
-	drm_radeon_buf_priv_t *buf_priv;
 	u32 format;
 	u32 *buffer;
 	u8 *data;
@@ -1016,8 +861,6 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev,
 		   tex->offset >> 10, tex->pitch, tex->format,
 		   image->x, image->y, image->width, image->height );
 
-	buf_priv = buf->dev_private;
-
 	/* The compiler won't optimize away a division by a variable,
 	 * even if the only legal values are powers of two.  Thus, we'll
 	 * use a shift instead.
@@ -1153,7 +996,6 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev,
 
 	buf->pid = current->pid;
 	buf->used = (dwords + 8) * sizeof(u32);
-	buf_priv->discard = 1;
 
 	radeon_cp_dispatch_indirect( dev, buf, 0, buf->used );
 	radeon_cp_discard_buffer( dev, buf );
@@ -1223,25 +1065,73 @@ int radeon_cp_clear( struct inode *inode, struct file *filp,
 			     sarea_priv->nbox * sizeof(depth_boxes[0]) ) )
 		return -EFAULT;
 
-	/* Needed for depth clears via triangles???
-	 */
-	if ( sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS ) {
-		radeon_emit_state( dev_priv,
-				   &sarea_priv->context_state,
-				   sarea_priv->tex_state,
-				   sarea_priv->dirty );
+	radeon_cp_dispatch_clear( dev, &clear, depth_boxes );
 
-		sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
-				       RADEON_UPLOAD_TEX1IMAGES |
-				       RADEON_UPLOAD_TEX2IMAGES |
-				       RADEON_REQUIRE_QUIESCENCE);
-	}
+	COMMIT_RING();
+	return 0;
+}
 
-	radeon_cp_dispatch_clear( dev, &clear, depth_boxes );
+
+
+/* Not sure why this isn't set all the time:
+ */ 
+static int radeon_do_init_pageflip( drm_device_t *dev )
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	DRM_DEBUG( "%s\n", __FUNCTION__ );
+	/* Turn page flipping on.  First save both CRTC offset registers; radeon_do_cleanup_pageflip() writes them back. */
+	dev_priv->crtc_offset =      RADEON_READ( RADEON_CRTC_OFFSET );
+	dev_priv->crtc_offset_cntl = RADEON_READ( RADEON_CRTC_OFFSET_CNTL );
+	/* Point the CRTC at the front buffer and add RADEON_CRTC_OFFSET_FLIP_CNTL to the saved control bits. */
+	RADEON_WRITE( RADEON_CRTC_OFFSET, dev_priv->front_offset );
+	RADEON_WRITE( RADEON_CRTC_OFFSET_CNTL,
+		      dev_priv->crtc_offset_cntl |
+		      RADEON_CRTC_OFFSET_FLIP_CNTL );
+	/* Record that flipping is active and that page 0 is the current page. */
+	dev_priv->page_flipping = 1;
+	dev_priv->current_page = 0;
+
+	return 0;
+}
+
+int radeon_do_cleanup_pageflip( drm_device_t *dev )
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	DRM_DEBUG( "%s\n", __FUNCTION__ );
+	/* Turn page flipping off: write back the CRTC offset registers saved by radeon_do_init_pageflip(). */
+	RADEON_WRITE( RADEON_CRTC_OFFSET,      dev_priv->crtc_offset );
+	RADEON_WRITE( RADEON_CRTC_OFFSET_CNTL, dev_priv->crtc_offset_cntl );
+	/* Clear the flipping state so radeon_cp_flip() re-initialises it on next use.  Always returns 0. */
+	dev_priv->page_flipping = 0;
+	dev_priv->current_page = 0;
 
 	return 0;
 }
 
+/* Swapping and flipping are different operations, need different ioctls.
+ * They can & should be intermixed to support multiple 3d windows.  
+ */
+int radeon_cp_flip( struct inode *inode, struct file *filp,
+		    unsigned int cmd, unsigned long arg )
+{
+	drm_file_t *priv = filp->private_data;
+	drm_device_t *dev = priv->dev;
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	DRM_DEBUG( "%s\n", __FUNCTION__ );
+	/* The two *_WITH_RETURN macros are defined elsewhere; presumably they return early unless the lock is held / the ring has room -- confirm. */
+	LOCK_TEST_WITH_RETURN( dev );
+
+	RING_SPACE_TEST_WITH_RETURN( dev_priv );
+	/* Page flipping is set up lazily, on the first flip request. */
+	if (!dev_priv->page_flipping) 
+		radeon_do_init_pageflip( dev );
+	/* Queue the flip itself; COMMIT_RING() follows every dispatch in this file's ioctl handlers. */
+	radeon_cp_dispatch_flip( dev );
+
+	COMMIT_RING();
+	return 0;
+}
+
 int radeon_cp_swap( struct inode *inode, struct file *filp,
 		    unsigned int cmd, unsigned long arg )
 {
@@ -1258,13 +1148,10 @@ int radeon_cp_swap( struct inode *inode, struct file *filp,
 	if ( sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS )
 		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
 
-	if ( !dev_priv->page_flipping ) {
-		radeon_cp_dispatch_swap( dev );
-		dev_priv->sarea_priv->ctx_owner = 0;
-	} else {
-		radeon_cp_dispatch_flip( dev );
-	}
+	radeon_cp_dispatch_swap( dev );
+	dev_priv->sarea_priv->ctx_owner = 0;
 
+	COMMIT_RING();
 	return 0;
 }
 
@@ -1277,9 +1164,8 @@ int radeon_cp_vertex( struct inode *inode, struct file *filp,
 	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
 	drm_device_dma_t *dma = dev->dma;
 	drm_buf_t *buf;
-	drm_radeon_buf_priv_t *buf_priv;
 	drm_radeon_vertex_t vertex;
-	drm_radeon_prim_t prim;
+	drm_radeon_tcl_prim_t prim;
 
 	LOCK_TEST_WITH_RETURN( dev );
 
@@ -1311,7 +1197,6 @@ int radeon_cp_vertex( struct inode *inode, struct file *filp,
 	VB_AGE_TEST_WITH_RETURN( dev_priv );
 
 	buf = dma->buflist[vertex.idx];
-	buf_priv = buf->dev_private;
 
 	if ( buf->pid != current->pid ) {
 		DRM_ERROR( "process %d using buffer owned by %d\n",
@@ -1323,9 +1208,11 @@ int radeon_cp_vertex( struct inode *inode, struct file *filp,
 		return -EINVAL;
 	}
 
-	buf->used = vertex.count; /* not used? */
-
+	/* Build up a prim_t record:
+	 */
 	if (vertex.count) {
+		buf->used = vertex.count; /* not used? */
+
 		if ( sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS ) {
 			radeon_emit_state( dev_priv,
 					   &sarea_priv->context_state,
@@ -1338,22 +1225,22 @@ int radeon_cp_vertex( struct inode *inode, struct file *filp,
 					       RADEON_REQUIRE_QUIESCENCE);
 		}
 
-		/* Build up a prim_t record:
-		 */
 		prim.start = 0;
 		prim.finish = vertex.count; /* unused */
 		prim.prim = vertex.prim;
-		prim.stateidx = 0xff;	/* unused */
 		prim.numverts = vertex.count;
 		prim.vc_format = dev_priv->sarea_priv->vc_format;
 		
-		radeon_cp_dispatch_vertex( dev, buf, &prim );
+		radeon_cp_dispatch_vertex( dev, buf, &prim,
+					   dev_priv->sarea_priv->boxes,
+					   dev_priv->sarea_priv->nbox );
 	}
 
 	if (vertex.discard) {
 		radeon_cp_discard_buffer( dev, buf );
 	}
 
+	COMMIT_RING();
 	return 0;
 }
 
@@ -1366,9 +1253,8 @@ int radeon_cp_indices( struct inode *inode, struct file *filp,
 	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
 	drm_device_dma_t *dma = dev->dma;
 	drm_buf_t *buf;
-	drm_radeon_buf_priv_t *buf_priv;
 	drm_radeon_indices_t elts;
-	drm_radeon_prim_t prim;
+	drm_radeon_tcl_prim_t prim;
 	int count;
 
 	LOCK_TEST_WITH_RETURN( dev );
@@ -1401,7 +1287,6 @@ int radeon_cp_indices( struct inode *inode, struct file *filp,
 	VB_AGE_TEST_WITH_RETURN( dev_priv );
 
 	buf = dma->buflist[elts.idx];
-	buf_priv = buf->dev_private;
 
 	if ( buf->pid != current->pid ) {
 		DRM_ERROR( "process %d using buffer owned by %d\n",
@@ -1445,15 +1330,18 @@ int radeon_cp_indices( struct inode *inode, struct file *filp,
 	prim.start = elts.start;
 	prim.finish = elts.end; 
 	prim.prim = elts.prim;
-	prim.stateidx = 0xff;	/* unused */
-	prim.numverts = 0;	/* indexed from start of dma area */
+	prim.offset = 0;	/* offset from start of dma buffers */
+	prim.numverts = RADEON_MAX_VB_VERTS; /* duh */
 	prim.vc_format = dev_priv->sarea_priv->vc_format;
 	
-	radeon_cp_dispatch_indices( dev, buf, &prim );
+	radeon_cp_dispatch_indices( dev, buf, &prim,
+				   dev_priv->sarea_priv->boxes,
+				   dev_priv->sarea_priv->nbox );
 	if (elts.discard) {
-	   radeon_cp_discard_buffer( dev, buf );
+		radeon_cp_discard_buffer( dev, buf );
 	}
 
+	COMMIT_RING();
 	return 0;
 }
 
@@ -1465,6 +1353,7 @@ int radeon_cp_texture( struct inode *inode, struct file *filp,
 	drm_radeon_private_t *dev_priv = dev->dev_private;
 	drm_radeon_texture_t tex;
 	drm_radeon_tex_image_t image;
+	int ret;
 
 	LOCK_TEST_WITH_RETURN( dev );
 
@@ -1484,7 +1373,10 @@ int radeon_cp_texture( struct inode *inode, struct file *filp,
 	RING_SPACE_TEST_WITH_RETURN( dev_priv );
 	VB_AGE_TEST_WITH_RETURN( dev_priv );
 
-	return radeon_cp_dispatch_texture( dev, &tex, &image );
+	ret = radeon_cp_dispatch_texture( dev, &tex, &image );
+
+	COMMIT_RING();
+	return ret;
 }
 
 int radeon_cp_stipple( struct inode *inode, struct file *filp,
@@ -1509,6 +1401,7 @@ int radeon_cp_stipple( struct inode *inode, struct file *filp,
 
 	radeon_cp_dispatch_stipple( dev, mask );
 
+	COMMIT_RING();
 	return 0;
 }
 
@@ -1520,7 +1413,6 @@ int radeon_cp_indirect( struct inode *inode, struct file *filp,
 	drm_radeon_private_t *dev_priv = dev->dev_private;
 	drm_device_dma_t *dma = dev->dma;
 	drm_buf_t *buf;
-	drm_radeon_buf_priv_t *buf_priv;
 	drm_radeon_indirect_t indirect;
 	RING_LOCALS;
 
@@ -1546,7 +1438,6 @@ int radeon_cp_indirect( struct inode *inode, struct file *filp,
 	}
 
 	buf = dma->buflist[indirect.idx];
-	buf_priv = buf->dev_private;
 
 	if ( buf->pid != current->pid ) {
 		DRM_ERROR( "process %d using buffer owned by %d\n",
@@ -1568,7 +1459,6 @@ int radeon_cp_indirect( struct inode *inode, struct file *filp,
 	VB_AGE_TEST_WITH_RETURN( dev_priv );
 
 	buf->used = indirect.end;
-	buf_priv->discard = indirect.discard;
 
 	/* Wait for the 3D stream to idle before the indirect buffer
 	 * containing 2D acceleration commands is processed.
@@ -1585,10 +1475,11 @@ int radeon_cp_indirect( struct inode *inode, struct file *filp,
 	 */
 	radeon_cp_dispatch_indirect( dev, buf, indirect.start, indirect.end );
 	if (indirect.discard) {
-	   radeon_cp_discard_buffer( dev, buf );
+		radeon_cp_discard_buffer( dev, buf );
 	}
 
 
+	COMMIT_RING();
 	return 0;
 }
 
@@ -1598,9 +1489,9 @@ int radeon_cp_vertex2( struct inode *inode, struct file *filp,
 	drm_file_t *priv = filp->private_data;
 	drm_device_t *dev = priv->dev;
 	drm_radeon_private_t *dev_priv = dev->dev_private;
+	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
 	drm_device_dma_t *dma = dev->dma;
 	drm_buf_t *buf;
-	drm_radeon_buf_priv_t *buf_priv;
 	drm_radeon_vertex2_t vertex;
 	int i;
 	unsigned char laststate;
@@ -1629,7 +1520,6 @@ int radeon_cp_vertex2( struct inode *inode, struct file *filp,
 	VB_AGE_TEST_WITH_RETURN( dev_priv );
 
 	buf = dma->buflist[vertex.idx];
-	buf_priv = buf->dev_private;
 
 	if ( buf->pid != current->pid ) {
 		DRM_ERROR( "process %d using buffer owned by %d\n",
@@ -1641,23 +1531,17 @@ int radeon_cp_vertex2( struct inode *inode, struct file *filp,
 		DRM_ERROR( "sending pending buffer %d\n", vertex.idx );
 		return -EINVAL;
 	}
+	
+	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
+		return -EINVAL;
 
 	for (laststate = 0xff, i = 0 ; i < vertex.nr_prims ; i++) {
 		drm_radeon_prim_t prim;
+		drm_radeon_tcl_prim_t tclprim;
 		
 		if ( copy_from_user( &prim, &vertex.prim[i], sizeof(prim) ) )
 			return -EFAULT;
 		
-/*    		printk( "prim %d vfmt %x hwprim %x start %d finish %d\n", */
-/*  			   i, prim.vc_format, prim.prim, */
-/*  			   prim.start, prim.finish ); */
-
-		if (  (prim.prim & RADEON_PRIM_TYPE_MASK) > 
-		      RADEON_PRIM_TYPE_3VRT_LINE_LIST ) {
-			DRM_ERROR( "buffer prim %d\n", prim.prim );
-			return -EINVAL;
-		}
-
 		if ( prim.stateidx != laststate ) {
 			drm_radeon_state_t state;			       
 				
@@ -1666,34 +1550,346 @@ int radeon_cp_vertex2( struct inode *inode, struct file *filp,
 					     sizeof(state) ) )
 				return -EFAULT;
 
-/*  			printk("emit state %d (%p) dirty %x\n", */
-/*  			       prim.stateidx, */
-/*  			       &vertex.state[prim.stateidx], */
-/*  			       state.dirty); */
-
 			radeon_emit_state2( dev_priv, &state );
 
 			laststate = prim.stateidx;
 		}
 
-		if ( prim.finish <= prim.start )
-			continue;
-
-		if ( prim.start & 0x7 ) {
-			DRM_ERROR( "misaligned buffer 0x%x\n", prim.start );
-			return -EINVAL;
-		}
+		tclprim.start = prim.start;
+		tclprim.finish = prim.finish;
+		tclprim.prim = prim.prim;
+		tclprim.vc_format = prim.vc_format;
 
 		if ( prim.prim & RADEON_PRIM_WALK_IND ) {
-			radeon_cp_dispatch_indices( dev, buf, &prim );
+			tclprim.offset = prim.numverts * 64;
+			tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
+
+			radeon_cp_dispatch_indices( dev, buf, &tclprim,
+						    sarea_priv->boxes,
+						    sarea_priv->nbox);
 		} else {
-			radeon_cp_dispatch_vertex( dev, buf, &prim );
+			tclprim.numverts = prim.numverts;
+			tclprim.offset = 0; /* not used */
+
+			radeon_cp_dispatch_vertex( dev, buf, &tclprim,
+						   sarea_priv->boxes,
+						   sarea_priv->nbox);
 		}
+		
+		if (sarea_priv->nbox == 1)
+			sarea_priv->nbox = 0;
 	}
 
 	if ( vertex.discard ) {
 		radeon_cp_discard_buffer( dev, buf );
 	}
 
+	COMMIT_RING();
+	return 0;
+}
+
+/* Emit the state packet selected by header.packet.packet_id as a PACKET0
+ * write of packet[id].len dwords read from the user command buffer, then
+ * advance cmdbuf.  Returns 0, or -EINVAL on a bad id or too-short buffer. */
+static int radeon_emit_packets( drm_radeon_private_t *dev_priv,
+	drm_radeon_cmd_header_t header, drm_radeon_cmd_buffer_t *cmdbuf )
+{
+	int id = (int)header.packet.packet_id;
+	int sz;
+	RING_LOCALS;
+
+	/* id is user-supplied.  NOTE(review): assumes packet[] is an array. */
+	if (id < 0 || id >= (int)(sizeof(packet) / sizeof(packet[0])))
+		return -EINVAL;
+	sz = packet[id].len;
+	if (sz * sizeof(int) > cmdbuf->bufsz)
+		return -EINVAL;
+	BEGIN_RING(sz+1);
+	OUT_RING( CP_PACKET0( packet[id].start, (sz-1) ) );
+	OUT_RING_USER_TABLE( (int *)cmdbuf->buf, sz );
+	ADVANCE_RING();
+	cmdbuf->buf += sz * sizeof(int);
+	cmdbuf->bufsz -= sz * sizeof(int);
+	return 0;
+}
+
+/* Emit header.scalars.count user dwords to the TCL scalar data register and
+ * advance cmdbuf; -EINVAL if the buffer holds fewer dwords than claimed. */
+static inline int radeon_emit_scalars( drm_radeon_private_t *dev_priv,
+	drm_radeon_cmd_header_t header, drm_radeon_cmd_buffer_t *cmdbuf )
+{
+	int sz = header.scalars.count;
+	int start = header.scalars.offset;
+	int stride = header.scalars.stride;
+	RING_LOCALS;
+	if (sz * sizeof(int) > cmdbuf->bufsz)	/* count is user-supplied */
+		return -EINVAL;
+	BEGIN_RING( 3+sz );
+	OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) );
+	OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
+	OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) );
+	OUT_RING_USER_TABLE( (int *)cmdbuf->buf, sz );
+	ADVANCE_RING();
+	cmdbuf->buf += sz * sizeof(int);
+	cmdbuf->bufsz -= sz * sizeof(int);
 	return 0;
 }
+
+/* Emit header.vectors.count user dwords to the TCL vector data register and
+ * advance cmdbuf; -EINVAL if the buffer holds fewer dwords than claimed. */
+static inline int radeon_emit_vectors( drm_radeon_private_t *dev_priv,
+	drm_radeon_cmd_header_t header, drm_radeon_cmd_buffer_t *cmdbuf )
+{
+	int sz = header.vectors.count;
+	int start = header.vectors.offset;
+	int stride = header.vectors.stride;
+	RING_LOCALS;
+
+	if (sz * sizeof(int) > cmdbuf->bufsz)	/* count is user-supplied */
+		return -EINVAL;
+	BEGIN_RING( 3+sz );
+	OUT_RING( CP_PACKET0( RADEON_SE_TCL_VECTOR_INDX_REG, 0 ) );
+	OUT_RING( start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
+	OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_VECTOR_DATA_REG, (sz-1) ) );
+	OUT_RING_USER_TABLE( (int *)cmdbuf->buf, sz );
+	ADVANCE_RING();
+	cmdbuf->buf += sz * sizeof(int);
+	cmdbuf->bufsz -= sz * sizeof(int);
+	return 0;
+}
+
+/* Copy one PACKET3 command from the user command buffer into the ring.
+ * The dword count comes from the packet's own header; cmdbuf is advanced
+ * past the packet.  Returns 0, -EFAULT if the header cannot be read, or
+ * -EINVAL if it is not a PACKET3 header or overruns the buffer. */
+static int radeon_emit_packet3( drm_device_t *dev,
+				drm_radeon_cmd_buffer_t *cmdbuf )
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	int *src = (int *)cmdbuf->buf;
+	int hdr, ndwords, nbytes;
+	RING_LOCALS;
+
+	DRM_DEBUG("%s\n", __FUNCTION__);
+	if (__get_user( hdr, src ))
+		return -EFAULT;
+	if ((hdr & 0xc0000000) != RADEON_CP_PACKET3)
+		return -EINVAL;
+	ndwords = 2 + ((hdr & RADEON_CP_PACKET_COUNT_MASK) >> 16);
+	nbytes = ndwords * 4;
+	if (nbytes > cmdbuf->bufsz)
+		return -EINVAL;
+
+	BEGIN_RING( ndwords );
+	OUT_RING_USER_TABLE( src, ndwords );
+	ADVANCE_RING();
+	cmdbuf->buf += nbytes;
+	cmdbuf->bufsz -= nbytes;
+	return 0;
+}
+
+/* Like radeon_emit_packet3(), but replay the same PACKET3 once per user
+ * cliprect, emitting cmdbuf->boxes[n] before each copy, then advance cmdbuf.
+ * A lone cliprect is retired afterwards (nbox reset to 0).  Returns 0,
+ * -EFAULT on a failed user read, or -EINVAL on a bad header or size. */
+static int radeon_emit_packet3_cliprect( drm_device_t *dev,
+					 drm_radeon_cmd_buffer_t *cmdbuf )
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	drm_clip_rect_t *boxes = cmdbuf->boxes;
+	int *src = (int *)cmdbuf->buf;
+	drm_clip_rect_t rect;
+	int hdr, ndwords, nbytes;
+	int n = 0;
+	RING_LOCALS;
+
+	DRM_DEBUG("%s\n", __FUNCTION__);
+	if (__get_user( hdr, src ))
+		return -EFAULT;
+	if ((hdr & 0xc0000000) != RADEON_CP_PACKET3)
+		return -EINVAL;
+	ndwords = 2 + ((hdr & RADEON_CP_PACKET_COUNT_MASK) >> 16);
+	nbytes = ndwords * 4;
+	if (nbytes > cmdbuf->bufsz)
+		return -EINVAL;
+
+	/* do/while: the packet goes out once even when nbox is 0 */
+	do {
+		if ( n < cmdbuf->nbox ) {
+			if (__copy_from_user( &rect, boxes + n, sizeof(rect) ))
+				return -EFAULT;
+			radeon_emit_clip_rect( dev_priv, &rect );
+		}
+		BEGIN_RING( ndwords );
+		OUT_RING_USER_TABLE( src, ndwords );
+		ADVANCE_RING();
+	} while ( ++n < cmdbuf->nbox );
+
+	if (cmdbuf->nbox == 1)
+		cmdbuf->nbox = 0;
+	cmdbuf->buf += nbytes;
+	cmdbuf->bufsz -= nbytes;
+	return 0;
+}
+
+/* ioctl handler: parse the user command buffer one header at a time and emit
+ * each command; returns 0, or -EINVAL/-EFAULT as soon as one is rejected. */
+int radeon_cp_cmdbuf( struct inode *inode, struct file *filp,
+		      unsigned int cmd, unsigned long arg )
+{
+	drm_file_t *priv = filp->private_data;
+	drm_device_t *dev = priv->dev;
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	drm_device_dma_t *dma = dev->dma;
+	drm_buf_t *buf = NULL;
+	int idx;
+	drm_radeon_cmd_buffer_t cmdbuf;
+	drm_radeon_cmd_header_t header;
+
+	LOCK_TEST_WITH_RETURN( dev );
+
+	if ( !dev_priv ) {
+		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
+		return -EINVAL;
+	}
+
+	if ( copy_from_user( &cmdbuf, (drm_radeon_cmd_buffer_t *)arg,
+			     sizeof(cmdbuf) ) ) {
+		DRM_ERROR("copy_from_user\n");
+		return -EFAULT;
+	}
+
+	DRM_DEBUG( "%s: pid=%d\n", __FUNCTION__, current->pid );
+	RING_SPACE_TEST_WITH_RETURN( dev_priv );
+	VB_AGE_TEST_WITH_RETURN( dev_priv );
+
+	if (verify_area( VERIFY_READ, cmdbuf.buf, cmdbuf.bufsz ))
+		return -EFAULT;
+	/* Negative or huge nbox would wrap the byte count checked below. */
+	if ((unsigned long)cmdbuf.nbox > ~0UL / sizeof(drm_clip_rect_t))
+		return -EINVAL;
+	if (cmdbuf.nbox && verify_area( VERIFY_READ, cmdbuf.boxes,
+			 cmdbuf.nbox * sizeof(drm_clip_rect_t)))
+		return -EFAULT;
+
+	while ( cmdbuf.bufsz >= (int)sizeof(header) ) {
+		/* (int) cast: a negative bufsz, if signed, must stop the loop */
+		if (__get_user( header.i, (int *)cmdbuf.buf )) {
+			DRM_ERROR("__get_user %p\n", cmdbuf.buf);
+			return -EFAULT;
+		}
+
+		cmdbuf.buf += sizeof(header);
+		cmdbuf.bufsz -= sizeof(header);
+
+		switch (header.header.cmd_type) {
+		case RADEON_CMD_PACKET:
+			if (radeon_emit_packets( dev_priv, header, &cmdbuf )) {
+				DRM_ERROR("radeon_emit_packets failed\n");
+				return -EINVAL;
+			}
+			break;
+
+		case RADEON_CMD_SCALARS:
+			if (radeon_emit_scalars( dev_priv, header, &cmdbuf )) {
+				DRM_ERROR("radeon_emit_scalars failed\n");
+				return -EINVAL;
+			}
+			break;
+
+		case RADEON_CMD_VECTORS:
+			if (radeon_emit_vectors( dev_priv, header, &cmdbuf )) {
+				DRM_ERROR("radeon_emit_vectors failed\n");
+				return -EINVAL;
+			}
+			break;
+
+		case RADEON_CMD_DMA_DISCARD:
+			idx = header.dma.buf_idx;
+			if ( idx < 0 || idx >= dma->buf_count ) {
+				DRM_ERROR( "buffer index %d (of %d max)\n",
+					   idx, dma->buf_count - 1 );
+				return -EINVAL;
+			}
+
+			buf = dma->buflist[idx];
+			if ( buf->pid != current->pid || buf->pending ) {
+				DRM_ERROR( "bad buffer\n" );
+				return -EINVAL;
+			}
+
+			radeon_cp_discard_buffer( dev, buf );
+			break;
+
+		case RADEON_CMD_PACKET3:
+			if (radeon_emit_packet3( dev, &cmdbuf )) {
+				DRM_ERROR("radeon_emit_packet3 failed\n");
+				return -EINVAL;
+			}
+			break;
+
+		case RADEON_CMD_PACKET3_CLIP:
+			if (radeon_emit_packet3_cliprect( dev, &cmdbuf )) {
+				DRM_ERROR("radeon_emit_packet3_clip failed\n");
+				return -EINVAL;
+			}
+			break;
+
+		default:
+			DRM_ERROR("bad cmd_type %d at %p\n",
+				  header.header.cmd_type,
+				  cmdbuf.buf - sizeof(header));
+			return -EINVAL;
+		}
+	}
+
+
+	COMMIT_RING();
+	return 0;
+}
+
+/* ioctl handler: look up the driver parameter named by param.param and copy
+ * its int value to the user pointer param.value; -EINVAL if unknown. */
+int radeon_cp_getparam( struct inode *inode, struct file *filp,
+		      unsigned int cmd, unsigned long arg )
+{
+	drm_file_t *priv = filp->private_data;
+	drm_device_t *dev = priv->dev;
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	drm_radeon_getparam_t param;
+	int value;
+
+	if ( !dev_priv ) {
+		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
+		return -EINVAL;
+	}
+
+	if ( copy_from_user( &param, (drm_radeon_getparam_t *)arg,
+			     sizeof(param) ) ) {
+		DRM_ERROR("copy_from_user\n");
+		return -EFAULT;
+	}
+
+	DRM_DEBUG( "%s: pid=%d\n", __FUNCTION__, current->pid );
+
+	switch( param.param ) {
+	case RADEON_PARAM_AGP_BUFFER_OFFSET:
+		value = dev_priv->agp_buffers_offset;
+		break;
+	default:	/* unknown parameter: nothing is written back */
+		return -EINVAL;
+	}
+
+	if ( copy_to_user( param.value, &value, sizeof(int) ) ) {
+		DRM_ERROR( "copy_to_user\n" );
+		return -EFAULT;
+	}
+	
+	return 0;
+}
+
+
+
+
+
+
diff --git a/xc/programs/Xserver/hw/xfree86/os-support/shared/drm/kernel/drm.h b/xc/programs/Xserver/hw/xfree86/os-support/shared/drm/kernel/drm.h
index d116f3752..6ab295c48 100644
--- a/xc/programs/Xserver/hw/xfree86/os-support/shared/drm/kernel/drm.h
+++ b/xc/programs/Xserver/hw/xfree86/os-support/shared/drm/kernel/drm.h
@@ -84,6 +84,10 @@ typedef unsigned int  drm_magic_t;
 /* Warning: If you change this structure, make sure you change
  * XF86DRIClipRectRec in the server as well */
 
+/* KW: Actually it's illegal to change either for
+ * backwards-compatibility reasons.
+ */
+
 typedef struct drm_clip_rect {
 	unsigned short	x1;
 	unsigned short	y1;
diff --git a/xc/programs/Xserver/hw/xfree86/os-support/xf86drm.h b/xc/programs/Xserver/hw/xfree86/os-support/xf86drm.h
index d7859bbe7..a7b0b560d 100644
--- a/xc/programs/Xserver/hw/xfree86/os-support/xf86drm.h
+++ b/xc/programs/Xserver/hw/xfree86/os-support/xf86drm.h
@@ -218,6 +218,16 @@ typedef struct _drmTextureRegion {
     unsigned int  age;
 } drmTextureRegion, *drmTextureRegionPtr;
 
+/* Clip rectangle; presumably mirrors kernel drm_clip_rect (confirm). */
+typedef struct _drmClipRect {
+    unsigned short	x1; /* Upper left: inclusive */
+    unsigned short	y1; /* upper-left corner, y coordinate */
+    unsigned short	x2; /* Lower right: exclusive */
+    unsigned short	y2; /* lower-right corner, y coordinate */
+} drmClipRect, *drmClipRectPtr;
+
+
+
 #define __drm_dummy_lock(lock) (*(__volatile__ unsigned int *)lock)
 
 #define DRM_LOCK_HELD  0x80000000 /* Hardware lock is held                 */
author	keithw <keithw>	2002-06-12 15:50:23 +0000
committer	keithw <keithw>	2002-06-12 15:50:23 +0000
commit	9a7c4c799a7ddfe709e590e1eb9ad03102bbb838 (patch)
tree	4a5310fd7e1d615f2761c5912787195e095b0fa7
parent	80b74af98bae5fa1fd8d26b3b82d474a7bc4a9ac (diff)