35 files changed, 11515 insertions, 2078 deletions
diff --git a/xc/lib/GL/mesa/src/drv/radeon/Imakefile b/xc/lib/GL/mesa/src/drv/radeon/Imakefile
index 2a0922ccf..7cb34f1c3 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/Imakefile
+++ b/xc/lib/GL/mesa/src/drv/radeon/Imakefile
@@ -1,4 +1,4 @@
-XCOMM $XFree86: xc/lib/GL/mesa/src/drv/radeon/Imakefile,v 1.9 2002/02/23 00:45:50 dawes Exp $
+XCOMM $XFree86: xc/lib/GL/mesa/src/drv/radeon/Imakefile,v 1.11 2002/11/25 14:04:51 eich Exp $
 
 #include <Threads.tmpl>
 
@@ -25,7 +25,7 @@ XCOMM $XFree86: xc/lib/GL/mesa/src/drv/radeon/Imakefile,v 1.9 2002/02/23 00:45:5
 #include "../../tnl/Imakefile.inc"
 #include "../../tnl_dd/Imakefile.inc"
 #include "../../Imakefile.inc"
-#ifdef i386Architecture
+#if defined(i386Architecture) && MesaUseX86Asm
 #include "../../X86/Imakefile.inc"
 #endif
 #ifdef SparcArchitecture
@@ -40,8 +40,7 @@ XCOMM $XFree86: xc/lib/GL/mesa/src/drv/radeon/Imakefile,v 1.9 2002/02/23 00:45:5
       DRMOBJS = $(GLXLIBSRC)/dri/drm/xf86drm.o \
 		$(GLXLIBSRC)/dri/drm/xf86drmHash.o \
 		$(GLXLIBSRC)/dri/drm/xf86drmRandom.o \
-		$(GLXLIBSRC)/dri/drm/xf86drmSL.o \
-		$(GLXLIBSRC)/dri/drm/xf86drmRadeon.o
+		$(GLXLIBSRC)/dri/drm/xf86drmSL.o
 
 #ifdef GlxSoProf
        LOSRCS = ../../../../lowpc.c
@@ -55,7 +54,7 @@ XCOMM $XFree86: xc/lib/GL/mesa/src/drv/radeon/Imakefile,v 1.9 2002/02/23 00:45:5
          OBJS = $(LOOBJS) $(DRIOBJS) $(DRMOBJS) $(COREMESAOBJS) \
 		$(MESA_ASM_OBJS) $(COMMONOBJS) $(RADEONOBJS) $(HIOBJS)
 
-REQUIREDLIBS = MathLibrary $(LDPRELIB) $(GLXLIB)
+REQUIREDLIBS = MathLibrary $(LDPRELIB) $(GLXLIB) $(XONLYLIB)
 
 #include <Library.tmpl>
 
diff --git a/xc/lib/GL/mesa/src/drv/radeon/Imakefile.inc b/xc/lib/GL/mesa/src/drv/radeon/Imakefile.inc
index e564cf074..175a3e3bf 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/Imakefile.inc
+++ b/xc/lib/GL/mesa/src/drv/radeon/Imakefile.inc
@@ -1,4 +1,4 @@
-XCOMM $XFree86: xc/lib/GL/mesa/src/drv/radeon/Imakefile.inc,v 1.3 2002/02/22 21:45:00 dawes Exp $
+XCOMM $XFree86: xc/lib/GL/mesa/src/drv/radeon/Imakefile.inc,v 1.4 2002/10/30 12:51:54 alanh Exp $
 
 #ifndef MesaDrvSrcDir
 #define MesaDrvSrcDir $(GLXLIBSRC)/mesa/src/drv
@@ -26,7 +26,8 @@ ALLOC_DEFINES = -DMALLOC_0_RETURNS_NULL
 		-I$(XF86DRIVERSRC)/ati \
 		-I$(XF86COMSRC) \
 		-I$(GLXLIBSRC)/dri/drm \
-		-I$(GLXLIBSRC)/include
+		-I$(GLXLIBSRC)/include \
+		-I$(XTOP)/include
 #endif
 
 MESA_INCLUDES = -I$(MESASRCDIR)/src \
@@ -35,81 +36,141 @@ MESA_INCLUDES = -I$(MESASRCDIR)/src \
    X_INCLUDES = -I$(XINCLUDESRC) -I$(EXTINCSRC)
 
    RADEONSRCS = $(MESADRVRADEONBUILDDIR)radeon_context.c \
+		$(MESADRVRADEONBUILDDIR)radeon_compat.c \
 		$(MESADRVRADEONBUILDDIR)radeon_ioctl.c \
 		$(MESADRVRADEONBUILDDIR)radeon_lock.c \
-		$(MESADRVRADEONBUILDDIR)radeon_render.c \
+		$(MESADRVRADEONBUILDDIR)radeon_maos.c \
+	        $(MESADRVRADEONBUILDDIR)radeon_sanity.c \
 	        $(MESADRVRADEONBUILDDIR)radeon_screen.c \
 		$(MESADRVRADEONBUILDDIR)radeon_span.c \
 		$(MESADRVRADEONBUILDDIR)radeon_state.c \
+		$(MESADRVRADEONBUILDDIR)radeon_state_init.c \
+		$(MESADRVRADEONBUILDDIR)radeon_swtcl.c \
+		$(MESADRVRADEONBUILDDIR)radeon_tcl.c \
 		$(MESADRVRADEONBUILDDIR)radeon_tex.c \
 		$(MESADRVRADEONBUILDDIR)radeon_texmem.c \
 		$(MESADRVRADEONBUILDDIR)radeon_texstate.c \
-		$(MESADRVRADEONBUILDDIR)radeon_tris.c \
-		$(MESADRVRADEONBUILDDIR)radeon_vb.c 
+		$(MESADRVRADEONBUILDDIR)radeon_vtxfmt.c \
+		$(MESADRVRADEONBUILDDIR)radeon_vtxfmt_x86.c \
+		$(MESADRVRADEONBUILDDIR)radeon_vtxtmp_x86.S \
+		$(MESADRVRADEONBUILDDIR)radeon_vtxfmt_sse.c \
+		$(MESADRVRADEONBUILDDIR)radeon_vtxfmt_c.c
 
    RADEONOBJS = $(MESADRVRADEONBUILDDIR)radeon_context.o \
+		$(MESADRVRADEONBUILDDIR)radeon_compat.o \
 		$(MESADRVRADEONBUILDDIR)radeon_ioctl.o \
 		$(MESADRVRADEONBUILDDIR)radeon_lock.o \
-		$(MESADRVRADEONBUILDDIR)radeon_render.o \
+		$(MESADRVRADEONBUILDDIR)radeon_maos.o \
+	        $(MESADRVRADEONBUILDDIR)radeon_sanity.o \
 	        $(MESADRVRADEONBUILDDIR)radeon_screen.o \
 		$(MESADRVRADEONBUILDDIR)radeon_span.o \
 		$(MESADRVRADEONBUILDDIR)radeon_state.o \
+		$(MESADRVRADEONBUILDDIR)radeon_state_init.o \
+		$(MESADRVRADEONBUILDDIR)radeon_swtcl.o \
+		$(MESADRVRADEONBUILDDIR)radeon_tcl.o \
 		$(MESADRVRADEONBUILDDIR)radeon_tex.o \
 		$(MESADRVRADEONBUILDDIR)radeon_texmem.o \
 		$(MESADRVRADEONBUILDDIR)radeon_texstate.o \
-		$(MESADRVRADEONBUILDDIR)radeon_tris.o \
-		$(MESADRVRADEONBUILDDIR)radeon_vb.o 
+		$(MESADRVRADEONBUILDDIR)radeon_vtxfmt.o \
+		$(MESADRVRADEONBUILDDIR)radeon_vtxfmt_c.o
+
+#ifdef i386Architecture
+   RADEONOBJS += $(MESADRVRADEONBUILDDIR)radeon_vtxfmt_x86.o \
+		$(MESADRVRADEONBUILDDIR)radeon_vtxtmp_x86.o \
+		$(MESADRVRADEONBUILDDIR)radeon_vtxfmt_sse.o 
+#endif
 
    RADEONUOBJS = $(MESADRVRADEONBUILDDIR)unshared/radeon_context.o \
+		$(MESADRVRADEONBUILDDIR)unshared/radeon_compat.o \
 		$(MESADRVRADEONBUILDDIR)unshared/radeon_ioctl.o \
 		$(MESADRVRADEONBUILDDIR)unshared/radeon_lock.o \
-		$(MESADRVRADEONBUILDDIR)unshared/radeon_render.o \
+		$(MESADRVRADEONBUILDDIR)unshared/radeon_maos.o \
+	        $(MESADRVRADEONBUILDDIR)unshared/radeon_sanity.o \
 	        $(MESADRVRADEONBUILDDIR)unshared/radeon_screen.o \
 		$(MESADRVRADEONBUILDDIR)unshared/radeon_span.o \
 		$(MESADRVRADEONBUILDDIR)unshared/radeon_state.o \
+		$(MESADRVRADEONBUILDDIR)unshared/radeon_state_init.o \
+		$(MESADRVRADEONBUILDDIR)unshared/radeon_swtcl.o \
+		$(MESADRVRADEONBUILDDIR)unshared/radeon_tcl.o \
 		$(MESADRVRADEONBUILDDIR)unshared/radeon_tex.o \
 		$(MESADRVRADEONBUILDDIR)unshared/radeon_texmem.o \
 		$(MESADRVRADEONBUILDDIR)unshared/radeon_texstate.o \
-		$(MESADRVRADEONBUILDDIR)unshared/radeon_tris.o \
-		$(MESADRVRADEONBUILDDIR)unshared/radeon_vb.o 
+		$(MESADRVRADEONBUILDDIR)unshared/radeon_vtxfmt.o \
+		$(MESADRVRADEONBUILDDIR)unshared/radeon_vtxfmt_c.o
+
+#ifdef i386Architecture
+   RADEONUOBJS += $(MESADRVRADEONBUILDDIR)unshared/radeon_vtxfmt_x86.o \
+		$(MESADRVRADEONBUILDDIR)unshared/radeon_vtxtmp_x86.o \
+		$(MESADRVRADEONBUILDDIR)unshared/radeon_vtxfmt_sse.o 
+#endif
 
    RADEONDOBJS = $(MESADRVRADEONBUILDDIR)debugger/radeon_context.o \
+		$(MESADRVRADEONBUILDDIR)debugger/radeon_compat.o \
 		$(MESADRVRADEONBUILDDIR)debugger/radeon_ioctl.o \
 		$(MESADRVRADEONBUILDDIR)debugger/radeon_lock.o \
-		$(MESADRVRADEONBUILDDIR)debugger/radeon_render.o \
+		$(MESADRVRADEONBUILDDIR)debugger/radeon_maos.o \
+	        $(MESADRVRADEONBUILDDIR)debugger/radeon_sanity.o \
 	        $(MESADRVRADEONBUILDDIR)debugger/radeon_screen.o \
 		$(MESADRVRADEONBUILDDIR)debugger/radeon_span.o \
 		$(MESADRVRADEONBUILDDIR)debugger/radeon_state.o \
+		$(MESADRVRADEONBUILDDIR)debugger/radeon_state_init.o \
+		$(MESADRVRADEONBUILDDIR)debugger/radeon_swtcl.o \
+		$(MESADRVRADEONBUILDDIR)debugger/radeon_tcl.o \
 		$(MESADRVRADEONBUILDDIR)debugger/radeon_tex.o \
 		$(MESADRVRADEONBUILDDIR)debugger/radeon_texmem.o \
 		$(MESADRVRADEONBUILDDIR)debugger/radeon_texstate.o \
-		$(MESADRVRADEONBUILDDIR)debugger/radeon_tris.o \
-		$(MESADRVRADEONBUILDDIR)debugger/radeon_vb.o 
+		$(MESADRVRADEONBUILDDIR)debugger/radeon_vtxfmt.o \
+		$(MESADRVRADEONBUILDDIR)debugger/radeon_vtxfmt_c.o
+
+#ifdef i386Architecture
+   RADEONDOBJS += $(MESADRVRADEONBUILDDIR)debugger/radeon_vtxfmt_x86.o \
+		$(MESADRVRADEONBUILDDIR)debugger/radeon_vtxtmp_x86.o \
+		$(MESADRVRADEONBUILDDIR)debugger/radeon_vtxfmt_sse.o 
+#endif
 
    RADEONPOBJS = $(MESADRVRADEONBUILDDIR)profiled/radeon_context.o \
+		$(MESADRVRADEONBUILDDIR)profiled/radeon_compat.o \
 		$(MESADRVRADEONBUILDDIR)profiled/radeon_ioctl.o \
 		$(MESADRVRADEONBUILDDIR)profiled/radeon_lock.o \
-		$(MESADRVRADEONBUILDDIR)profiled/radeon_render.o \
+		$(MESADRVRADEONBUILDDIR)profiled/radeon_maos.o \
+	        $(MESADRVRADEONBUILDDIR)profiled/radeon_sanity.o \
 	        $(MESADRVRADEONBUILDDIR)profiled/radeon_screen.o \
 		$(MESADRVRADEONBUILDDIR)profiled/radeon_span.o \
 		$(MESADRVRADEONBUILDDIR)profiled/radeon_state.o \
+		$(MESADRVRADEONBUILDDIR)profiled/radeon_state_init.o \
+		$(MESADRVRADEONBUILDDIR)profiled/radeon_swtcl.o \
+		$(MESADRVRADEONBUILDDIR)profiled/radeon_tcl.o \
 		$(MESADRVRADEONBUILDDIR)profiled/radeon_tex.o \
 		$(MESADRVRADEONBUILDDIR)profiled/radeon_texmem.o \
 		$(MESADRVRADEONBUILDDIR)profiled/radeon_texstate.o \
-		$(MESADRVRADEONBUILDDIR)profiled/radeon_tris.o \
-		$(MESADRVRADEONBUILDDIR)profiled/radeon_vb.o 
+		$(MESADRVRADEONBUILDDIR)profiled/radeon_vtxfmt.o \
+		$(MESADRVRADEONBUILDDIR)profiled/radeon_vtxfmt_c.o
+
+#ifdef i386Architecture
+   RADEONPOBJS += $(MESADRVRADEONBUILDDIR)profiled/radeon_vtxfmt_x86.o \
+		$(MESADRVRADEONBUILDDIR)profiled/radeon_vtxtmp_x86.o \
+		$(MESADRVRADEONBUILDDIR)profiled/radeon_vtxfmt_sse.o 
+#endif
 
 #ifdef NeedToLinkMesaSrc
 LinkSourceFile(radeon_context.c, $(MESADRVSRCDIR)/radeon)
+LinkSourceFile(radeon_compat.c, $(MESADRVSRCDIR)/radeon)
 LinkSourceFile(radeon_ioctl.c, $(MESADRVSRCDIR)/radeon)
 LinkSourceFile(radeon_lock.c, $(MESADRVSRCDIR)/radeon)
-LinkSourceFile(radeon_render.c, $(MESADRVSRCDIR)/radeon)
+LinkSourceFile(radeon_maos.c, $(MESADRVSRCDIR)/radeon)
+LinkSourceFile(radeon_sanity.c, $(MESADRVSRCDIR)/radeon)
 LinkSourceFile(radeon_screen.c, $(MESADRVSRCDIR)/radeon)
 LinkSourceFile(radeon_span.c, $(MESADRVSRCDIR)/radeon)
 LinkSourceFile(radeon_state.c, $(MESADRVSRCDIR)/radeon)
+LinkSourceFile(radeon_state_init.c, $(MESADRVSRCDIR)/radeon)
+LinkSourceFile(radeon_swtcl.c, $(MESADRVSRCDIR)/radeon)
+LinkSourceFile(radeon_tcl.c, $(MESADRVSRCDIR)/radeon)
 LinkSourceFile(radeon_tex.c, $(MESADRVSRCDIR)/radeon)
 LinkSourceFile(radeon_texmem.c, $(MESADRVSRCDIR)/radeon)
 LinkSourceFile(radeon_texstate.c, $(MESADRVSRCDIR)/radeon)
-LinkSourceFile(radeon_tris.c, $(MESADRVSRCDIR)/radeon)
-LinkSourceFile(radeon_vb.c, $(MESADRVSRCDIR)/radeon)
+LinkSourceFile(radeon_vtxfmt.c, $(MESADRVSRCDIR)/radeon)
+LinkSourceFile(radeon_vtxfmt_c.c, $(MESADRVSRCDIR)/radeon)
+LinkSourceFile(radeon_vtxfmt_x86.c, $(MESADRVSRCDIR)/radeon)
+LinkSourceFile(radeon_vtxtmp_x86.S, $(MESADRVSRCDIR)/radeon)
+LinkSourceFile(radeon_vtxfmt_sse.c, $(MESADRVSRCDIR)/radeon)
 #endif
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_compat.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_compat.c
new file mode 100644
index 000000000..5737c4d85
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_compat.c
@@ -0,0 +1,301 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_compat.c,v 1.1 2002/10/30 12:51:54 alanh Exp $ */
+/**************************************************************************
+
+Copyright 2002 ATI Technologies Inc., Ontario, Canada, and
+               Tungsten Graphics Inc., Austin, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#include "radeon_context.h"
+#include "radeon_state.h"
+#include "radeon_ioctl.h"
+#include "mem.h"
+
+static struct { 	/* one entry per state packet; indexed by packet id below */
+	int start; 	/* presumably the first register of the packet -- TODO confirm */
+	int len; 	/* payload dwords after the header (matches the per-case reads below) */
+	const char *name;	/* printed by the DEBUG_STATE trace */
+} packet[RADEON_MAX_STATE_PACKETS] = {
+	{ RADEON_PP_MISC,7,"RADEON_PP_MISC" },
+	{ RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
+	{ RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
+	{ RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
+	{ RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
+	{ RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
+	{ RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
+	{ RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
+	{ RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
+	{ RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
+	{ RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
+	{ RADEON_RE_MISC,1,"RADEON_RE_MISC" },
+	{ RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
+	{ RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
+	{ RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
+	{ RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
+	{ RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
+	{ RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
+	{ RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
+	{ RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
+	{ RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
+};
+
+/* Replay one state atom into the SAREA for the DRM 1.1 interface: each
+ * packet's register values are copied from state->cmd into the SAREA
+ * context/texture fields and the matching RADEON_UPLOAD_* bit is set.
+ * Processing stops at the first packet that 1.1 cannot express. */
+static void radeonCompatEmitPacket( radeonContextPtr rmesa,
+				    struct radeon_state_atom *state )
+{
+   RADEONSAREAPrivPtr sarea = rmesa->sarea;
+   radeon_context_regs_t *ctx = &sarea->ContextState;
+   radeon_texture_regs_t *tex0 = &sarea->TexState[0];
+   radeon_texture_regs_t *tex1 = &sarea->TexState[1];
+   int i;
+   int *buf = state->cmd;
+   for ( i = 0 ; i < state->cmd_size ; ) {
+      drmRadeonCmdHeader *header = (drmRadeonCmdHeader *)&buf[i++];
+      int id = (int)header->packet.packet_id;	/* may lie outside packet[] */
+      if (RADEON_DEBUG & DEBUG_STATE)
+	 fprintf(stderr, "%s %d: %s\n", __FUNCTION__, id,
+		 (id >= 0 && id < RADEON_MAX_STATE_PACKETS && packet[id].name)
+		 ? packet[id].name : "(unknown)");
+
+      switch (header->packet.packet_id) {
+      case RADEON_EMIT_PP_MISC:
+	 ctx->pp_misc = buf[i++];
+	 ctx->pp_fog_color = buf[i++];
+	 ctx->re_solid_color = buf[i++];
+	 ctx->rb3d_blendcntl = buf[i++];
+	 ctx->rb3d_depthoffset = buf[i++];
+	 ctx->rb3d_depthpitch = buf[i++];
+	 ctx->rb3d_zstencilcntl = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_CONTEXT;
+	 break;
+      case RADEON_EMIT_PP_CNTL:
+	 ctx->pp_cntl = buf[i++];
+	 ctx->rb3d_cntl = buf[i++];
+	 ctx->rb3d_coloroffset = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_CONTEXT;
+	 break;
+      case RADEON_EMIT_RB3D_COLORPITCH:
+	 ctx->rb3d_colorpitch = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_CONTEXT;
+	 break;
+      case RADEON_EMIT_RE_LINE_PATTERN:
+	 ctx->re_line_pattern = buf[i++];
+	 ctx->re_line_state = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_LINE;
+	 break;
+      case RADEON_EMIT_SE_LINE_WIDTH:
+	 ctx->se_line_width = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_LINE;
+	 break;
+      case RADEON_EMIT_PP_LUM_MATRIX:
+	 ctx->pp_lum_matrix = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_BUMPMAP;
+	 break;
+      case RADEON_EMIT_PP_ROT_MATRIX_0:
+	 ctx->pp_rot_matrix_0 = buf[i++];
+	 ctx->pp_rot_matrix_1 = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_BUMPMAP;
+	 break;
+      case RADEON_EMIT_RB3D_STENCILREFMASK:
+	 ctx->rb3d_stencilrefmask = buf[i++];
+	 ctx->rb3d_ropcntl = buf[i++];
+	 ctx->rb3d_planemask = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_MASKS;
+	 break;
+      case RADEON_EMIT_SE_VPORT_XSCALE:
+	 ctx->se_vport_xscale = buf[i++];
+	 ctx->se_vport_xoffset = buf[i++];
+	 ctx->se_vport_yscale = buf[i++];
+	 ctx->se_vport_yoffset = buf[i++];
+	 ctx->se_vport_zscale = buf[i++];
+	 ctx->se_vport_zoffset = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_VIEWPORT;
+	 break;
+      case RADEON_EMIT_SE_CNTL:
+	 ctx->se_cntl = buf[i++];
+	 ctx->se_coord_fmt = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_CONTEXT | RADEON_UPLOAD_VERTFMT;
+	 break;
+      case RADEON_EMIT_SE_CNTL_STATUS:
+	 ctx->se_cntl_status = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_SETUP;
+	 break;
+      case RADEON_EMIT_RE_MISC:
+	 ctx->re_misc = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_MISC;
+	 break;
+      case RADEON_EMIT_PP_TXFILTER_0:
+	 tex0->pp_txfilter = buf[i++];
+	 tex0->pp_txformat = buf[i++];
+	 tex0->pp_txoffset = buf[i++];
+	 tex0->pp_txcblend = buf[i++];
+	 tex0->pp_txablend = buf[i++];
+	 tex0->pp_tfactor = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_TEX0;
+	 break;
+      case RADEON_EMIT_PP_BORDER_COLOR_0:
+	 tex0->pp_border_color = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_TEX0;
+	 break;
+      case RADEON_EMIT_PP_TXFILTER_1:
+	 tex1->pp_txfilter = buf[i++];
+	 tex1->pp_txformat = buf[i++];
+	 tex1->pp_txoffset = buf[i++];
+	 tex1->pp_txcblend = buf[i++];
+	 tex1->pp_txablend = buf[i++];
+	 tex1->pp_tfactor = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_TEX1;
+	 break;
+      case RADEON_EMIT_PP_BORDER_COLOR_1:
+	 tex1->pp_border_color = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_TEX1;
+	 break;
+      case RADEON_EMIT_SE_ZBIAS_FACTOR:
+	 i += 2;	/* two-dword payload is skipped, not stored */
+	 break;
+      case RADEON_EMIT_PP_TXFILTER_2:
+      case RADEON_EMIT_PP_BORDER_COLOR_2:
+      case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
+      case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
+      default:
+	 /* These states aren't understood by radeon drm 1.1 */
+	 fprintf(stderr, "Tried to emit unsupported state\n");
+	 return;
+      }
+   }
+}
+
+/* Move every dirty state atom to the clean list, emitting the non-TCL ones
+ * through radeonCompatEmitPacket() first.  The caller holds the hardware lock. */
+static void radeonCompatEmitStateLocked( radeonContextPtr rmesa )
+{
+   struct radeon_state_atom *state, *tmp;
+
+   if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (rmesa->lost_context) {
+      if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS|DEBUG_IOCTL))
+	 fprintf(stderr, "%s - lost context\n", __FUNCTION__); 
+
+      foreach_s( state, tmp, &(rmesa->hw.clean) ) 
+	 move_to_tail(&(rmesa->hw.dirty), state );
+
+      rmesa->lost_context = 0;
+   }
+
+   foreach_s( state, tmp, &(rmesa->hw.dirty) ) {
+      if (!state->is_tcl)
+	 radeonCompatEmitPacket( rmesa, state );
+      move_to_head( &(rmesa->hw.clean), state );
+   }
+}
+
+/* Submit the current DMA buffer via DRM_RADEON_VERTEX once per batch of up to
+ * RADEON_NR_SAREA_CLIPRECTS cliprects; discard is set on the last batch only. */
+static void radeonCompatEmitPrimitiveLocked( radeonContextPtr rmesa,
+					     GLuint hw_primitive,
+					     GLuint nverts,
+					     XF86DRIClipRectPtr pbox,
+					     GLuint nbox )
+{
+   int i;
+
+   for ( i = 0 ; i < nbox ; ) {
+      int nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, nbox );	/* end (exclusive) of this batch */
+      XF86DRIClipRectPtr b = rmesa->sarea->boxes;
+      drmRadeonVertex vtx;
+      
+      rmesa->sarea->dirty |= RADEON_UPLOAD_CLIPRECTS;
+      rmesa->sarea->nbox = nr - i;
+
+      for ( ; i < nr ; i++) 
+	 *b++ = pbox[i];
+      
+      if (RADEON_DEBUG & DEBUG_IOCTL)
+	 fprintf(stderr, 
+		 "RadeonFlushVertexBuffer: prim %x buf %d verts %d "
+		 "disc %d nbox %d\n",
+		 hw_primitive, 
+		 rmesa->dma.current.buf->buf->idx, 
+		 nverts, 
+		 nr == nbox,
+		 rmesa->sarea->nbox );
+
+      vtx.prim = hw_primitive;
+      vtx.idx = rmesa->dma.current.buf->buf->idx;
+      vtx.count = nverts;
+      vtx.discard = (nr == nbox);      /* true only for the final batch */
+
+      drmCommandWrite( rmesa->dri.fd, 
+		       DRM_RADEON_VERTEX,
+		       &vtx, sizeof(vtx));	/* NOTE(review): return value is ignored */
+   }
+}
+
+/* Entry point for drawing through the DRM 1.1 interface: takes the hardware
+ * lock, flushes dirty state to the SAREA, records the vertex format, then
+ * emits the primitive against the scissor cliprects or the context's own.
+ * No 'start' for 1.1 vertices ioctl: only one vertex prim/buffer! */
+void radeonCompatEmitPrimitive( radeonContextPtr rmesa,
+				GLuint vertex_format,
+				GLuint hw_primitive,
+				GLuint nrverts )
+{
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   LOCK_HARDWARE( rmesa );
+
+   radeonCompatEmitStateLocked( rmesa );
+   rmesa->sarea->vc_format = vertex_format;
+   
+   if (rmesa->state.scissor.enabled) {
+      radeonCompatEmitPrimitiveLocked( rmesa, 
+				       hw_primitive,
+				       nrverts,
+				       rmesa->state.scissor.pClipRects,
+				       rmesa->state.scissor.numClipRects );
+   }
+   else {
+      radeonCompatEmitPrimitiveLocked( rmesa, 
+				       hw_primitive,
+				       nrverts,
+				       rmesa->pClipRects,
+				       rmesa->numClipRects );
+   }
+
+
+   UNLOCK_HARDWARE( rmesa );
+}
+
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_context.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_context.c
index 690e30af4..15e62e161 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_context.c
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_context.c
@@ -1,4 +1,4 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_context.c,v 1.4 2002/09/10 00:39:39 dawes Exp $ */
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_context.c,v 1.7 2003/02/08 21:26:45 dawes Exp $ */
 /**************************************************************************
 
 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
@@ -31,18 +31,20 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
  * Authors:
  *   Kevin E. Martin <martin@valinux.com>
  *   Gareth Hughes <gareth@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
  *
  */
 
-#include <stdlib.h>
 
 #include "radeon_context.h"
 #include "radeon_ioctl.h"
 #include "radeon_state.h"
 #include "radeon_span.h"
 #include "radeon_tex.h"
-#include "radeon_tris.h"
-#include "radeon_vb.h"
+#include "radeon_swtcl.h"
+#include "radeon_tcl.h"
+#include "radeon_vtxfmt.h"
+#include "radeon_maos.h"
 
 #include "swrast/swrast.h"
 #include "swrast_setup/swrast_setup.h"
@@ -51,6 +53,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "tnl/tnl.h"
 #include "tnl/t_pipeline.h"
 
+#include "api_arrayelt.h"
 #include "context.h"
 #include "simple_list.h"
 #include "mem.h"
@@ -60,27 +63,18 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "X86/common_x86_asm.h"
 #endif
 
-#define RADEON_DATE	"20020221"
+#define RADEON_DATE	"20020611"
 
 #ifndef RADEON_DEBUG
-int RADEON_DEBUG = (0
-/*		    | DEBUG_ALWAYS_SYNC */
-/*		    | DEBUG_VERBOSE_API */
-/*		    | DEBUG_VERBOSE_MSG */
-/*		    | DEBUG_VERBOSE_LRU */
-/*		    | DEBUG_VERBOSE_DRI */
-/*		    | DEBUG_VERBOSE_IOCTL */
-/*		    | DEBUG_VERBOSE_2D */
-/*		    | DEBUG_VERBOSE_TEXTURE */
-   );
+int RADEON_DEBUG = (0);
 #endif
 
 
 
-/* Return the width and height of the current color buffer.
+/* Return the width and height of the given buffer.
  */
 static void radeonGetBufferSize( GLframebuffer *buffer,
-				   GLuint *width, GLuint *height )
+				 GLuint *width, GLuint *height )
 {
    GET_CURRENT_CONTEXT(ctx);
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
@@ -100,10 +94,10 @@ static const GLubyte *radeonGetString( GLcontext *ctx, GLenum name )
 
    switch ( name ) {
    case GL_VENDOR:
-      return (GLubyte *)"VA Linux Systems, Inc.";
+      return (GLubyte *)"Tungsten Graphics, Inc.";
 
    case GL_RENDERER:
-      sprintf( buffer, "Mesa DRI Radeon " RADEON_DATE );
+      sprintf( buffer, "Mesa DRI Radeon " RADEON_DATE);
 
       /* Append any chipset-specific information.  None yet.
        */
@@ -144,6 +138,18 @@ static const GLubyte *radeonGetString( GLcontext *ctx, GLenum name )
       }
 #endif
 #endif
+
+      if ( rmesa->dri.drmMinor < 3 ) {
+	 strncat( buffer, " DRM-COMPAT", 11 );
+      }
+	 
+      if ( !(rmesa->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE) ) {
+	 strncat( buffer, " TCL", 4 );
+      }
+      else {
+	 strncat( buffer, " NO-TCL", 7 );
+      }
+
       return (GLubyte *)buffer;
 
    default:
@@ -151,76 +157,80 @@ static const GLubyte *radeonGetString( GLcontext *ctx, GLenum name )
    }
 }
 
-/* Send all commands to the hardware.  If vertex buffers or indirect
- * buffers are in use, then we need to make sure they are sent to the
- * hardware.  All commands that are normally sent to the ring are
- * already considered `flushed'.
- */
-static void radeonFlush( GLcontext *ctx )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-   RADEON_FIREVERTICES( rmesa );
-
-   if ( rmesa->boxes ) {
-      LOCK_HARDWARE( rmesa );
-      radeonPerformanceBoxesLocked( rmesa );
-      UNLOCK_HARDWARE( rmesa );
-   }
-
-   /* Log the performance counters if necessary */
-   radeonPerformanceCounters( rmesa );
-}
-
-/* Make sure all commands have been sent to the hardware and have
- * completed processing.
+/* Extension strings exported by the R100 driver.
  */
-static void radeonFinish( GLcontext *ctx )
+static const char * const radeon_extensions[] =
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
-   /* Bump the performance counter */
-   rmesa->c_drawWaits++;
-   radeonFlush( ctx );
-   LOCK_HARDWARE( rmesa );
-   radeonWaitForIdleLocked( rmesa );
-   UNLOCK_HARDWARE( rmesa );
-}
-
+    "GL_ARB_multisample",
+    "GL_ARB_multitexture",
+    "GL_ARB_texture_border_clamp",
+    "GL_ARB_texture_compression",
+    "GL_ARB_texture_env_add",
+    "GL_ARB_texture_env_combine",
+    "GL_ARB_texture_env_dot3",
+    "GL_ARB_texture_mirrored_repeat",
+    "GL_EXT_blend_logic_op",
+    "GL_EXT_blend_subtract",
+/*    "GL_EXT_fog_coord", */
+    "GL_EXT_secondary_color",
+    "GL_EXT_texture_env_add",
+    "GL_EXT_texture_env_combine",
+    "GL_EXT_texture_env_dot3",
+    "GL_EXT_texture_filter_anisotropic",
+    "GL_EXT_texture_lod_bias",
+    "GL_ATI_texture_mirror_once",
+    "GL_IBM_texture_mirrored_repeat",
+    "GL_NV_blend_square",
+    "GL_SGIS_generate_mipmap",
+    "GL_SGIS_texture_border_clamp",
+    NULL
+};
 
 /* Initialize the extensions supported by this driver.
  */
 static void radeonInitExtensions( GLcontext *ctx )
 {
+   unsigned   i;
    _mesa_enable_imaging_extensions( ctx );
 
-   _mesa_enable_extension( ctx, "GL_ARB_multitexture" );
-   _mesa_enable_extension( ctx, "GL_ARB_texture_env_add" );
-
-   _mesa_enable_extension( ctx, "GL_EXT_blend_logic_op" );
-   _mesa_enable_extension( ctx, "GL_EXT_texture_env_add" );
-   _mesa_enable_extension( ctx, "GL_EXT_texture_env_combine" );
-   _mesa_enable_extension( ctx, "GL_EXT_texture_env_dot3" );
-   _mesa_enable_extension( ctx, "GL_EXT_texture_filter_anisotropic" );
-   _mesa_enable_extension( ctx, "GL_EXT_texture_lod_bias" );
-
+   for ( i = 0 ; radeon_extensions[i] != NULL ; i++ ) {
+      _mesa_enable_extension( ctx, radeon_extensions[i] );
+   }
 }
 
 extern const struct gl_pipeline_stage _radeon_render_stage;
-extern const struct gl_pipeline_stage _radeon_tcl_render_stage;
+extern const struct gl_pipeline_stage _radeon_tcl_stage;
 
 static const struct gl_pipeline_stage *radeon_pipeline[] = {
+
+   /* Try and go straight to t&l
+    */
+   &_radeon_tcl_stage,  
+
+   /* Catch any t&l fallbacks
+    */
    &_tnl_vertex_transform_stage,
    &_tnl_normal_transform_stage,
    &_tnl_lighting_stage,
    &_tnl_fog_coordinate_stage,
    &_tnl_texgen_stage,
    &_tnl_texture_transform_stage,
-				/* REMOVE: point attenuation stage */
-#if 1
-   &_radeon_render_stage,	/* ADD: unclipped rastersetup-to-dma */
-#endif
-   &_tnl_render_stage,
+
+   /* Try again to go to tcl? 
+    *     - no good for asymmetric-twoside (do with multipass)
+    *     - no good for asymmetric-unfilled (do with multipass)
+    *     - good for material
+    *     - good for texgen
+    *     - need to manipulate a bit of state
+    *
+    * - worth it/not worth it?
+    */
+			
+   /* Else do them here.
+    */
+   &_radeon_render_stage,
+   &_tnl_render_stage,		/* FALLBACK:  */
    0,
 };
 
@@ -231,9 +241,8 @@ static const struct gl_pipeline_stage *radeon_pipeline[] = {
 static void radeonInitDriverFuncs( GLcontext *ctx )
 {
     ctx->Driver.GetBufferSize		= radeonGetBufferSize;
+    ctx->Driver.ResizeBuffers           = _swrast_alloc_buffers;
     ctx->Driver.GetString		= radeonGetString;
-    ctx->Driver.Finish			= radeonFinish;
-    ctx->Driver.Flush			= radeonFlush;
 
     ctx->Driver.Error			= NULL;
     ctx->Driver.DrawPixels		= NULL;
@@ -286,11 +295,20 @@ radeonCreateContext( Display *dpy, const __GLcontextModes *glVisual,
    rmesa->dri.hwLock = &sPriv->pSAREA->lock;
    rmesa->dri.fd = sPriv->fd;
 
+   /* If we don't have 1.3, fallback to the 1.1 interfaces.
+    */
+   if (getenv("RADEON_COMPAT") || sPriv->drmMinor < 3 ) 
+      rmesa->dri.drmMinor = 1;
+   else
+      rmesa->dri.drmMinor = sPriv->drmMinor;
+
    rmesa->radeonScreen = radeonScreen;
    rmesa->sarea = (RADEONSAREAPrivPtr)((GLubyte *)sPriv->pSAREA +
 				       radeonScreen->sarea_priv_offset);
 
 
+   rmesa->dma.buf0_address = rmesa->radeonScreen->buffers->list[0].address;
+
    for ( i = 0 ; i < radeonScreen->numTexHeaps ; i++ ) {
       make_empty_list( &rmesa->texture.objects[i] );
       rmesa->texture.heap[i] = mmInit( 0, radeonScreen->texSize[i] );
@@ -299,9 +317,8 @@ radeonCreateContext( Display *dpy, const __GLcontextModes *glVisual,
    rmesa->texture.numHeaps = radeonScreen->numTexHeaps;
    make_empty_list( &rmesa->texture.swapped );
 
-   rmesa->RenderIndex = ~0;
-   rmesa->state.hw.dirty = RADEON_UPLOAD_CONTEXT_ALL;
-   rmesa->upload_cliprects = 1;
+   rmesa->swtcl.RenderIndex = ~0;
+   rmesa->lost_context = 1;
 
    /* KW: Set the maximum texture size small enough that we can
     * guarentee that both texture units can bind a maximal texture
@@ -338,17 +355,20 @@ radeonCreateContext( Display *dpy, const __GLcontextModes *glVisual,
    ctx->Const.MaxLineWidthAA = 10.0;
    ctx->Const.LineWidthGranularity = 0.0625;
 
+   /* Set maxlocksize (and hence vb size) small enough to avoid
+    * fallbacks in radeon_tcl.c, i.e. guarantee that all vertices can
+    * fit in a single dma buffer for indexed rendering of quad strips,
+    * etc.
+    */
+   ctx->Const.MaxArrayLockSize = 
+      MIN2( ctx->Const.MaxArrayLockSize,
+  	    RADEON_BUFFER_SIZE / RADEON_MAX_TCL_VERTSIZE );
+
    if (getenv("LIBGL_PERFORMANCE_BOXES"))
       rmesa->boxes = 1;
    else
       rmesa->boxes = 0;
 
-   {
-      const char *debug = getenv("LIBGL_DEBUG");
-      if (debug && strstr(debug, "fallbacks")) {
-         rmesa->debugFallbacks = GL_TRUE;
-      }
-   }
 
    /* Initialize the software rasterizer and helper modules.
     */
@@ -356,29 +376,150 @@ radeonCreateContext( Display *dpy, const __GLcontextModes *glVisual,
    _ac_CreateContext( ctx );
    _tnl_CreateContext( ctx );
    _swsetup_CreateContext( ctx );
-
+   _ae_create_context( ctx );
 
    /* Install the customized pipeline:
     */
    _tnl_destroy_pipeline( ctx );
    _tnl_install_pipeline( ctx, radeon_pipeline );
 
+   /* Try and keep materials and vertices separate:
+    */
+   _tnl_isolate_materials( ctx, GL_TRUE );
+
+
+/*     _mesa_allow_light_in_model( ctx, GL_FALSE ); */
+
    /* Configure swrast to match hardware characteristics:
     */
    _swrast_allow_pixel_fog( ctx, GL_FALSE );
    _swrast_allow_vertex_fog( ctx, GL_TRUE );
 
-   radeonInitVB( ctx );
+
+   _math_matrix_ctr( &rmesa->TexGenMatrix[0] );
+   _math_matrix_ctr( &rmesa->TexGenMatrix[1] );
+   _math_matrix_ctr( &rmesa->tmpmat );
+   _math_matrix_set_identity( &rmesa->TexGenMatrix[0] );
+   _math_matrix_set_identity( &rmesa->TexGenMatrix[1] );
+   _math_matrix_set_identity( &rmesa->tmpmat );
+
    radeonInitExtensions( ctx );
    radeonInitDriverFuncs( ctx );
    radeonInitIoctlFuncs( ctx );
    radeonInitStateFuncs( ctx );
    radeonInitSpanFuncs( ctx );
    radeonInitTextureFuncs( ctx );
-   radeonInitTriFuncs( ctx );
-
    radeonInitState( rmesa );
+   radeonInitSwtcl( ctx );
+
+   rmesa->do_irqs = (rmesa->radeonScreen->irq && !getenv("RADEON_NO_IRQS"));
+   rmesa->irqsEmitted = 0;
+   rmesa->iw.irq_seq = -1;
+
+   rmesa->do_usleeps = !getenv("RADEON_NO_USLEEPS");
+   
+#if DO_DEBUG
+   if (getenv("RADEON_DEBUG_FALLBACKS"))
+      RADEON_DEBUG |= DEBUG_FALLBACKS;
+
+   if (getenv("RADEON_DEBUG_TEXTURE"))
+      RADEON_DEBUG |= DEBUG_TEXTURE;
+
+   if (getenv("RADEON_DEBUG_IOCTL"))
+      RADEON_DEBUG |= DEBUG_IOCTL;
+
+   if (getenv("RADEON_DEBUG_PRIMS"))
+      RADEON_DEBUG |= DEBUG_PRIMS;
+
+   if (getenv("RADEON_DEBUG_VERTS"))
+      RADEON_DEBUG |= DEBUG_VERTS;
+
+   if (getenv("RADEON_DEBUG_STATE"))
+      RADEON_DEBUG |= DEBUG_STATE;
+
+   if (getenv("RADEON_DEBUG_CODEGEN"))
+      RADEON_DEBUG |= DEBUG_CODEGEN;
+
+   if (getenv("RADEON_DEBUG_VTXFMT"))
+      RADEON_DEBUG |= DEBUG_VFMT;
+
+   if (getenv("RADEON_DEBUG_VERBOSE"))
+      RADEON_DEBUG |= DEBUG_VERBOSE;
+
+   if (getenv("RADEON_DEBUG_DRI"))
+      RADEON_DEBUG |= DEBUG_DRI;
+
+   if (getenv("RADEON_DEBUG_DMA"))
+      RADEON_DEBUG |= DEBUG_DMA;
 
+   if (getenv("RADEON_DEBUG_SANITY"))
+      RADEON_DEBUG |= DEBUG_SANITY;
+
+   if (getenv("RADEON_DEBUG"))
+   {
+      const char *debug = getenv("RADEON_DEBUG");
+      if (strstr(debug, "fall")) 
+         RADEON_DEBUG |= DEBUG_FALLBACKS;
+
+      if (strstr(debug, "tex")) 
+         RADEON_DEBUG |= DEBUG_TEXTURE;
+
+      if (strstr(debug, "ioctl")) 
+         RADEON_DEBUG |= DEBUG_IOCTL;
+
+      if (strstr(debug, "prim")) 
+         RADEON_DEBUG |= DEBUG_PRIMS;
+
+      if (strstr(debug, "vert")) 
+         RADEON_DEBUG |= DEBUG_VERTS;
+
+      if (strstr(debug, "state")) 
+         RADEON_DEBUG |= DEBUG_STATE;
+
+      if (strstr(debug, "code")) 
+         RADEON_DEBUG |= DEBUG_CODEGEN;
+
+      if (strstr(debug, "vfmt") || strstr(debug, "vtxf")) 
+         RADEON_DEBUG |= DEBUG_VFMT;
+
+      if (strstr(debug, "verb")) 
+         RADEON_DEBUG |= DEBUG_VERBOSE;
+
+      if (strstr(debug, "dri")) 
+         RADEON_DEBUG |= DEBUG_DRI;
+
+      if (strstr(debug, "dma")) 
+         RADEON_DEBUG |= DEBUG_DMA;
+
+      if (strstr(debug, "san")) 
+         RADEON_DEBUG |= DEBUG_SANITY;
+   }
+
+
+#endif
+
+   if (getenv("RADEON_NO_RAST")) {
+      fprintf(stderr, "disabling 3D acceleration\n");
+      FALLBACK(rmesa, RADEON_FALLBACK_DISABLE, 1); 
+   }
+   else if (getenv("RADEON_TCL_FORCE_ENABLE")) {
+      fprintf(stderr, "Enabling TCL support...  this will probably crash\n");
+      fprintf(stderr, "         your card if it isn't capable of TCL!\n");
+      rmesa->radeonScreen->chipset |= RADEON_CHIPSET_TCL;
+   } else if (getenv("RADEON_TCL_FORCE_DISABLE") ||
+	    rmesa->dri.drmMinor < 3 ||
+	    !(rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL)) {
+      rmesa->radeonScreen->chipset &= ~RADEON_CHIPSET_TCL;
+      fprintf(stderr, "disabling TCL support\n");
+      TCL_FALLBACK(rmesa->glCtx, RADEON_TCL_FALLBACK_TCL_DISABLE, 1); 
+   }
+
+   if (rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL) {
+      if (!getenv("RADEON_NO_VTXFMT"))
+	 radeonVtxfmtInit( ctx );
+
+      _tnl_need_dlist_norm_lengths( ctx, GL_FALSE );
+   }
    return GL_TRUE;
 }
 
@@ -396,6 +537,7 @@ radeonDestroyContext( __DRIcontextPrivate *driContextPriv )
 
    /* check if we're deleting the currently bound context */
    if (rmesa == current) {
+      RADEON_FIREVERTICES( rmesa );
       _mesa_make_current2(NULL, NULL, NULL);
    }
 
@@ -414,6 +556,7 @@ radeonDestroyContext( __DRIcontextPrivate *driContextPriv )
                radeonDestroyTexObj( rmesa, t );
             }
             mmDestroy( rmesa->texture.heap[i] );
+	    rmesa->texture.heap[i] = NULL;
          }
 
          foreach_s ( t, next_t, &rmesa->texture.swapped ) {
@@ -426,12 +569,27 @@ radeonDestroyContext( __DRIcontextPrivate *driContextPriv )
       _ac_DestroyContext( rmesa->glCtx );
       _swrast_DestroyContext( rmesa->glCtx );
 
-      radeonFreeVB( rmesa->glCtx );
+      radeonDestroySwtcl( rmesa->glCtx );
+
+      radeonReleaseArrays( rmesa->glCtx, ~0 );
+      if (rmesa->dma.current.buf) {
+	 radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
+	 radeonFlushCmdBuf( rmesa, __FUNCTION__ );
+      }
+
+      if (!(rmesa->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE)) /* test the bit first, then negate: '!' binds tighter than '&' */
+	 if (!getenv("RADEON_NO_VTXFMT"))
+	    radeonVtxfmtDestroy( rmesa->glCtx );
 
       /* free the Mesa context */
       rmesa->glCtx->DriverCtx = NULL;
       _mesa_destroy_context( rmesa->glCtx );
 
+      if (rmesa->state.scissor.pClipRects) {
+	 FREE(rmesa->state.scissor.pClipRects);
+	 rmesa->state.scissor.pClipRects = 0;
+      }
+
       FREE( rmesa );
    }
 
@@ -508,6 +666,7 @@ radeonSwapBuffers(Display *dpy, void *drawablePrivate)
       ctx = rmesa->glCtx;
       if (ctx->Visual.doubleBufferMode) {
          _mesa_swapbuffers( ctx );  /* flush pending rendering comands */
+
          if ( rmesa->doPageFlip ) {
             radeonPageFlip( dPriv );
          }
@@ -532,21 +691,14 @@ radeonMakeCurrent( __DRIcontextPrivate *driContextPriv,
                    __DRIdrawablePrivate *driReadPriv )
 {
    if ( driContextPriv ) {
-      GET_CURRENT_CONTEXT(ctx);
-      radeonContextPtr oldRadeonCtx = ctx ? RADEON_CONTEXT(ctx) : NULL;
-      radeonContextPtr newRadeonCtx = (radeonContextPtr) driContextPriv->driverPrivate;
-
-      if ( newRadeonCtx != oldRadeonCtx ) {
-	 newRadeonCtx->state.hw.dirty = RADEON_UPLOAD_CONTEXT_ALL;
-	 if ( newRadeonCtx->state.texture.unit[0].texobj )
-	    newRadeonCtx->state.hw.dirty |= RADEON_UPLOAD_TEX0;
-	 if ( newRadeonCtx->state.texture.unit[1].texobj )
-	    newRadeonCtx->state.hw.dirty |= RADEON_UPLOAD_TEX1;
-      }
+      radeonContextPtr newRadeonCtx = 
+	 (radeonContextPtr) driContextPriv->driverPrivate;
+
+      if (RADEON_DEBUG & DEBUG_DRI)
+	 fprintf(stderr, "%s ctx %p\n", __FUNCTION__, newRadeonCtx->glCtx);
 
       if ( newRadeonCtx->dri.drawable != driDrawPriv ) {
 	 newRadeonCtx->dri.drawable = driDrawPriv;
-	 newRadeonCtx->upload_cliprects = 1;
 	 radeonUpdateWindow( newRadeonCtx->glCtx );
 	 radeonUpdateViewportOffset( newRadeonCtx->glCtx );
       }
@@ -559,10 +711,18 @@ radeonMakeCurrent( __DRIcontextPrivate *driContextPriv,
 	 _mesa_set_viewport( newRadeonCtx->glCtx, 0, 0,
 			     driDrawPriv->w, driDrawPriv->h );
       }
+
+      if (newRadeonCtx->vb.enabled)
+	 radeonVtxfmtMakeCurrent( newRadeonCtx->glCtx );
+
    } else {
+      if (RADEON_DEBUG & DEBUG_DRI)
+	 fprintf(stderr, "%s ctx %p\n", __FUNCTION__, NULL);
       _mesa_make_current( 0, 0 );
    }
 
+   if (RADEON_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "End %s\n", __FUNCTION__);
    return GL_TRUE;
 }
 
@@ -571,61 +731,30 @@ radeonMakeCurrent( __DRIcontextPrivate *driContextPriv,
 static GLboolean
 radeonUnbindContext( __DRIcontextPrivate *driContextPriv )
 {
-   return GL_TRUE;
-}
-
-/* Initialize the fullscreen mode.
- */
-static GLboolean
-radeonOpenFullScreen( __DRIcontextPrivate *driContextPriv )
-{
-#if 0
-   radeonContextPtr rmesa = (radeonContextPtr)driContextPriv->driverPrivate;
-   GLint ret;
-
-   /* FIXME: Do we need to check this?
-    */
-   if ( !rmesa->glCtx->Visual.doubleBufferMode )
-      return GL_TRUE;
-
-   LOCK_HARDWARE( rmesa );
-   radeonWaitForIdleLocked( rmesa );
-
-   /* Ignore errors.  If this fails, we simply don't do page flipping.
-    */
-   ret = drmRadeonFullScreen( rmesa->driFd, GL_TRUE );
+   radeonContextPtr rmesa = (radeonContextPtr) driContextPriv->driverPrivate;
 
-   UNLOCK_HARDWARE( rmesa );
+   if (RADEON_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "%s ctx %p\n", __FUNCTION__, rmesa->glCtx);
 
-   rmesa->doPageFlip = ( ret == 0 );
-#endif
+   radeonVtxfmtUnbindContext( rmesa->glCtx );
    return GL_TRUE;
 }
 
-/* Shut down the fullscreen mode.
+/* Fullscreen mode isn't used for much -- could be a way to shrink
+ * front/back buffers & get more texture memory if the client has
+ * changed the video resolution.
+ * 
+ * Pageflipping is now done automatically whenever there is a single
+ * 3d client.
  */
 static GLboolean
-radeonCloseFullScreen( __DRIcontextPrivate *driContextPriv )
+radeonOpenCloseFullScreen( __DRIcontextPrivate *driContextPriv )
 {
-#if 0
-   radeonContextPtr rmesa = (radeonContextPtr)driContextPriv->driverPrivate;
-
-   LOCK_HARDWARE( rmesa );
-   radeonWaitForIdleLocked( rmesa );
-
-   /* Don't care if this fails, we're not page flipping anymore.
-    */
-   drmRadeonFullScreen( rmesa->driFd, GL_FALSE );
-
-   UNLOCK_HARDWARE( rmesa );
-
-   rmesa->doPageFlip = GL_FALSE;
-   rmesa->currentPage = 0;
-#endif
    return GL_TRUE;
 }
 
 
+
 /* This function is called by libGL.so as soon as libGL.so is loaded.
  * This is where we'd register new extension functions with the dispatcher.
  */
@@ -646,8 +775,8 @@ static struct __DriverAPIRec radeonAPI = {
    radeonSwapBuffers,
    radeonMakeCurrent,
    radeonUnbindContext,
-   radeonOpenFullScreen,
-   radeonCloseFullScreen
+   radeonOpenCloseFullScreen,
+   radeonOpenCloseFullScreen
 };
 
 
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_context.h b/xc/lib/GL/mesa/src/drv/radeon/radeon_context.h
index 736f4c654..051bbbcf5 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_context.h
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_context.h
@@ -1,4 +1,4 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_context.h,v 1.4 2002/09/10 00:39:39 dawes Exp $ */
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_context.h,v 1.6 2002/12/16 16:18:58 dawes Exp $ */
 /**************************************************************************
 
 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
@@ -31,7 +31,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
  * Authors:
  *   Kevin E. Martin <martin@valinux.com>
  *   Gareth Hughes <gareth@valinux.com>
- *   Keith Whitwell <keith_whitwell@yahoo.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
  */
 
 #ifndef __RADEON_CONTEXT_H__
@@ -39,34 +39,24 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #ifdef GLX_DIRECT_RENDERING
 
-#include <X11/Xlibint.h>
-
-#include "dri_util.h"
-
-#include "xf86drm.h"
-#include "xf86drmRadeon.h"
-
-#include "macros.h"
-#include "mtypes.h"
-
-#include "radeon_reg.h"
-
 struct radeon_context;
 typedef struct radeon_context radeonContextRec;
 typedef struct radeon_context *radeonContextPtr;
 
+#include "mtypes.h"
 #include "radeon_lock.h"
 #include "radeon_screen.h"
 #include "mm.h"
 
 /* Flags for software fallback cases */
-/* See correponding strings in radeon_tris.c */
+/* See corresponding strings in radeon_swtcl.c */
 #define RADEON_FALLBACK_TEXTURE		0x0001
 #define RADEON_FALLBACK_DRAW_BUFFER	0x0002
 #define RADEON_FALLBACK_STENCIL		0x0004
 #define RADEON_FALLBACK_RENDER_MODE	0x0008
 #define RADEON_FALLBACK_BLEND_EQ	0x0010
 #define RADEON_FALLBACK_BLEND_FUNC	0x0020
+#define RADEON_FALLBACK_DISABLE 	0x0040
 
 /* Use the templated vertex format:
  */
@@ -87,12 +77,9 @@ typedef void (*radeon_line_func)( radeonContextPtr,
 typedef void (*radeon_point_func)( radeonContextPtr,
 				   radeonVertex * );
 
-typedef void (*radeon_prim_func)( GLcontext *ctx );
-
 
 struct radeon_colorbuffer_state {
    GLuint clear;
-
    GLint drawOffset, drawPitch;
 };
 
@@ -109,6 +96,10 @@ struct radeon_pixel_state {
 struct radeon_scissor_state {
    XF86DRIClipRectRec rect;
    GLboolean enabled;
+
+   GLuint numClipRects;			/* Cliprects active */
+   GLuint numAllocedClipRects;		/* Cliprects available */
+   XF86DRIClipRectPtr pClipRects;
 };
 
 struct radeon_stencilbuffer_state {
@@ -122,8 +113,9 @@ struct radeon_stipple_state {
 
 
 
-#define TEX_0 1
-#define TEX_1 2
+#define TEX_0   0x1
+#define TEX_1   0x2
+#define TEX_ALL 0x3
 
 typedef struct radeon_tex_obj radeonTexObj, *radeonTexObjPtr;
 
@@ -142,7 +134,13 @@ struct radeon_tex_obj {
 					   images need to be uploaded to
 					   local or AGP texture space */
 
-   GLint bound;				/* Texture unit currently bound to */
+   GLuint dirty_state;		        /* Flags (1 per texunit) for
+					   whether or not this texobj
+					   has dirty hardware state
+					   (pp_*) that needs to be
+					   brought into the
+					   texunit. */
+
    GLint heap;				/* Texture heap currently stored in */
 
    drmRadeonTexImage image[RADEON_MAX_TEXTURE_LEVELS];
@@ -173,10 +171,253 @@ struct radeon_texture_state {
    struct radeon_texture_env_state unit[RADEON_MAX_TEXTURE_UNITS];
 };
 
-struct radeon_state {
-   drmRadeonState hw;
+
+struct radeon_state_atom {
+   struct radeon_state_atom *next, *prev;
+   const char *name;		         /* for debug */
+   int cmd_size;		         /* nr of int entries in cmd[] (print_state_atom indexes cmd[0..cmd_size-1]), not bytes -- TODO confirm */
+   GLuint is_tcl;
+   int *cmd;			         /* one or more cmd's */
+   int *lastcmd;			 /* one or more cmd's */
+   GLboolean (*check)( GLcontext * );    /* is this state active? */
+};
+   
+
+
+/* Trying to keep these relatively short as the variables are becoming
+ * extravagantly long.  Drop the RADEON_ off the front of everything -
+ * I think we know we're in the radeon driver by now, and keep the
+ * prefix to 3 letters unless absolutely impossible.  
+ */
+
+#define CTX_CMD_0             0
+#define CTX_PP_MISC           1
+#define CTX_PP_FOG_COLOR      2
+#define CTX_RE_SOLID_COLOR    3
+#define CTX_RB3D_BLENDCNTL    4
+#define CTX_RB3D_DEPTHOFFSET  5
+#define CTX_RB3D_DEPTHPITCH   6
+#define CTX_RB3D_ZSTENCILCNTL 7
+#define CTX_CMD_1             8
+#define CTX_PP_CNTL           9
+#define CTX_RB3D_CNTL         10
+#define CTX_RB3D_COLOROFFSET  11
+#define CTX_CMD_2             12
+#define CTX_RB3D_COLORPITCH   13
+#define CTX_STATE_SIZE        14
+
+#define SET_CMD_0               0
+#define SET_SE_CNTL             1
+#define SET_SE_COORDFMT         2
+#define SET_CMD_1               3
+#define SET_SE_CNTL_STATUS      4
+#define SET_STATE_SIZE          5
+
+#define LIN_CMD_0               0
+#define LIN_RE_LINE_PATTERN     1
+#define LIN_RE_LINE_STATE       2
+#define LIN_CMD_1               3
+#define LIN_SE_LINE_WIDTH       4
+#define LIN_STATE_SIZE          5
+
+#define MSK_CMD_0               0
+#define MSK_RB3D_STENCILREFMASK 1
+#define MSK_RB3D_ROPCNTL        2
+#define MSK_RB3D_PLANEMASK      3
+#define MSK_STATE_SIZE          4
+
+#define VPT_CMD_0           0
+#define VPT_SE_VPORT_XSCALE          1
+#define VPT_SE_VPORT_XOFFSET         2
+#define VPT_SE_VPORT_YSCALE          3
+#define VPT_SE_VPORT_YOFFSET         4
+#define VPT_SE_VPORT_ZSCALE          5
+#define VPT_SE_VPORT_ZOFFSET         6
+#define VPT_STATE_SIZE      7
+
+#define MSC_CMD_0               0
+#define MSC_RE_MISC             1
+#define MSC_STATE_SIZE          2
+
+#define TEX_CMD_0                   0
+#define TEX_PP_TXFILTER             1
+#define TEX_PP_TXFORMAT             2
+#define TEX_PP_TXOFFSET             3
+#define TEX_PP_TXCBLEND             4
+#define TEX_PP_TXABLEND             5
+#define TEX_PP_TFACTOR              6
+#define TEX_CMD_1                   7
+#define TEX_PP_BORDER_COLOR         8
+#define TEX_STATE_SIZE              9
+
+#define ZBS_CMD_0              0
+#define ZBS_SE_ZBIAS_FACTOR             1
+#define ZBS_SE_ZBIAS_CONSTANT           2
+#define ZBS_STATE_SIZE         3
+
+#define TCL_CMD_0                        0
+#define TCL_OUTPUT_VTXFMT         1
+#define TCL_OUTPUT_VTXSEL         2
+#define TCL_MATRIX_SELECT_0       3
+#define TCL_MATRIX_SELECT_1       4
+#define TCL_UCP_VERT_BLEND_CTL    5
+#define TCL_TEXTURE_PROC_CTL      6
+#define TCL_LIGHT_MODEL_CTL       7
+#define TCL_PER_LIGHT_CTL_0       8
+#define TCL_PER_LIGHT_CTL_1       9
+#define TCL_PER_LIGHT_CTL_2       10
+#define TCL_PER_LIGHT_CTL_3       11
+#define TCL_STATE_SIZE                   12
+
+#define MTL_CMD_0            0	
+#define MTL_EMMISSIVE_RED    1	
+#define MTL_EMMISSIVE_GREEN  2	
+#define MTL_EMMISSIVE_BLUE   3	
+#define MTL_EMMISSIVE_ALPHA  4	
+#define MTL_AMBIENT_RED      5
+#define MTL_AMBIENT_GREEN    6
+#define MTL_AMBIENT_BLUE     7
+#define MTL_AMBIENT_ALPHA    8
+#define MTL_DIFFUSE_RED      9
+#define MTL_DIFFUSE_GREEN    10
+#define MTL_DIFFUSE_BLUE     11
+#define MTL_DIFFUSE_ALPHA    12
+#define MTL_SPECULAR_RED     13
+#define MTL_SPECULAR_GREEN   14
+#define MTL_SPECULAR_BLUE    15
+#define MTL_SPECULAR_ALPHA   16
+#define MTL_SHININESS        17
+#define MTL_STATE_SIZE       18
+
+#define VTX_CMD_0              0
+#define VTX_SE_COORD_FMT       1
+#define VTX_STATE_SIZE         2
+
+#define MAT_CMD_0              0
+#define MAT_ELT_0              1
+#define MAT_STATE_SIZE         17
+
+#define GRD_CMD_0                  0
+#define GRD_VERT_GUARD_CLIP_ADJ    1
+#define GRD_VERT_GUARD_DISCARD_ADJ 2
+#define GRD_HORZ_GUARD_CLIP_ADJ    3
+#define GRD_HORZ_GUARD_DISCARD_ADJ 4
+#define GRD_STATE_SIZE             5
+
+/* position changes frequently when lighting in modelpos - separate
+ * out to new state item?  
+ */
+#define LIT_CMD_0                  0
+#define LIT_AMBIENT_RED            1
+#define LIT_AMBIENT_GREEN          2
+#define LIT_AMBIENT_BLUE           3
+#define LIT_AMBIENT_ALPHA          4
+#define LIT_DIFFUSE_RED            5
+#define LIT_DIFFUSE_GREEN          6
+#define LIT_DIFFUSE_BLUE           7
+#define LIT_DIFFUSE_ALPHA          8
+#define LIT_SPECULAR_RED           9
+#define LIT_SPECULAR_GREEN         10
+#define LIT_SPECULAR_BLUE          11
+#define LIT_SPECULAR_ALPHA         12
+#define LIT_POSITION_X             13
+#define LIT_POSITION_Y             14
+#define LIT_POSITION_Z             15
+#define LIT_POSITION_W             16
+#define LIT_DIRECTION_X            17
+#define LIT_DIRECTION_Y            18
+#define LIT_DIRECTION_Z            19
+#define LIT_DIRECTION_W            20
+#define LIT_ATTEN_CONST            21
+#define LIT_ATTEN_LINEAR           22
+#define LIT_ATTEN_QUADRATIC        23
+#define LIT_ATTEN_XXX              24
+#define LIT_CMD_1                  25
+#define LIT_SPOT_DCD               26
+#define LIT_SPOT_EXPONENT          27
+#define LIT_SPOT_CUTOFF            28
+#define LIT_SPECULAR_THRESH        29
+#define LIT_RANGE_CUTOFF           30 /* ? */
+#define LIT_RANGE_ATTEN            31 /* ? */
+#define LIT_STATE_SIZE             32
+
+/* Fog
+ */
+#define FOG_CMD_0      0
+#define FOG_R          1
+#define FOG_C          2
+#define FOG_D          3
+#define FOG_PAD        4
+#define FOG_STATE_SIZE 5
+
+/* UCP
+ */
+#define UCP_CMD_0      0
+#define UCP_X          1
+#define UCP_Y          2
+#define UCP_Z          3
+#define UCP_W          4
+#define UCP_STATE_SIZE 5
+
+/* GLT - Global ambient
+ */
+#define GLT_CMD_0      0
+#define GLT_RED        1
+#define GLT_GREEN      2
+#define GLT_BLUE       3
+#define GLT_ALPHA      4
+#define GLT_STATE_SIZE 5
+
+/* EYE
+ */
+#define EYE_CMD_0          0
+#define EYE_X              1
+#define EYE_Y              2
+#define EYE_Z              3
+#define EYE_RESCALE_FACTOR 4
+#define EYE_STATE_SIZE     5
+
+#define SHN_CMD_0          0
+#define SHN_SHININESS      1
+#define SHN_STATE_SIZE     2
+
 
 
+
+
+struct radeon_hw_state {
+   /* All state should be on one of these lists:
+    */
+   struct radeon_state_atom dirty; /* dirty list head placeholder */
+   struct radeon_state_atom clean; /* clean list head placeholder */
+
+   /* Hardware state, stored as cmdbuf commands:  
+    *   -- Need to doublebuffer for
+    *           - reviving state after loss of context
+    *           - eliding noop statechange loops? (except line stipple count)
+    */
+   struct radeon_state_atom ctx;
+   struct radeon_state_atom set;
+   struct radeon_state_atom lin;
+   struct radeon_state_atom msk;
+   struct radeon_state_atom vpt;
+   struct radeon_state_atom tcl;
+   struct radeon_state_atom msc;
+   struct radeon_state_atom tex[2];
+   struct radeon_state_atom zbs;
+   struct radeon_state_atom mtl; 
+   struct radeon_state_atom mat[5]; 
+   struct radeon_state_atom lit[8]; /* includes vec, scl commands */
+   struct radeon_state_atom ucp[6];
+   struct radeon_state_atom eye; /* eye pos */
+   struct radeon_state_atom grd; /* guard band clipping */
+   struct radeon_state_atom fog; 
+   struct radeon_state_atom glt; 
+};
+
+struct radeon_state {
+   /* Derived state for internal purposes:
+    */
    struct radeon_colorbuffer_state color;
    struct radeon_depthbuffer_state depth;
    struct radeon_pixel_state pixel;
@@ -196,13 +437,40 @@ struct radeon_texture {
    GLint numHeaps;
 };
 
+/* Need refcounting on dma buffers:
+ */
+struct radeon_dma_buffer {
+   int refcount;		/* the number of retained regions in buf */
+   drmBufPtr buf;
+};
+
+#define GET_START(rvb) (rmesa->radeonScreen->agp_buffer_offset +			\
+			(rvb)->address - rmesa->dma.buf0_address +	\
+			(rvb)->start)
+
+/* A retained region, eg vertices for indexed vertices.
+ */
+struct radeon_dma_region {
+   struct radeon_dma_buffer *buf;
+   char *address;		/* == buf->address */
+   int start, end, ptr;		/* offsets from start of buf */
+   int aos_start;
+   int aos_stride;
+   int aos_size;
+};
+
 
 struct radeon_dma {
-   drmBufPtr buffer;
-   drmBufPtr retained;
-   GLubyte *address;
-   GLuint low, high, last;
-   GLuint offset;
+   /* Active dma region.  Allocations for vertices and retained
+    * regions come from here.  Also used for emitting random vertices,
+    * these may be flushed by calling flush_current();
+    */
+   struct radeon_dma_region current;
+   
+   void (*flush)( radeonContextPtr );
+
+   char *buf0_address;		/* start of buf[0], for index calcs */
+   GLuint nr_released_bufs;	/* flush after so many buffers released */
 };
 
 struct radeon_dri_mirror {
@@ -215,14 +483,205 @@ struct radeon_dri_mirror {
    drmContext hwContext;
    drmLock *hwLock;
    int fd;
+   int drmMinor;
 };
 
+
+#define RADEON_CMD_BUF_SZ  (8*1024) 
+
 struct radeon_store {
-   radeonTexObjPtr texture[2][RADEON_MAX_STATES];
-   drmRadeonState state[RADEON_MAX_STATES];
-   drmRadeonPrim prim[RADEON_MAX_PRIMS];
    GLuint statenr;
    GLuint primnr;
+   char cmd_buf[RADEON_CMD_BUF_SZ];
+   int cmd_used;   
+   int elts_start;
+};
+
+
+/* radeon_tcl.c
+ */
+struct radeon_tcl_info {
+   GLuint vertex_format;
+   GLint last_offset;
+   GLuint hw_primitive;
+
+   struct radeon_dma_region *aos_components[8];
+   GLuint nr_aos_components;
+
+   GLuint *Elts;
+
+   struct radeon_dma_region indexed_verts;
+   struct radeon_dma_region obj;
+   struct radeon_dma_region rgba;
+   struct radeon_dma_region spec;
+   struct radeon_dma_region fog;
+   struct radeon_dma_region tex[RADEON_MAX_TEXTURE_UNITS];
+   struct radeon_dma_region norm;
+};
+
+
+/* radeon_swtcl.c
+ */
+struct radeon_swtcl_info {
+   GLuint SetupIndex;
+   GLuint SetupNewInputs;
+   GLuint RenderIndex;
+   GLuint vertex_size;
+   GLuint vertex_stride_shift;
+   GLuint vertex_format;
+   char *verts;
+
+   /* Fallback rasterization functions
+    */
+   radeon_point_func draw_point;
+   radeon_line_func draw_line;
+   radeon_tri_func draw_tri;
+
+   GLuint hw_primitive;
+   GLenum render_primitive;
+   GLuint numverts;
+
+   struct radeon_dma_region indexed_verts;
+};
+
+
+struct radeon_ioctl {
+   GLuint vertex_offset;
+   GLuint vertex_size;
+};
+
+
+
+#define RADEON_MAX_PRIMS 64
+
+
+/* Want to keep a cache of these around.  Each is parameterized by
+ * only a single value which has only a small range.  Only expect a
+ * few, so just rescan the list each time?
+ */
+struct dynfn {
+   struct dynfn *next, *prev;
+   int key;
+   char *code;
+};
+
+struct dfn_lists {
+   struct dynfn Vertex2f;
+   struct dynfn Vertex2fv;
+   struct dynfn Vertex3f;
+   struct dynfn Vertex3fv;
+   struct dynfn Color4ub;
+   struct dynfn Color4ubv;
+   struct dynfn Color3ub;
+   struct dynfn Color3ubv;
+   struct dynfn Color4f;
+   struct dynfn Color4fv;
+   struct dynfn Color3f;
+   struct dynfn Color3fv;
+   struct dynfn SecondaryColor3ubEXT;
+   struct dynfn SecondaryColor3ubvEXT;
+   struct dynfn SecondaryColor3fEXT;
+   struct dynfn SecondaryColor3fvEXT;
+   struct dynfn Normal3f;
+   struct dynfn Normal3fv;
+   struct dynfn TexCoord2f;
+   struct dynfn TexCoord2fv;
+   struct dynfn TexCoord1f;
+   struct dynfn TexCoord1fv;
+   struct dynfn MultiTexCoord2fARB;
+   struct dynfn MultiTexCoord2fvARB;
+   struct dynfn MultiTexCoord1fARB;
+   struct dynfn MultiTexCoord1fvARB;
+};
+
+struct _vb;
+
+struct dfn_generators {
+   struct dynfn *(*Vertex2f)( GLcontext *, int );
+   struct dynfn *(*Vertex2fv)( GLcontext *, int );
+   struct dynfn *(*Vertex3f)( GLcontext *, int );
+   struct dynfn *(*Vertex3fv)( GLcontext *, int );
+   struct dynfn *(*Color4ub)( GLcontext *, int );
+   struct dynfn *(*Color4ubv)( GLcontext *, int );
+   struct dynfn *(*Color3ub)( GLcontext *, int );
+   struct dynfn *(*Color3ubv)( GLcontext *, int );
+   struct dynfn *(*Color4f)( GLcontext *, int );
+   struct dynfn *(*Color4fv)( GLcontext *, int );
+   struct dynfn *(*Color3f)( GLcontext *, int );
+   struct dynfn *(*Color3fv)( GLcontext *, int );
+   struct dynfn *(*SecondaryColor3ubEXT)( GLcontext *, int );
+   struct dynfn *(*SecondaryColor3ubvEXT)( GLcontext *, int );
+   struct dynfn *(*SecondaryColor3fEXT)( GLcontext *, int );
+   struct dynfn *(*SecondaryColor3fvEXT)( GLcontext *, int );
+   struct dynfn *(*Normal3f)( GLcontext *, int );
+   struct dynfn *(*Normal3fv)( GLcontext *, int );
+   struct dynfn *(*TexCoord2f)( GLcontext *, int );
+   struct dynfn *(*TexCoord2fv)( GLcontext *, int );
+   struct dynfn *(*TexCoord1f)( GLcontext *, int );
+   struct dynfn *(*TexCoord1fv)( GLcontext *, int );
+   struct dynfn *(*MultiTexCoord2fARB)( GLcontext *, int );
+   struct dynfn *(*MultiTexCoord2fvARB)( GLcontext *, int );
+   struct dynfn *(*MultiTexCoord1fARB)( GLcontext *, int );
+   struct dynfn *(*MultiTexCoord1fvARB)( GLcontext *, int );
+};
+
+
+struct radeon_vb {
+   /* Keep these first: referenced from codegen templates:
+    */
+   GLint counter, initial_counter;
+   GLint *dmaptr;
+   void (*notify)( void );
+   GLint vertex_size;
+
+   /* A maximum total of 15 elements per vertex:  3 floats for position, 3
+    * floats for normal, 4 floats for color, 4 bytes for secondary color,
+    * 2 floats for each texture unit (4 floats total).
+    * 
+    * As soon as the 3rd TMU is supported or cube maps (or 3D textures) are
+    * supported, this value will grow.
+    * 
+    * The position data is never actually stored here, so 3 elements could be
+    * trimmed out of the buffer.
+    */
+   union { float f; int i; radeon_color_t color; } vertex[15];
+
+   GLfloat *normalptr;
+   GLfloat *floatcolorptr;
+   radeon_color_t *colorptr;
+   GLfloat *floatspecptr;
+   radeon_color_t *specptr;
+   GLfloat *texcoordptr[2];
+
+   GLcontext *context;		/* current context : Single thread only! */
+};
+
+struct radeon_prim {
+   GLuint start;
+   GLuint end;
+   GLuint prim;
+};
+
+struct radeon_vbinfo {
+   GLenum *prim;		/* &ctx->Driver.CurrentExecPrimitive */
+   GLuint primflags;
+   GLboolean enabled;		/* RADEON_NO_VTXFMT//RADEON_NO_TCL env vars */
+   GLboolean installed;
+   GLboolean fell_back;
+   GLboolean recheck;
+   GLint initial_counter;
+   GLint nrverts;
+   GLuint vertex_format;
+
+   GLuint installed_vertex_format;
+   GLuint installed_color_3f_sz;
+
+   struct radeon_prim primlist[RADEON_MAX_PRIMS];
+   int nrprims;
+
+   struct dfn_lists dfn_cache;
+   struct dfn_generators codegen;
+   GLvertexformat vtxfmt;
 };
 
 
@@ -233,31 +692,20 @@ struct radeon_context {
 
    /* Driver and hardware state management
     */
+   struct radeon_hw_state hw;
    struct radeon_state state;
 
    /* Texture object bookkeeping
     */
    struct radeon_texture texture;
 
-   /* Fallback rasterization functions
-    */
-   radeon_point_func draw_point;
-   radeon_line_func draw_line;
-   radeon_tri_func draw_tri;
 
    /* Rasterization and vertex state:
     */
-   GLuint NewGLState;
+   GLuint TclFallback;
    GLuint Fallback;
-   GLuint SetupIndex;
-   GLuint SetupNewInputs;
-   GLuint RenderIndex;
+   GLuint NewGLState;
 
-   GLuint vertex_size;
-   GLuint vertex_stride_shift;
-   GLuint vertex_format;
-   GLuint num_verts;
-   char *verts;
    
    /* Temporaries for translating away float colors:
     */
@@ -266,34 +714,61 @@ struct radeon_context {
 
    /* Vertex buffers
     */
+   struct radeon_ioctl ioctl;
    struct radeon_dma dma;
-
    struct radeon_store store;
-   GLboolean upload_cliprects;
-
-   GLuint hw_primitive;
-   GLenum render_primitive;
 
    /* Page flipping
     */
    GLuint doPageFlip;
-   GLuint currentPage;
+
+   /* Busy waiting
+    */
+   GLuint do_usleeps;
+   GLuint do_irqs;
+   GLuint irqsEmitted;
+   drmRadeonIrqWait iw;
 
    /* Drawable, cliprect and scissor information
     */
    GLuint numClipRects;			/* Cliprects for the draw buffer */
    XF86DRIClipRectPtr pClipRects;
-   GLuint lastStamp;
+   unsigned int lastStamp;
+   GLboolean lost_context;
+   radeonScreenPtr radeonScreen;	/* Screen private DRI data */
+   RADEONSAREAPrivPtr sarea;		/* Private SAREA data */
 
-   /* Mirrors of some DRI state
+   /* TCL stuff
     */
-   struct radeon_dri_mirror dri;
+   GLmatrix TexGenMatrix[RADEON_MAX_TEXTURE_UNITS];
+   GLboolean recheck_texgen[RADEON_MAX_TEXTURE_UNITS];
+   GLboolean TexGenNeedNormals[RADEON_MAX_TEXTURE_UNITS];
+   GLuint TexMatEnabled;
+   GLuint TexGenEnabled;
+   GLmatrix tmpmat;
+   GLuint last_ReallyEnabled;
+
+   /* VBI
+    */
+   GLuint vbl_seq;
 
-   radeonScreenPtr radeonScreen;	/* Screen private DRI data */
-   RADEONSAREAPrivPtr sarea;		/* Private SAREA data */
+   /* radeon_tcl.c
+    */
+   struct radeon_tcl_info tcl;
+
+   /* radeon_swtcl.c
+    */
+   struct radeon_swtcl_info swtcl;
+
+   /* radeon_vtxfmt.c
+    */
+   struct radeon_vbinfo vb;
 
-   GLboolean debugFallbacks;
+   /* Mirrors of some DRI state
+    */
+   struct radeon_dri_mirror dri;
 
+ 
    /* Performance counters
     */
    GLuint boxes;			/* Draw performance boxes */
@@ -322,11 +797,12 @@ static __inline GLuint radeonPackColor( GLuint cpp,
    }
 }
 
+#define RADEON_OLD_PACKETS 1
 
 /* ================================================================
  * Debugging:
  */
-#define DO_DEBUG		0
+#define DO_DEBUG		1
 
 #if DO_DEBUG
 extern int RADEON_DEBUG;
@@ -334,14 +810,18 @@ extern int RADEON_DEBUG;
 #define RADEON_DEBUG		0
 #endif
 
-#define DEBUG_ALWAYS_SYNC	0x01
-#define DEBUG_VERBOSE_API	0x02
-#define DEBUG_VERBOSE_MSG	0x04
-#define DEBUG_VERBOSE_LRU	0x08
-#define DEBUG_VERBOSE_DRI	0x10
-#define DEBUG_VERBOSE_IOCTL	0x20
-#define DEBUG_VERBOSE_2D	0x40
-#define DEBUG_VERBOSE_TEXTURE	0x80
+#define DEBUG_TEXTURE	0x001
+#define DEBUG_STATE	0x002
+#define DEBUG_IOCTL	0x004
+#define DEBUG_PRIMS	0x008
+#define DEBUG_VERTS	0x010
+#define DEBUG_FALLBACKS	0x020
+#define DEBUG_VFMT	0x040
+#define DEBUG_CODEGEN	0x080
+#define DEBUG_VERBOSE	0x100
+#define DEBUG_DRI       0x200
+#define DEBUG_DMA       0x400
+#define DEBUG_SANITY    0x800
 
 #endif
 #endif /* __RADEON_CONTEXT_H__ */
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.c
index 223301ccb..9f0c5065b 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.c
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.c
@@ -1,4 +1,4 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.c,v 1.7 2002/09/16 18:05:19 eich Exp $ */
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.c,v 1.11 2003/01/29 22:04:59 dawes Exp $ */
 /**************************************************************************
 
 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
@@ -31,480 +31,704 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
  * Authors:
  *   Kevin E. Martin <martin@valinux.com>
  *   Gareth Hughes <gareth@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
  *
  */
 
 #include "radeon_context.h"
 #include "radeon_state.h"
 #include "radeon_ioctl.h"
+#include "radeon_tcl.h"
+#include "radeon_sanity.h"
+
+#include "radeon_macros.h"  /* for INREG() */
 
 #include "mem.h"
 #include "macros.h"
 #include "swrast/swrast.h"
+#include "simple_list.h"
 
 #define RADEON_TIMEOUT             512
+#define RADEON_IDLE_RETRY           16
+
+#include <unistd.h>  /* for usleep() */
+
+static void do_usleep( int nr, const char *caller )
+{
+   if (0) fprintf(stderr, "usleep %d in %s\n", nr, caller );
+   if (1) usleep( nr );
+}
 
+static void radeonWaitForIdle( radeonContextPtr rmesa );
 
 /* =============================================================
- * Hardware vertex buffer handling
+ * Kernel command buffer handling
  */
 
-/* Get a new VB from the pool of vertex buffers in AGP space.
- */
-drmBufPtr radeonGetBufferLocked( radeonContextPtr rmesa )
+static void print_state_atom( struct radeon_state_atom *state )
 {
-   int fd = rmesa->dri.fd;
-   int index = 0;
-   int size = 0;
-   drmDMAReq dma;
-   drmBufPtr buf = NULL;
-   int to = 0;
-   int ret;
+   int i;
 
-   dma.context = rmesa->dri.hwContext;
-   dma.send_count = 0;
-   dma.send_list = NULL;
-   dma.send_sizes = NULL;
-   dma.flags = 0;
-   dma.request_count = 1;
-   dma.request_size = RADEON_BUFFER_SIZE;
-   dma.request_list = &index;
-   dma.request_sizes = &size;
-   dma.granted_count = 0;
+   fprintf(stderr, "emit %s/%d\n", state->name, state->cmd_size);
 
-   while ( !buf && ( to++ < RADEON_TIMEOUT ) ) {
-      ret = drmDMA( fd, &dma );
+   if (RADEON_DEBUG & DEBUG_VERBOSE) 
+      for (i = 0 ; i < state->cmd_size ; i++) 
+	 fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]);
+
+}
+
+/* Queue a 3D-idle wait, then every atom on 'list' that passes check(). */
+static void radeon_emit_state_list( radeonContextPtr rmesa,
+				    struct radeon_state_atom *list )
+{
+   struct radeon_state_atom *state, *tmp;
+   char *dest;
+
+   /* From Felix Kuhling: similar to some other lockups, glaxium will
+    * lock with what we believe to be a normal command stream, but
+    * sprinkling some magic waits around allows it to run
+    * uninterrupted.  This has a slight effect on q3 framerates, but
+    * it might now be possible to remove the zbs hack, below.
+    *
+    * Felix reports that this can be narrowed down to just
+    * tcl,tex0,tex1 state, but that's pretty much every statechange,
+    * so let's just put the wait in always (unless Felix wants to
+    * narrow it down further...) */
+   if (1) {
+      drmRadeonCmdHeader *cmd = (drmRadeonCmdHeader *)
+	 radeonAllocCmdBuf( rmesa, sizeof(*cmd), __FUNCTION__ );
+      cmd->i = 0;		/* clear the bytes the wait fields don't set */
+      cmd->wait.cmd_type = RADEON_CMD_WAIT;
+      cmd->wait.flags = RADEON_WAIT_3D;
+   }
 
-      if ( ret == 0 ) {
-	 buf = &rmesa->radeonScreen->buffers->list[index];
-	 buf->used = 0;
-	 /* Bump the performance counter */
-	 rmesa->c_vertexBuffers++;
-	 return buf;
+   foreach_s( state, tmp, list ) {
+      if (state->check( rmesa->glCtx )) {
+	 dest = radeonAllocCmdBuf( rmesa, state->cmd_size * 4, __FUNCTION__);
+	 memcpy( dest, state->cmd, state->cmd_size * 4);
+	 move_to_head( &(rmesa->hw.clean), state );
+	 if (RADEON_DEBUG & DEBUG_STATE) 
+	    print_state_atom( state );
       }
+      else if (RADEON_DEBUG & DEBUG_STATE)
+	 fprintf(stderr, "skip state %s\n", state->name);
    }
+}
 
-   if ( !buf ) {
-      UNLOCK_HARDWARE( rmesa );
-      fprintf( stderr, "Error: Could not get new VB... exiting\n" );
-      exit( -1 );
+
+void radeonEmitState( radeonContextPtr rmesa )
+{
+   struct radeon_state_atom *state, *tmp;
+
+   if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   /* Somewhat overkill:
+    */
+   if (rmesa->lost_context) {
+      if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS|DEBUG_IOCTL))
+	 fprintf(stderr, "%s - lost context\n", __FUNCTION__); 
+
+      foreach_s( state, tmp, &(rmesa->hw.clean) ) 
+	 move_to_tail(&(rmesa->hw.dirty), state );
+
+      rmesa->lost_context = 0;
+   }
+   else if (1) {
+      /* This is a dastardly kludge to work around a lockup that I
+       * haven't otherwise figured out.
+       */
+      move_to_tail(&(rmesa->hw.dirty), &(rmesa->hw.zbs) );
    }
 
-   return buf;
+   if (!(rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL)) {
+     foreach_s( state, tmp, &(rmesa->hw.dirty) ) {
+       if (state->is_tcl) {
+	 move_to_head( &(rmesa->hw.clean), state );
+       }
+     }
+   }
+
+   radeon_emit_state_list( rmesa, &rmesa->hw.dirty );
 }
 
 
-static GLboolean intersect_rect( XF86DRIClipRectPtr out,
-				 XF86DRIClipRectPtr a,
-				 XF86DRIClipRectPtr b )
+
+/* Fire a section of the retained (indexed_verts) buffer as a regular
+ * primitive.
+ */
+extern void radeonEmitVbufPrim( radeonContextPtr rmesa,
+				GLuint vertex_format,
+				GLuint primitive,
+				GLuint vertex_nr )
 {
-   *out = *a;
-   if ( b->x1 > out->x1 ) out->x1 = b->x1;
-   if ( b->y1 > out->y1 ) out->y1 = b->y1;
-   if ( b->x2 < out->x2 ) out->x2 = b->x2;
-   if ( b->y2 < out->y2 ) out->y2 = b->y2;
-   if ( out->x1 >= out->x2 ) return GL_FALSE;
-   if ( out->y1 >= out->y2 ) return GL_FALSE;
-   return GL_TRUE;
+   drmRadeonCmdHeader *cmd;
+
+   assert(rmesa->dri.drmMinor >= 3);
+   assert(!(primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
+
+   radeonEmitState( rmesa );
+
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s cmd_used/4: %d\n", __FUNCTION__,
+	      rmesa->store.cmd_used/4);
+
+#if RADEON_OLD_PACKETS
+   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 6 * sizeof(*cmd),
+						  __FUNCTION__ );
+   cmd[0].i = 0;		/* as in the #else path: no stale header bytes */
+   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+   cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM | (3 << 16);
+   cmd[2].i = rmesa->ioctl.vertex_offset;
+   cmd[3].i = vertex_nr;
+   cmd[4].i = vertex_format;
+   cmd[5].i = (primitive | 
+	       RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
+	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+	       (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
+
+   if (RADEON_DEBUG & DEBUG_PRIMS)
+      fprintf(stderr, "%s: header 0x%x offt 0x%x vfmt 0x%x vfcntl %x \n",
+	      __FUNCTION__,
+	      cmd[1].i, cmd[2].i, cmd[4].i, cmd[5].i);
+#else
+   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 4 * sizeof(*cmd),
+						  __FUNCTION__ );
+   cmd[0].i = 0;
+   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+   cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_VBUF | (1 << 16);
+   cmd[2].i = vertex_format;
+   cmd[3].i = (primitive | 
+	       RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
+	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+	       RADEON_CP_VC_CNTL_MAOS_ENABLE |
+	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+	       (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
+
+
+   if (RADEON_DEBUG & DEBUG_PRIMS)
+      fprintf(stderr, "%s: header 0x%x vfmt 0x%x vfcntl %x \n",
+	      __FUNCTION__,
+	      cmd[1].i, cmd[2].i, cmd[3].i);
+#endif
 }
 
-static void emit_state( radeonContextPtr rmesa,
-			drmRadeonState *dest,
-			int dirty )
-{
-   struct radeon_state *state = &rmesa->state;
 
-   if ( dirty & RADEON_UPLOAD_CONTEXT )
-      memcpy( &dest->context, &state->hw.context, sizeof(dest->context) );
+void radeonFlushElts( radeonContextPtr rmesa )
+{
+   int *cmd = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start);
+   int dwords;
+#if RADEON_OLD_PACKETS
+   int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 24)) / 2;
+#else
+   int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 16)) / 2;
+#endif
 
-   if ( dirty & RADEON_UPLOAD_VERTFMT )
-      memcpy( &dest->vertex, &state->hw.vertex, sizeof(dest->vertex) );
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
 
-   if ( dirty & RADEON_UPLOAD_LINE )
-      memcpy( &dest->line, &state->hw.line, sizeof(dest->line) );
+   assert( rmesa->dma.flush == radeonFlushElts );
+   rmesa->dma.flush = 0;
 
-   if ( dirty & RADEON_UPLOAD_BUMPMAP )
-      memcpy( &dest->bumpmap, &state->hw.bumpmap, sizeof(dest->bumpmap) );
+   /* Cope with odd number of elts: pad cmd_used up to a whole dword
+    * (relies on cmd_used being a multiple of 2 here). */
+   rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2;
+   dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4;
+
+#if RADEON_OLD_PACKETS
+   cmd[1] |= (dwords - 3) << 16;
+   cmd[5] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
+#else
+   cmd[1] |= (dwords - 3) << 16;
+   cmd[3] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
+#endif
+}
 
-   if ( dirty & RADEON_UPLOAD_MASKS )
-      memcpy( &dest->mask, &state->hw.mask, sizeof(dest->mask) );
 
-   if ( dirty & RADEON_UPLOAD_VIEWPORT )
-      memcpy( &dest->viewport, &state->hw.viewport, sizeof(dest->viewport) );
+GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa,
+				    GLuint vertex_format,
+				    GLuint primitive,
+				    GLuint min_nr )
+{
+   drmRadeonCmdHeader *cmd;
+   GLushort *retval;
+
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s %d\n", __FUNCTION__, min_nr);
+
+   assert(rmesa->dri.drmMinor >= 3); 
+   assert((primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
+   
+   radeonEmitState( rmesa );
+   
+#if RADEON_OLD_PACKETS
+   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 
+						  24 + min_nr*2,
+						  __FUNCTION__ );
+   cmd[0].i = 0;
+   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+   cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM;
+   cmd[2].i = rmesa->ioctl.vertex_offset;
+   cmd[3].i = 0xffff;
+   cmd[4].i = vertex_format;
+   cmd[5].i = (primitive | 
+	       RADEON_CP_VC_CNTL_PRIM_WALK_IND |
+	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
+
+   retval = (GLushort *)(cmd+6);
+#else   
+   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 
+						  16 + min_nr*2,
+						  __FUNCTION__ );
+   cmd[0].i = 0;
+   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+   cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_INDX;
+   cmd[2].i = vertex_format;
+   cmd[3].i = (primitive | 
+	       RADEON_CP_VC_CNTL_PRIM_WALK_IND |
+	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+	       RADEON_CP_VC_CNTL_MAOS_ENABLE |
+	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
+
+   retval = (GLushort *)(cmd+4);
+#endif
 
-   if ( dirty & RADEON_UPLOAD_SETUP ) {
-      memcpy( &dest->setup1, &state->hw.setup1, sizeof(dest->setup1) );
-      memcpy( &dest->setup2, &state->hw.setup2, sizeof(dest->setup2) );
-   }
+   if (RADEON_DEBUG & DEBUG_PRIMS)
+      fprintf(stderr, "%s: header 0x%x vfmt 0x%x prim %x \n",
+	      __FUNCTION__,
+	      cmd[1].i, vertex_format, primitive);
 
-   if ( dirty & RADEON_UPLOAD_MISC )
-      memcpy( &dest->misc, &state->hw.misc, sizeof(dest->misc) );
+   assert(!rmesa->dma.flush);
+   rmesa->dma.flush = radeonFlushElts;
 
-   if ( dirty & RADEON_UPLOAD_ZBIAS )
-      memcpy( &dest->zbias, &state->hw.zbias, sizeof(dest->zbias) );
+   rmesa->store.elts_start = ((char *)cmd) - rmesa->store.cmd_buf;
 
-   /* Assemble the texture state, combining the texture object and
-    * texture environment state into the hardware texture unit state.
-    */
-   if ( dirty & RADEON_UPLOAD_TEX0 ) {
-      radeonTexObjPtr t0 = state->texture.unit[0].texobj;
+   return retval;
+}
 
-      dest->texture[0].pp_txfilter = t0->pp_txfilter | state->hw.texture[0].pp_txfilter;
-      dest->texture[0].pp_txformat = t0->pp_txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0;
-      dest->texture[0].pp_txoffset = t0->pp_txoffset;
-      dest->texture[0].pp_border_color = t0->pp_border_color;
-      dest->texture[0].pp_txcblend = state->hw.texture[0].pp_txcblend;
-      dest->texture[0].pp_txablend = state->hw.texture[0].pp_txablend;
-      dest->texture[0].pp_tfactor = state->hw.texture[0].pp_tfactor;
-   }
 
-   if ( dirty & RADEON_UPLOAD_TEX1 ) {
-      radeonTexObjPtr t1 = state->texture.unit[1].texobj;
 
-      dest->texture[1].pp_txfilter = t1->pp_txfilter | state->hw.texture[1].pp_txfilter;
-      dest->texture[1].pp_txformat = t1->pp_txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1;
-      dest->texture[1].pp_txoffset = t1->pp_txoffset;
-      dest->texture[1].pp_border_color = t1->pp_border_color;
-      dest->texture[1].pp_txcblend = state->hw.texture[1].pp_txcblend;
-      dest->texture[1].pp_txablend = state->hw.texture[1].pp_txablend;
-      dest->texture[1].pp_tfactor = state->hw.texture[1].pp_tfactor;
-   }
+void radeonEmitVertexAOS( radeonContextPtr rmesa,
+			  GLuint vertex_size,
+			  GLuint offset )
+{
+#if RADEON_OLD_PACKETS
+   rmesa->ioctl.vertex_size = vertex_size;
+   rmesa->ioctl.vertex_offset = offset;
+#else
+   drmRadeonCmdHeader *cmd;
+   assert(rmesa->dri.drmMinor >= 3); 
+
+   if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
+      fprintf(stderr, "%s:  vertex_size 0x%x offset 0x%x \n",
+	      __FUNCTION__, vertex_size, offset);
+
+   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 5 * sizeof(int),
+						  __FUNCTION__ );
+
+   cmd[0].i = 0;
+   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
+   cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (2 << 16);
+   cmd[2].i = 1;
+   cmd[3].i = vertex_size | (vertex_size << 8);
+   cmd[4].i = offset;
+#endif
 }
+		       
 
-#if 0
-static void print_values( const char *name, const void *vals, int sz )
+void radeonEmitAOS( radeonContextPtr rmesa,
+		    struct radeon_dma_region **component,
+		    GLuint nr,
+		    GLuint offset )
 {
-   const int *ivals = (const int *)vals;
+#if RADEON_OLD_PACKETS
+   assert( nr == 1 );
+   assert( component[0]->aos_size == component[0]->aos_stride );
+   rmesa->ioctl.vertex_size = component[0]->aos_size;
+   rmesa->ioctl.vertex_offset = 
+      (component[0]->aos_start + offset * component[0]->aos_stride * 4);
+#else
+   drmRadeonCmdHeader *cmd;
+   int sz = 3 + (nr/2 * 3) + (nr & 1) * 2;
    int i;
+   int *tmp;
+
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   assert(rmesa->dri.drmMinor >= 3); 
+
+   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, sz * sizeof(int),
+						  __FUNCTION__ );
+   cmd[0].i = 0;
+   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
+   cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | ((sz-3) << 16);
+   cmd[2].i = nr;
+   tmp = &cmd[0].i;
+   cmd += 3;
+
+   for (i = 0 ; i < nr ; i++) {
+      if (i & 1) {
+	 cmd[0].i |= ((component[i]->aos_stride << 24) | 
+		      (component[i]->aos_size << 16));
+	 cmd[2].i = (component[i]->aos_start + 
+		     offset * component[i]->aos_stride * 4);
+	 cmd += 3;
+      }
+      else {
+	 cmd[0].i = ((component[i]->aos_stride << 8) | 
+		     (component[i]->aos_size << 0));
+	 cmd[1].i = (component[i]->aos_start + 
+		     offset * component[i]->aos_stride * 4);
+      }
+   }
 
-   for (i = 0; i < sz/4 ; i++)
-      fprintf(stderr, "%s %d: 0x%x\n", name, i, ivals[i]);
-}
+   if (RADEON_DEBUG & DEBUG_VERTS) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      for (i = 0 ; i < sz ; i++)
+	 fprintf(stderr, "   %d: %x\n", i, tmp[i]);
+   }
 #endif
-/*
-static void print_state( drmRadeonState *state )
-{
-   int dirty = state->dirty;
+}
 
-   if ( dirty & RADEON_UPLOAD_CONTEXT ) 
-      print_values( "CONTEXT", &state->context, sizeof(state->context) );
 
-   if ( dirty & RADEON_UPLOAD_VERTFMT )
-      print_values( "VERTFMT", &state->vertex, sizeof(state->vertex) );
+static int radeonFlushCmdBufLocked( radeonContextPtr rmesa, 
+				    const char * caller )
+{
+   int ret, i;
+   drmRadeonCmdBuffer cmd;
 
-   if ( dirty & RADEON_UPLOAD_LINE )
-      print_values( "LINE", &state->line, sizeof(state->line) );
+   if (RADEON_DEBUG & DEBUG_IOCTL) {
+      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); 
 
-   if ( dirty & RADEON_UPLOAD_BUMPMAP )
-      print_values( "BUMPMAP", &state->bumpmap, sizeof(state->bumpmap) );
+      if (RADEON_DEBUG & DEBUG_VERBOSE) 
+	 for (i = 0 ; i < rmesa->store.cmd_used ; i += 4 )
+	    fprintf(stderr, "%d: %x\n", i/4, 
+		    *(int *)(&rmesa->store.cmd_buf[i]));
+   }
 
-   if ( dirty & RADEON_UPLOAD_MASKS )
-      print_values( "MASKS", &state->mask, sizeof(state->mask) );
+   if (RADEON_DEBUG & DEBUG_DMA)
+      fprintf(stderr, "%s: Releasing %d buffers\n", __FUNCTION__,
+	      rmesa->dma.nr_released_bufs);
 
-   if ( dirty & RADEON_UPLOAD_VIEWPORT )
-      print_values( "VIEWPORT", &state->viewport, sizeof(state->viewport) );
 
-   if ( dirty & RADEON_UPLOAD_SETUP ) {
-      print_values( "SETUP", &state->setup1, sizeof(state->setup1) );
-      print_values( "SETUP2", &state->setup2, sizeof(state->setup2) );
+   if (RADEON_DEBUG & DEBUG_SANITY) {
+      if (rmesa->state.scissor.enabled) 
+	 ret = radeonSanityCmdBuffer( rmesa, rmesa->state.scissor.numClipRects,
+				      rmesa->state.scissor.pClipRects);
+      else
+	 ret = radeonSanityCmdBuffer( rmesa, rmesa->numClipRects,
+				      rmesa->pClipRects);
+      if (ret)	/* ret is overwritten by the ioctl below; report it now */
+	 fprintf(stderr, "radeonSanityCmdBuffer: %d\n", ret);
    }
 
-   if ( dirty & RADEON_UPLOAD_MISC )
-      print_values( "MISC", &state->misc, sizeof(state->misc) );
+   cmd.bufsz = rmesa->store.cmd_used;
+   cmd.buf = rmesa->store.cmd_buf;
 
-   if ( dirty & RADEON_UPLOAD_ZBIAS )
-      print_values( "ZBIAS", &state->zbias, sizeof(state->zbias) );
+   if (rmesa->state.scissor.enabled) {
+      cmd.nbox = rmesa->state.scissor.numClipRects;
+      cmd.boxes = (drmClipRect *)rmesa->state.scissor.pClipRects;
+   } else {
+      cmd.nbox = rmesa->numClipRects;
+      cmd.boxes = (drmClipRect *)rmesa->pClipRects;
+   }
 
-   if ( dirty & RADEON_UPLOAD_TEX0 ) 
-      print_values( "TEX0", &state->texture[0], sizeof(state->texture[0]) );
+   ret = drmCommandWrite( rmesa->dri.fd,
+			  DRM_RADEON_CMDBUF,
+			  &cmd, sizeof(cmd) );
 
-   if ( dirty & RADEON_UPLOAD_TEX1 ) 
-      print_values( "TEX1", &state->texture[1], sizeof(state->texture[1]) );
+   rmesa->store.primnr = 0;
+   rmesa->store.statenr = 0;
+   rmesa->store.cmd_used = 0;
+   rmesa->dma.nr_released_bufs = 0;
+   rmesa->lost_context = 1;	
+   return ret;
 }
-*/
 
-static void emit_prim( radeonContextPtr rmesa )
-{
-   GLuint prim = rmesa->store.primnr++;
-   GLuint dirty = rmesa->state.hw.dirty;
 
-   rmesa->store.prim[prim].prim = rmesa->hw_primitive;
-   rmesa->store.prim[prim].start = rmesa->dma.last;
-   rmesa->store.prim[prim].finish = rmesa->dma.low;
-   rmesa->store.prim[prim].vc_format = rmesa->vertex_format;
-
-   if (rmesa->hw_primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND)
-      rmesa->store.prim[prim].numverts = rmesa->dma.offset / 64;
-   else
-      rmesa->store.prim[prim].numverts = rmesa->num_verts;
+/* Note: does not emit any commands to avoid recursion on
+ * radeonAllocCmdBuf.
+ */
+void radeonFlushCmdBuf( radeonContextPtr rmesa, const char *caller )
+{
+   int ret;
 
-   rmesa->num_verts = 0;
-   rmesa->dma.last = rmesa->dma.low;
+	      
+   assert (rmesa->dri.drmMinor >= 3);
 
+   LOCK_HARDWARE( rmesa );
 
+   ret = radeonFlushCmdBufLocked( rmesa, caller );
 
+   UNLOCK_HARDWARE( rmesa );
 
-   /* Make sure we keep a copy of the initial state.
-    */
-   if (prim == 0) {
-      dirty = RADEON_UPLOAD_CONTEXT_ALL;
-      if (rmesa->state.texture.unit[0].texobj) dirty |= RADEON_UPLOAD_TEX0;
-      if (rmesa->state.texture.unit[1].texobj) dirty |= RADEON_UPLOAD_TEX1;
+   if (ret) {
+      fprintf(stderr, "drmRadeonCmdBuffer: %d\n", ret);
+      exit(ret);
    }
+}
 
 
-   if (dirty)
-   {
-      GLuint state = rmesa->store.statenr++;
-
-      emit_state( rmesa, &rmesa->store.state[state], dirty );
-/*        fprintf(stderr, "emit state %d, dirty %x rmesa->dirty %x\n", */
-/*  	      state, dirty, rmesa->state.hw.dirty ); */
-      rmesa->store.state[state].dirty = rmesa->state.hw.dirty;	/* override */
-      rmesa->store.texture[0][state] = rmesa->state.texture.unit[0].texobj;
-      rmesa->store.texture[1][state] = rmesa->state.texture.unit[1].texobj;
-      rmesa->state.hw.dirty = 0;
-/*        print_state( &rmesa->store.state[state] ); */
-   }
-
-   rmesa->store.prim[prim].stateidx = rmesa->store.statenr - 1;
-
-/*     fprintf(stderr, "emit_prim %d hwprim 0x%x vfmt 0x%x %d..%d %d verts stateidx %x\n", */
-/*  	   prim, */
-/*  	   rmesa->store.prim[prim].prim, */
-/*  	   rmesa->store.prim[prim].vc_format, */
-/*  	   rmesa->store.prim[prim].start, */
-/*  	   rmesa->store.prim[prim].finish, */
-/*  	   rmesa->store.prim[prim].numverts, */
-/*  	   rmesa->store.prim[prim].stateidx); */
-}
+/* =============================================================
+ * Hardware vertex buffer handling
+ */
 
 
-void radeonFlushPrimsLocked( radeonContextPtr rmesa )
+void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa )
 {
-   XF86DRIClipRectPtr pbox = (XF86DRIClipRectPtr)rmesa->pClipRects;
-   int nbox = rmesa->numClipRects;
-   drmBufPtr buffer = rmesa->dma.buffer;
-   RADEONSAREAPrivPtr sarea = rmesa->sarea;
+   struct radeon_dma_buffer *dmabuf;
    int fd = rmesa->dri.fd;
-   int discard_sz = rmesa->dma.high - rmesa->dma.low < 4096;
-   int discard = (rmesa->dma.retained != rmesa->dma.buffer &&
-		  discard_sz);
-   int i;
+   int index = 0;
+   int size = 0;
+   drmDMAReq dma;
+   int ret;
 
-   if ( !nbox )
-      rmesa->store.primnr = 0;
-   else if ( nbox >= RADEON_NR_SAREA_CLIPRECTS ) {
-      rmesa->upload_cliprects = 1;
-      for ( i = 0 ; i < rmesa->store.statenr ; i++ )
-	 rmesa->store.state[0].dirty |= rmesa->store.state[i].dirty;
-      if ( !rmesa->store.texture[0][0] )
-	 rmesa->store.state[0].dirty &= ~RADEON_UPLOAD_TEX0;
-      if ( !rmesa->store.texture[1][0] )
-	 rmesa->store.state[0].dirty &= ~RADEON_UPLOAD_TEX1;
+   if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
+      fprintf(stderr, "%s\n", __FUNCTION__);  
+
+   if (rmesa->dma.flush) {
+      rmesa->dma.flush( rmesa );
    }
 
+   if (rmesa->dma.current.buf)
+      radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
 
-/*     fprintf(stderr, "%s: boxes: %d prims: %d states: %d vertexstore: 0x%x\n", */
-/*  	   __FUNCTION__, */
-/*  	   sarea->nbox, rmesa->store.primnr, rmesa->store.statenr, */
-/*  	   rmesa->dma.low - rmesa->store.prim[0].start); */
+   if (rmesa->dma.nr_released_bufs > 4)
+      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
 
-   if ( !rmesa->upload_cliprects || !rmesa->store.primnr )
-   {
-      if ( nbox == 1 ) {
-	 sarea->nbox = 0;
-      } else {
-	 sarea->nbox = nbox;
-      }
+   dma.context = rmesa->dri.hwContext;
+   dma.send_count = 0;
+   dma.send_list = NULL;
+   dma.send_sizes = NULL;
+   dma.flags = 0;
+   dma.request_count = 1;
+   dma.request_size = RADEON_BUFFER_SIZE;
+   dma.request_list = &index;
+   dma.request_sizes = &size;
+   dma.granted_count = 0;
 
-/*        fprintf(stderr, "case a %d boxes %d prims %d states\n", */
-/*  	      sarea->nbox, rmesa->store.primnr, rmesa->store.statenr); */
-      if (discard || rmesa->store.primnr)
-	 drmRadeonFlushPrims( fd, 
-			       buffer->idx,
-			       discard,
-			       rmesa->store.statenr,
-			       rmesa->store.state,
-			       rmesa->store.primnr,
-			       rmesa->store.prim);
-   }
-   else
-   {
-      for ( i = 0 ; i < nbox ; ) {
-	 int nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, nbox );
-	 XF86DRIClipRectPtr b = sarea->boxes;
-	 int discard_now = 0;
-
-	 /* TODO: Precalculate this intersection:
-	  */
-	 if ( rmesa->state.scissor.enabled ) {
-	    sarea->nbox = 0;
-
-	    for ( ; i < nr ; i++ ) {
-	       *b = pbox[i];
-	       if ( intersect_rect( b, b, &rmesa->state.scissor.rect ) ) {
-		  sarea->nbox++;
-		  b++;
-	       }
-	    }
-
-	    /* Culled?
-	     */
-	    if ( !sarea->nbox ) {
-	       if ( nr < nbox ) continue;
-	       rmesa->store.primnr = 0;
-	    }
-	 } else {
-	    sarea->nbox = nr - i;
-	    for ( ; i < nr ; i++) {
-	       *b++ = pbox[i];
-	    }
-	 }
+   LOCK_HARDWARE(rmesa);	/* no need to validate */
 
-	 /* Finished with the buffer?
-	  */
-	 if ( nr == nbox ) {
-	    discard_now = discard;
-	 }
+   ret = drmDMA( fd, &dma );
+      
+   if (ret != 0) {
+      /* Free some up this way?
+       */
+      if (rmesa->dma.nr_released_bufs) {
+	 radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
+      }
+      
+      if (RADEON_DEBUG & DEBUG_DMA)
+	 fprintf(stderr, "Waiting for buffers\n");
+
+      radeonWaitForIdleLocked( rmesa );
+      ret = drmDMA( fd, &dma );
 
-/*  	 fprintf(stderr, "case a %d boxes %d prims %d states, discard: %d\n", */
-/*  		 sarea->nbox, rmesa->store.primnr, rmesa->store.statenr, discard); */
-	 drmRadeonFlushPrims( fd, 
-			       buffer->idx,
-			       discard_now,
-			       rmesa->store.statenr,
-			       rmesa->store.state,
-			       rmesa->store.primnr,
-			       rmesa->store.prim);
+      if ( ret != 0 ) {
+	 UNLOCK_HARDWARE( rmesa );
+	 fprintf( stderr, "Error: Could not get dma buffer... exiting\n" );
+	 exit( -1 );
       }
    }
 
-   if (discard_sz) {
-      rmesa->dma.buffer = 0;
-      rmesa->dma.address = 0;
-      rmesa->dma.low = 0;
-      rmesa->dma.high = 0;
-   }
-   else {
-      rmesa->dma.low = (rmesa->dma.low + 0x7) & ~0x7;  /* alignment */
-   }
-   rmesa->dma.last = rmesa->dma.low;
-   rmesa->store.primnr = 0;
-   rmesa->store.statenr = 0;
-   rmesa->upload_cliprects = 0;
-   rmesa->num_verts = 0;
-}
+   UNLOCK_HARDWARE(rmesa);
 
-void radeonFlushPrimsGetBuffer( radeonContextPtr rmesa )
-{
-   if (rmesa->dma.low != rmesa->dma.last)
-      emit_prim( rmesa );
+   if (RADEON_DEBUG & DEBUG_DMA)
+      fprintf(stderr, "Allocated buffer %d\n", index);
 
-   LOCK_HARDWARE(rmesa);
+   dmabuf = CALLOC_STRUCT( radeon_dma_buffer );
+   dmabuf->buf = &rmesa->radeonScreen->buffers->list[index];
+   dmabuf->refcount = 1;
 
-   if (rmesa->dma.buffer) {
-      rmesa->dma.low = rmesa->dma.high; /* force discard */
-      rmesa->dma.last = rmesa->dma.low;
-      radeonFlushPrimsLocked( rmesa );
-   }
+   rmesa->dma.current.buf = dmabuf;
+   rmesa->dma.current.address = dmabuf->buf->address;
+   rmesa->dma.current.end = dmabuf->buf->total;
+   rmesa->dma.current.start = 0;
+   rmesa->dma.current.ptr = 0;
 
-   rmesa->dma.buffer = radeonGetBufferLocked( rmesa );
-   rmesa->dma.high = rmesa->dma.buffer->total;
-   rmesa->dma.address = (GLubyte *)rmesa->dma.buffer->address;
-   rmesa->dma.low = 0;
-   rmesa->num_verts = 0;
-   rmesa->dma.last = rmesa->dma.low;
-   UNLOCK_HARDWARE(rmesa);
+   rmesa->c_vertexBuffers++;
 }
 
-
-void radeonFlushPrims( radeonContextPtr rmesa )
+void radeonReleaseDmaRegion( radeonContextPtr rmesa,
+			     struct radeon_dma_region *region,
+			     const char *caller )
 {
-   if (rmesa->dma.buffer) {
-      if (rmesa->dma.low != rmesa->dma.last)
-	 emit_prim( rmesa );
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); 
+   
+   if (!region->buf)
+      return;
 
-      LOCK_HARDWARE( rmesa );
-      radeonFlushPrimsLocked( rmesa );
-      UNLOCK_HARDWARE( rmesa );
+   if (rmesa->dma.flush)
+      rmesa->dma.flush( rmesa );
+
+   if (--region->buf->refcount == 0) {
+      drmRadeonCmdHeader *cmd;
+
+      if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
+	 fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__,
+		 region->buf->buf->idx);  
+      /* Last reference dropped: queue a DMA_DISCARD for the buffer. */
+      cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, sizeof(*cmd), 
+						     __FUNCTION__ );
+      cmd->i = 0;		/* clear the bytes the dma fields don't set */
+      cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD;
+      cmd->dma.buf_idx = region->buf->buf->idx;
+      FREE(region->buf);
+      rmesa->dma.nr_released_bufs++;
    }
+
+   region->buf = 0;
+   region->start = 0;
 }
 
-void radeonEmitPrim( radeonContextPtr rmesa )
+/* Allocates a region from rmesa->dma.current.  If there isn't enough
+ * space in current, grab a new buffer (and discard what was left of current)
+ */
+void radeonAllocDmaRegion( radeonContextPtr rmesa, 
+			   struct radeon_dma_region *region,
+			   int bytes,
+			   int alignment )
 {
-   ASSERT(rmesa->dma.buffer);
-   emit_prim( rmesa );
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
 
-   if (rmesa->store.primnr == RADEON_MAX_PRIMS ||
-       rmesa->store.statenr == RADEON_MAX_STATES) {
-      LOCK_HARDWARE(rmesa);
-      radeonFlushPrimsLocked(rmesa);
-      UNLOCK_HARDWARE(rmesa);
-   }
-   else {
-      rmesa->dma.low = (rmesa->dma.low + 0x7) & ~0x7;  /* alignment */
-      rmesa->dma.last = rmesa->dma.low;
-      rmesa->num_verts = 0;
-   }
+   if (rmesa->dma.flush)
+      rmesa->dma.flush( rmesa );
+
+   if (region->buf)
+      radeonReleaseDmaRegion( rmesa, region, __FUNCTION__ );
+
+   alignment--;
+   rmesa->dma.current.start = rmesa->dma.current.ptr = 
+      (rmesa->dma.current.ptr + alignment) & ~alignment;
+
+   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
+      radeonRefillCurrentDmaRegion( rmesa );
+
+   region->start = rmesa->dma.current.start;
+   region->ptr = rmesa->dma.current.start;
+   region->end = rmesa->dma.current.start + bytes;
+   region->address = rmesa->dma.current.address;
+   region->buf = rmesa->dma.current.buf;
+   region->buf->refcount++;
+
+   rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
+   rmesa->dma.current.start = 
+      rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;  
+
+   if ( rmesa->dri.drmMinor < 3 ) 
+      radeonRefillCurrentDmaRegion( rmesa );
 }
 
+/* Allocate DMA space for numverts vertices of vertsize bytes each. */
+void radeonAllocDmaRegionVerts( radeonContextPtr rmesa,
+				struct radeon_dma_region *region,
+				int numverts, int vertsize, int alignment )
+{
+   const int bytes = vertsize * numverts;
+   radeonAllocDmaRegion( rmesa, region, bytes, alignment );
+}
 
 /* ================================================================
- * Texture uploads
+ * SwapBuffers with client-side throttling
  */
 
-void radeonFireBlitLocked( radeonContextPtr rmesa, drmBufPtr buffer,
-			   GLint offset, GLint pitch, GLint format,
-			   GLint x, GLint y, GLint width, GLint height )
+static CARD32 radeonGetLastFrame (radeonContextPtr rmesa) 
 {
-#if 0
-   GLint ret;
+   unsigned char *RADEONMMIO = rmesa->radeonScreen->mmio.map;
+   int ret;
+   CARD32 frame;
+
+   if (rmesa->dri.screen->drmMinor >= 4) {
+      drmRadeonGetParam gp;
 
-   ret = drmRadeonTextureBlit( rmesa->dri.fd, buffer->idx,
-			       offset, pitch, format,
-			       x, y, width, height );
+      gp.param = RADEON_PARAM_LAST_FRAME;
+      gp.value = (int *)&frame;
+      ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM,
+				 &gp, sizeof(gp) );
+   } 
+   else
+      ret = -EINVAL;
 
+#ifndef __alpha__
+   if ( ret == -EINVAL ) {
+      frame = INREG( RADEON_LAST_FRAME_REG );
+      ret = 0;
+   } 
+#endif
    if ( ret ) {
-      UNLOCK_HARDWARE( rmesa );
-      fprintf( stderr, "drmRadeonTextureBlit: return = %d\n", ret );
-      exit( 1 );
+      fprintf( stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, ret );
+      exit(1);
    }
-#endif
+
+   return frame;
 }
 
+/* Issue DRM_RADEON_IRQ_EMIT, handing the kernel &rmesa->iw.irq_seq
+ * (presumably to receive the sequence number); failure is fatal. */
+static void radeonEmitIrqLocked( radeonContextPtr rmesa )
+{
+   drmRadeonIrqEmit emit;
+   int err;
+
+   emit.irq_seq = &rmesa->iw.irq_seq;
+   err = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_IRQ_EMIT, &emit, sizeof(emit) );
+   if ( err == 0 ) return;
+   fprintf( stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__, err );
+   exit(1);
+}
 
-/* ================================================================
- * SwapBuffers with client-side throttling
- */
 
-#define RADEON_MAX_OUTSTANDING	2
+static void radeonWaitIrq( radeonContextPtr rmesa )
+{
+   int ret;
+
+   do {
+      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT,
+			     &rmesa->iw, sizeof(rmesa->iw) );
+   } while (ret && (errno == EINTR || errno == EAGAIN));
 
-static void delay( void ) {
-/* Prevent an optimizing compiler from removing a spin loop */
+   if ( ret ) {
+      fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, ret );
+      exit(1);
+   }
 }
 
-static int radeonWaitForFrameCompletion( radeonContextPtr rmesa )
+
+static void radeonWaitForFrameCompletion( radeonContextPtr rmesa )
 {
-   unsigned char *RADEONMMIO = rmesa->radeonScreen->mmio.map;
    RADEONSAREAPrivPtr sarea = rmesa->sarea;
-   CARD32 frame;
-   int wait = 0;
-   int i;
 
-   while ( 1 ) {
-      frame = INREG( RADEON_LAST_FRAME_REG );
-      if ( sarea->last_frame - frame <= RADEON_MAX_OUTSTANDING ) {
-	 break;
+   if (rmesa->do_irqs) {
+      if (radeonGetLastFrame(rmesa) < sarea->last_frame) {
+	 if (!rmesa->irqsEmitted) {
+	    while (radeonGetLastFrame (rmesa) < sarea->last_frame)
+	       ;
+	 }
+	 else {
+	    UNLOCK_HARDWARE( rmesa ); 
+	    radeonWaitIrq( rmesa );	
+	    LOCK_HARDWARE( rmesa ); 
+	 }
+	 rmesa->irqsEmitted = 10;
       }
-      wait++;
-      /* Spin in place a bit so we aren't hammering the bus */
-      for ( i = 0 ; i < 1024 ; i++ ) {
-	 delay();
+
+      if (rmesa->irqsEmitted) {
+	 radeonEmitIrqLocked( rmesa );
+	 rmesa->irqsEmitted--;
+      }
+   } 
+   else {
+      while (radeonGetLastFrame (rmesa) < sarea->last_frame) {
+	 UNLOCK_HARDWARE( rmesa ); 
+	 if (rmesa->do_usleeps) 
+	    do_usleep(1, __FUNCTION__); 
+	 LOCK_HARDWARE( rmesa ); 
       }
    }
-
-   return wait;
 }
 
 /* Copy the back color buffer to the front color buffer.
@@ -520,26 +744,22 @@ void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv )
 
    rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_API ) {
-      fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void*)rmesa->glCtx );
+   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
+      fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, rmesa->glCtx );
    }
 
    RADEON_FIREVERTICES( rmesa );
 
    LOCK_HARDWARE( rmesa );
 
-   nbox = rmesa->dri.drawable->numClipRects; /* must be in locked region */
 
    /* Throttle the frame rate -- only allow one pending swap buffers
     * request at a time.
     */
-   if ( !radeonWaitForFrameCompletion( rmesa ) ) {
-      rmesa->hardwareWentIdle = 1;
-   } else {
-      rmesa->hardwareWentIdle = 0;
-   }
+   radeonWaitForFrameCompletion( rmesa );
+   radeonWaitForVBlank( rmesa );
 
-   nbox = dPriv->numClipRects;
+   nbox = rmesa->dri.drawable->numClipRects; /* must be in locked region */
 
    for ( i = 0 ; i < nbox ; ) {
       GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox );
@@ -553,28 +773,16 @@ void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv )
       }
       rmesa->sarea->nbox = n;
 
-      ret = drmRadeonSwapBuffers( rmesa->dri.fd );
+      ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );
 
       if ( ret ) {
-	 fprintf( stderr, "drmRadeonSwapBuffers: return = %d\n", ret );
+	 fprintf( stderr, "DRM_RADEON_SWAP_BUFFERS: return = %d\n", ret );
 	 UNLOCK_HARDWARE( rmesa );
 	 exit( 1 );
       }
    }
 
    UNLOCK_HARDWARE( rmesa );
-
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT_ALL );
-   if ( rmesa->state.texture.unit[0].texobj )
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX0 );
-   if ( rmesa->state.texture.unit[1].texobj )
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX1 );
-
-
-   rmesa->upload_cliprects = 1;
-
-   /* Log the performance counters if necessary */
-   radeonPerformanceCounters( rmesa );
 }
 
 void radeonPageFlip( const __DRIdrawablePrivate *dPriv )
@@ -588,61 +796,54 @@ void radeonPageFlip( const __DRIdrawablePrivate *dPriv )
 
    rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_API ) {
-      fprintf( stderr, "\n%s( %p ): page=%d\n\n",
-	       __FUNCTION__, (void*)rmesa->glCtx, rmesa->currentPage );
+   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
+      fprintf(stderr, "%s %d\n", __FUNCTION__, 
+	      rmesa->sarea->pfCurrentPage );
    }
 
    RADEON_FIREVERTICES( rmesa );
 
    LOCK_HARDWARE( rmesa );
 
-   /* Throttle the frame rate -- only allow one pending swap buffers
-    * request at a time.
+   /* Need to do this for the perf box placement:
     */
-   if ( !radeonWaitForFrameCompletion( rmesa ) ) {
-      rmesa->hardwareWentIdle = 1;
-   } else {
-      rmesa->hardwareWentIdle = 0;
+   if (rmesa->dri.drawable->numClipRects)
+   {
+      XF86DRIClipRectPtr box = rmesa->dri.drawable->pClipRects;
+      XF86DRIClipRectPtr b = rmesa->sarea->boxes;
+      b[0] = box[0];
+      rmesa->sarea->nbox = 1;
    }
 
-   /* The kernel will have been initialized to perform page flipping
-    * on a swapbuffers ioctl.
+
+   /* Throttle the frame rate -- only allow one pending swap buffers
+    * request at a time.
     */
-   ret = drmRadeonSwapBuffers( rmesa->dri.fd );
+   radeonWaitForFrameCompletion( rmesa );
+   radeonWaitForVBlank( rmesa );
+
+   ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP );
 
    UNLOCK_HARDWARE( rmesa );
 
    if ( ret ) {
-      fprintf( stderr, "drmRadeonSwapBuffers: return = %d\n", ret );
+      fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret );
       exit( 1 );
    }
 
-   if ( rmesa->currentPage == 0 ) {
+   if ( rmesa->sarea->pfCurrentPage == 1 ) {
 	 rmesa->state.color.drawOffset = rmesa->radeonScreen->frontOffset;
 	 rmesa->state.color.drawPitch  = rmesa->radeonScreen->frontPitch;
-	 rmesa->currentPage = 1;
    } else {
 	 rmesa->state.color.drawOffset = rmesa->radeonScreen->backOffset;
 	 rmesa->state.color.drawPitch  = rmesa->radeonScreen->backPitch;
-	 rmesa->currentPage = 0;
    }
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
-   rmesa->state.hw.context.rb3d_coloroffset = rmesa->state.color.drawOffset;
-   rmesa->state.hw.context.rb3d_colorpitch  = rmesa->state.color.drawPitch;
-
-   /* Log the performance counters if necessary */
-   radeonPerformanceCounters( rmesa );
+   RADEON_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = rmesa->state.color.drawOffset;
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH]  = rmesa->state.color.drawPitch;
 }
 
-void radeonPerformanceCounters( radeonContextPtr rmesa )
-{
-}
-
-void radeonPerformanceBoxesLocked( radeonContextPtr rmesa )
-{
-}
 
 /* ================================================================
  * Buffer clear
@@ -659,25 +860,29 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask, GLboolean all,
    CARD32 clear;
    GLuint flags = 0;
    GLuint color_mask = 0;
-/*     GLuint depth_mask = 0; */
    GLint ret, i;
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_API ) {
+   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
       fprintf( stderr, "%s:  all=%d cx=%d cy=%d cw=%d ch=%d\n",
 	       __FUNCTION__, all, cx, cy, cw, ch );
    }
 
-   RADEON_FIREVERTICES( rmesa );
+   radeonEmitState( rmesa );
+
+   /* Need to cope with lostcontext here as kernel relies on
+    * some residual state:
+    */
+   RADEON_FIREVERTICES( rmesa ); 
 
    if ( mask & DD_FRONT_LEFT_BIT ) {
       flags |= RADEON_FRONT;
-      color_mask = rmesa->state.hw.mask.rb3d_planemask;
+      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
       mask &= ~DD_FRONT_LEFT_BIT;
    }
 
    if ( mask & DD_BACK_LEFT_BIT ) {
       flags |= RADEON_BACK;
-      color_mask = rmesa->state.hw.mask.rb3d_planemask;
+      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
       mask &= ~DD_BACK_LEFT_BIT;
    }
 
@@ -691,107 +896,127 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask, GLboolean all,
       mask &= ~DD_STENCIL_BIT;
    }
 
-   if ( flags ) {
-      /* Flip top to bottom */
-      cx += dPriv->x;
-      cy  = dPriv->y + dPriv->h - cy - ch;
+   if ( mask )
+      _swrast_Clear( ctx, mask, all, cx, cy, cw, ch );
 
-      LOCK_HARDWARE( rmesa );
+   if ( !flags ) 
+      return;
 
-      /* Throttle the number of clear ioctls we do.
-       */
-      while ( 1 ) {
+
+   /* Flip top to bottom */
+   cx += dPriv->x;
+   cy  = dPriv->y + dPriv->h - cy - ch;
+
+   LOCK_HARDWARE( rmesa );
+
+   /* Throttle the number of clear ioctls we do.
+    */
+   while ( 1 ) {
+      int ret;
+
+      if (rmesa->dri.screen->drmMinor >= 4) {
+	drmRadeonGetParam gp;
+
+	gp.param = RADEON_PARAM_LAST_CLEAR;
+	gp.value = (int *)&clear;
+	ret = drmCommandWriteRead( rmesa->dri.fd,
+				   DRM_RADEON_GETPARAM, &gp, sizeof(gp) );
+      } else
+	ret = -EINVAL;
+
+#ifndef __alpha__
+      if ( ret == -EINVAL ) {
 	 clear = INREG( RADEON_LAST_CLEAR_REG );
-	 if ( sarea->last_clear - clear <= RADEON_MAX_CLEARS ) {
-	    break;
-	 }
-	 /* Spin in place a bit so we aren't hammering the bus */
-	 for ( i = 0 ; i < 1024 ; i++ ) {
-	    delay();
-	 }
+	 ret = 0;
+      }
+#endif
+      if ( ret ) {
+	 fprintf( stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, ret );
+	 exit(1);
+      }
+      if ( RADEON_DEBUG & DEBUG_IOCTL ) {
+	 fprintf( stderr, "%s( %d )\n", __FUNCTION__, (int)clear );
+	 if ( ret ) fprintf( stderr, " ( RADEON_LAST_CLEAR register read directly )\n" );
       }
 
-      /* Emit any new MASKS state.  This ioctl uses the old
-       * sarea-based state mechanism, which is why I'm not using
-       * emit_state() above.  Time for a new ioctl?  
-       */
-      if ( rmesa->state.hw.dirty ) {
-	 memcpy( &sarea->ContextState, &rmesa->state.hw, 
-		 sizeof(sarea->ContextState));
-	 sarea->dirty |= RADEON_UPLOAD_CONTEXT_ALL;
+      if ( sarea->last_clear - clear <= RADEON_MAX_CLEARS ) {
+	 break;
       }
 
+      if ( rmesa->do_usleeps ) {
+	 UNLOCK_HARDWARE( rmesa );
+	 do_usleep(1, __FUNCTION__);
+	 LOCK_HARDWARE( rmesa );
+      }
+   }
 
-      for ( i = 0 ; i < dPriv->numClipRects ; ) {
-	 GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects );
-	 XF86DRIClipRectPtr box = dPriv->pClipRects;
-	 XF86DRIClipRectPtr b = rmesa->sarea->boxes;
-	 GLint n = 0;
-
-	 if ( !all ) {
-	    for ( ; i < nr ; i++ ) {
-	       GLint x = box[i].x1;
-	       GLint y = box[i].y1;
-	       GLint w = box[i].x2 - x;
-	       GLint h = box[i].y2 - y;
-
-	       if ( x < cx ) w -= cx - x, x = cx;
-	       if ( y < cy ) h -= cy - y, y = cy;
-	       if ( x + w > cx + cw ) w = cx + cw - x;
-	       if ( y + h > cy + ch ) h = cy + ch - y;
-	       if ( w <= 0 ) continue;
-	       if ( h <= 0 ) continue;
-
-	       b->x1 = x;
-	       b->y1 = y;
-	       b->x2 = x + w;
-	       b->y2 = y + h;
-	       b++;
-	       n++;
-	    }
-	 } else {
-	    for ( ; i < nr ; i++ ) {
-	       *b++ = box[i];
-	       n++;
-	    }
-	 }
+   for ( i = 0 ; i < dPriv->numClipRects ; ) {
+      GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects );
+      XF86DRIClipRectPtr box = dPriv->pClipRects;
+      XF86DRIClipRectPtr b = rmesa->sarea->boxes;
+      drmRadeonClearType clear;
+      drmRadeonClearRect depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
+      GLint n = 0;
 
-	 rmesa->sarea->nbox = n;
-
-/*  	    fprintf( stderr, */
-/*  		     "drmRadeonClear: flag 0x%x color %x depth %x sten %x nbox %d\n", */
-/*  		     flags, */
-/*  		     rmesa->state.color.clear, */
-/*  		     rmesa->state.depth.clear, */
-/*  		     rmesa->state.stencil.clear, */
-/*  		     rmesa->sarea->nbox ); */
-
-	 ret = drmRadeonClear( rmesa->dri.fd, flags,
-			       rmesa->state.color.clear,
-			       rmesa->state.depth.clear,
-			       rmesa->state.hw.mask.rb3d_planemask,
-			       rmesa->state.stencil.clear,
-			       rmesa->sarea->boxes, rmesa->sarea->nbox );
-
-	 if ( ret ) {
-	    UNLOCK_HARDWARE( rmesa );
-	    fprintf( stderr, "drmRadeonClear: return = %d\n", ret );
-	    exit( 1 );
+      if ( !all ) {
+	 for ( ; i < nr ; i++ ) {
+	    GLint x = box[i].x1;
+	    GLint y = box[i].y1;
+	    GLint w = box[i].x2 - x;
+	    GLint h = box[i].y2 - y;
+
+	    if ( x < cx ) w -= cx - x, x = cx;
+	    if ( y < cy ) h -= cy - y, y = cy;
+	    if ( x + w > cx + cw ) w = cx + cw - x;
+	    if ( y + h > cy + ch ) h = cy + ch - y;
+	    if ( w <= 0 ) continue;
+	    if ( h <= 0 ) continue;
+
+	    b->x1 = x;
+	    b->y1 = y;
+	    b->x2 = x + w;
+	    b->y2 = y + h;
+	    b++;
+	    n++;
+	 }
+      } else {
+	 for ( ; i < nr ; i++ ) {
+	    *b++ = box[i];
+	    n++;
 	 }
       }
 
-      UNLOCK_HARDWARE( rmesa );
+      rmesa->sarea->nbox = n;
 
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT_ALL );
-      if ( rmesa->state.texture.unit[0].texobj )
-	 RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX0 );
-      if ( rmesa->state.texture.unit[1].texobj )
-	 RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX1 );
-      rmesa->upload_cliprects = 1;
+      clear.flags       = flags;
+      clear.clear_color = rmesa->state.color.clear;
+      clear.clear_depth = rmesa->state.depth.clear;
+      clear.color_mask  = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
+      clear.depth_mask  = rmesa->state.stencil.clear;
+      clear.depth_boxes = depth_boxes;
+
+      n--;
+      b = rmesa->sarea->boxes;
+      for ( ; n >= 0 ; n-- ) {
+	 depth_boxes[n].f[RADEON_CLEAR_X1] = (float)b[n].x1;
+	 depth_boxes[n].f[RADEON_CLEAR_Y1] = (float)b[n].y1;
+	 depth_boxes[n].f[RADEON_CLEAR_X2] = (float)b[n].x2;
+	 depth_boxes[n].f[RADEON_CLEAR_Y2] = (float)b[n].y2;
+	 depth_boxes[n].f[RADEON_CLEAR_DEPTH] = 
+	    (float)rmesa->state.depth.clear;
+      }
+
+      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR,
+			     &clear, sizeof(drmRadeonClearType));
+
+      if ( ret ) {
+	 UNLOCK_HARDWARE( rmesa );
+	 fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret );
+	 exit( 1 );
+      }
    }
 
-   if ( mask )
-      _swrast_Clear( ctx, mask, all, cx, cy, cw, ch );
+   UNLOCK_HARDWARE( rmesa );
 }
 
 
@@ -799,10 +1024,22 @@ void radeonWaitForIdleLocked( radeonContextPtr rmesa )
 {
     int fd = rmesa->dri.fd;
     int to = 0;
-    int ret;
+    int ret, i = 0;
+
+    rmesa->c_drawWaits++;
 
     do {
-	ret = drmRadeonWaitForIdleCP( fd );
+        do {
+            ret = drmCommandNone( fd, DRM_RADEON_CP_IDLE);
+        } while ( ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY );
+        if (ret && ret != -EBUSY) {
+            /*
+             * JO - I'm reluctant to print this message while holding the lock
+             *
+            xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+                   "%s: CP idle %d\n", __FUNCTION__, ret);
+             */
+        }
     } while ( ( ret == -EBUSY ) && ( to++ < RADEON_TIMEOUT ) );
 
     if ( ret < 0 ) {
@@ -813,32 +1050,91 @@ void radeonWaitForIdleLocked( radeonContextPtr rmesa )
 }
 
 
-void radeonInitIoctlFuncs( GLcontext *ctx )
+static void radeonWaitForIdle( radeonContextPtr rmesa )
 {
-    ctx->Driver.Clear = radeonClear;
+    LOCK_HARDWARE(rmesa);
+    radeonWaitForIdleLocked( rmesa );
+    UNLOCK_HARDWARE(rmesa);
 }
 
 
+/* Env-controlled vblank wait for swaps; call with the hardware lock held. */
+void radeonWaitForVBlank( radeonContextPtr rmesa )
+{
+    drmVBlank vbl;
+    int ret;
+
+    if ( !rmesa->radeonScreen->irq )
+	return;
+
+    if ( getenv("LIBGL_SYNC_REFRESH") ) {
+	/* Wait for the next vertical blank, counted from now */
+	vbl.request.type = DRM_VBLANK_RELATIVE;
+	vbl.request.sequence = 1;
+    } else if ( getenv("LIBGL_THROTTLE_REFRESH") ) {
+	/* Wait for at least one vertical blank since the last call */
+	vbl.request.type = DRM_VBLANK_ABSOLUTE;
+	vbl.request.sequence = rmesa->vbl_seq + 1;
+    } else {
+	return;
+    }
+    /* Release the lock around the wait; it is retaken before returning */
+    UNLOCK_HARDWARE( rmesa );
+    if ((ret = drmWaitVBlank( rmesa->dri.fd, &vbl ))) {
+	fprintf(stderr, "%s: drmWaitVBlank returned %d, IRQs don't seem to be"
+		" working correctly.\nTry running with LIBGL_THROTTLE_REFRESH"
+		" and LIBGL_SYNC_REFRESH unset.\n", __FUNCTION__, ret);
+	exit(1);
+    } else if (RADEON_DEBUG & DEBUG_IOCTL)
+	fprintf(stderr, "%s: drmWaitVBlank returned %d\n", __FUNCTION__, ret);
+
+    rmesa->vbl_seq = vbl.reply.sequence;
+
+    LOCK_HARDWARE( rmesa );
+}
 
-void radeonReleaseRetainedBuffer( radeonContextPtr rmesa )
+void radeonFlush( GLcontext *ctx )
 {
-   ASSERT(rmesa->dma.retained);
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
 
-   if (rmesa->dma.retained &&
-       rmesa->dma.retained != rmesa->dma.buffer) {
-      RADEON_FIREVERTICES(rmesa); /* FIX ME: dependency tracking for retained */
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
 
-/*        fprintf(stderr, "releaseRetained: retained %p current %p\n", */
-/*  	      rmesa->dma.retained, rmesa->dma.buffer); */
-      
-      LOCK_HARDWARE(rmesa);
-      drmRadeonFlushPrims( rmesa->dri.fd,
-			   rmesa->dma.retained->idx, 
-			   1,
-			   0, rmesa->store.state,
-			   0, rmesa->store.prim);
-      UNLOCK_HARDWARE(rmesa);
+   if (rmesa->dma.flush)
+      rmesa->dma.flush( rmesa );
+
+   if (rmesa->dri.drmMinor >= 3) {
+      if (!is_empty_list(&rmesa->hw.dirty)) 
+	 radeonEmitState( rmesa );
+   
+      if (rmesa->store.cmd_used)
+	 radeonFlushCmdBuf( rmesa, __FUNCTION__ );
+   }
+}
+
+/* Make sure all commands have been sent to the hardware and have
+ * completed processing.
+ */
+void radeonFinish( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   radeonFlush( ctx );
+
+   if (rmesa->do_irqs) {
+      LOCK_HARDWARE( rmesa );
+      radeonEmitIrqLocked( rmesa );
+      UNLOCK_HARDWARE( rmesa );
+      radeonWaitIrq( rmesa );
    }
+   else
+      radeonWaitForIdle( rmesa );
+}
+
 
-   rmesa->dma.retained = 0;
+void radeonInitIoctlFuncs( GLcontext *ctx )
+{
+    ctx->Driver.Clear = radeonClear;
+    ctx->Driver.Finish = radeonFinish;
+    ctx->Driver.Flush = radeonFlush;
 }
+
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.h b/xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.h
index a56b39e16..8777ae278 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.h
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.h
@@ -1,4 +1,4 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.h,v 1.4 2002/09/16 18:05:20 eich Exp $ */
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.h,v 1.6 2002/12/16 16:18:58 dawes Exp $ */
 /**************************************************************************
 
 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
@@ -39,90 +39,138 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #ifdef GLX_DIRECT_RENDERING
 
-#include "radeon_dri.h"
+#include "simple_list.h"
 #include "radeon_lock.h"
 
-#include "xf86drm.h"
-#include "xf86drmRadeon.h"
 
-#define RADEON_BUFFER_MAX_DWORDS	(RADEON_BUFFER_SIZE / sizeof(CARD32))
+extern void radeonEmitState( radeonContextPtr rmesa );
+extern void radeonEmitVertexAOS( radeonContextPtr rmesa,
+				 GLuint vertex_size,
+				 GLuint offset );
 
+extern void radeonEmitVbufPrim( radeonContextPtr rmesa,
+				GLuint vertex_format,
+				GLuint primitive,
+				GLuint vertex_nr );
+
+extern void radeonFlushElts( radeonContextPtr rmesa );
+
+extern GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa,
+					   GLuint vertex_format,
+					   GLuint primitive,
+					   GLuint min_nr );
+
+extern void radeonEmitAOS( radeonContextPtr rmesa,
+			   struct radeon_dma_region **regions,
+			   GLuint n,
+			   GLuint offset );
+
+
+
+extern void radeonFlushCmdBuf( radeonContextPtr rmesa, const char * );
+extern void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa );
+
+extern void radeonAllocDmaRegion( radeonContextPtr rmesa,
+				  struct radeon_dma_region *region,
+				  int bytes, 
+				  int alignment );
+
+extern void radeonAllocDmaRegionVerts( radeonContextPtr rmesa,
+				       struct radeon_dma_region *region,
+				       int numverts,
+				       int vertsize, 
+				       int alignment );
+
+extern void radeonReleaseDmaRegion( radeonContextPtr rmesa,
+				    struct radeon_dma_region *region,
+				    const char *caller );
 
-extern drmBufPtr radeonGetBufferLocked( radeonContextPtr rmesa );
-extern void radeonEmitPrim( radeonContextPtr rmesa );
-extern void radeonFlushPrims( radeonContextPtr rmesa );
-extern void radeonFlushPrimsLocked( radeonContextPtr rmesa );
-extern void radeonFlushPrimsGetBuffer( radeonContextPtr rmesa );
-extern void radeonFireBlitLocked( radeonContextPtr rmesa,
-				  drmBufPtr buffer,
-				  GLint offset, GLint pitch, GLint format,
-				  GLint x, GLint y,
-				  GLint width, GLint height );
 extern void radeonCopyBuffer( const __DRIdrawablePrivate *drawable );
 extern void radeonPageFlip( const __DRIdrawablePrivate *drawable );
+extern void radeonFlush( GLcontext *ctx );
+extern void radeonFinish( GLcontext *ctx );
 extern void radeonWaitForIdleLocked( radeonContextPtr rmesa );
-extern void radeonPerformanceCounters( radeonContextPtr rmesa );
-extern void radeonPerformanceBoxesLocked( radeonContextPtr rmesa );
+extern void radeonWaitForVBlank( radeonContextPtr rmesa );
 extern void radeonInitIoctlFuncs( GLcontext *ctx );
-extern void radeonReleaseRetainedBuffer( radeonContextPtr rmesa );
+extern void radeonGetAllParams( radeonContextPtr rmesa );
+
+/* radeon_compat.c:
+ */
+extern void radeonCompatEmitPrimitive( radeonContextPtr rmesa,
+				       GLuint vertex_format,
+				       GLuint hw_primitive,
+				       GLuint nrverts );
 
 
 /* ================================================================
  * Helper macros:
  */
 
+/* Close off the last primitive, if it exists.
+ */
+#define RADEON_NEWPRIM( rmesa )			\
+do {						\
+   if ( rmesa->dma.flush )			\
+      rmesa->dma.flush( rmesa );	\
+} while (0)
+
 /* Can accomodate several state changes and primitive changes without
  * actually firing the buffer.
  */
-#define RADEON_STATECHANGE( rmesa, flag )				\
-do {									\
-   if ( 0 ) radeonPrintDirty( __FUNCTION__, flag );			\
-   if ( rmesa->dma.low != rmesa->dma.last )				\
-      radeonEmitPrim( rmesa );						\
-   rmesa->state.hw.dirty |= flag;					\
+#define RADEON_STATECHANGE( rmesa, ATOM )			\
+do {								\
+   RADEON_NEWPRIM( rmesa );					\
+   move_to_head( &(rmesa->hw.dirty), &(rmesa->hw.ATOM));	\
 } while (0)
 
+#define RADEON_DB_STATE( ATOM )			        \
+   memcpy( rmesa->hw.ATOM.lastcmd, rmesa->hw.ATOM.cmd,	\
+	   rmesa->hw.ATOM.cmd_size * 4)
+
+static __inline int RADEON_DB_STATECHANGE( 
+   radeonContextPtr rmesa,
+   struct radeon_state_atom *atom )
+{
+   if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) {
+      int *tmp;
+      RADEON_NEWPRIM( rmesa );
+      move_to_head( &(rmesa->hw.dirty), atom );
+      tmp = atom->cmd; 
+      atom->cmd = atom->lastcmd;
+      atom->lastcmd = tmp;
+      return 1;
+   }
+   else
+      return 0;
+}
+
 
 /* Fire the buffered vertices no matter what.
  */
-#define RADEON_FIREVERTICES( rmesa )					\
-do {									\
-   if ( rmesa->store.primnr || rmesa->dma.low != rmesa->dma.last ) {	\
-      if ( 0 )								\
-	 fprintf( stderr, "RADEON_FIREVERTICES in %s\n",__FUNCTION__ );	\
-      radeonFlushPrims( rmesa );					\
-   }									\
+#define RADEON_FIREVERTICES( rmesa )			\
+do {							\
+   if ( rmesa->store.cmd_used || rmesa->dma.flush ) {	\
+      radeonFlush( rmesa->glCtx );			\
+   }							\
 } while (0)
 
-
-static __inline void *radeonAllocDmaLow( radeonContextPtr rmesa,
+/* Alloc space in the command buffer
+ */
+static __inline char *radeonAllocCmdBuf( radeonContextPtr rmesa,
 					 int bytes, const char *where )
 {
-   if ( rmesa->dma.low + bytes > rmesa->dma.high ) {
-      if (0) fprintf( stderr, "%s flush for %d (%d/%d/%d)\n",
-		      where, bytes, rmesa->dma.last,
-		      rmesa->dma.low, rmesa->dma.high );
-      radeonFlushPrimsGetBuffer( rmesa );
-   }
+   if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ)
+      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
+   
+   assert(rmesa->dri.drmMinor >= 3);
 
    {
-      GLubyte *head = rmesa->dma.address + rmesa->dma.low;
-      if (0) fprintf( stderr, "%s: alloc %d (%d/%d/%d)\n",
-		      where, bytes, rmesa->dma.last,
-		      rmesa->dma.low, rmesa->dma.high );
-      rmesa->dma.low += bytes;
+      char *head = rmesa->store.cmd_buf + rmesa->store.cmd_used;
+      rmesa->store.cmd_used += bytes;
       return head;
    }
 }
 
-static __inline void *radeonAllocDmaHigh( radeonContextPtr rmesa, int bytes )
-{
-   if ( rmesa->dma.low + bytes > rmesa->dma.high )
-      radeonFlushPrimsGetBuffer( rmesa );
-
-   rmesa->dma.high -= bytes;
-   return (void *)(rmesa->dma.address + rmesa->dma.high);
-}
 
 
 
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_lock.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_lock.c
index 0c05f507b..7afa8ea9c 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_lock.c
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_lock.c
@@ -1,4 +1,4 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_lock.c,v 1.4 2002/02/22 21:45:00 dawes Exp $ */
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_lock.c,v 1.5 2002/10/30 12:51:55 alanh Exp $ */
 /**************************************************************************
 
 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
@@ -38,12 +38,47 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_lock.h"
 #include "radeon_tex.h"
 #include "radeon_state.h"
+#include "radeon_ioctl.h"
 
 #if DEBUG_LOCKING
 char *prevLockFile = NULL;
 int prevLockLine = 0;
 #endif
 
+/* Turn on/off page flipping according to the flags in the sarea, and
+ * point the color draw offset/pitch at the buffer selected below. */
+static void
+radeonUpdatePageFlipping( radeonContextPtr rmesa )
+{
+   int use_back;
+
+   if (rmesa->dri.drmMinor < 3)
+      return;
+
+   rmesa->doPageFlip = rmesa->sarea->pfAllowPageFlip;
+   /* Drawing to GL_BACK_LEFT selects "back"; inverted when sarea page is 1 */
+   use_back = (rmesa->glCtx->Color.DriverDrawBuffer == GL_BACK_LEFT);
+   use_back ^= (rmesa->sarea->pfCurrentPage == 1);
+
+   if ( RADEON_DEBUG & DEBUG_VERBOSE )
+      fprintf(stderr, "%s allow %d current %d\n", __FUNCTION__, 
+	      rmesa->doPageFlip,
+	      rmesa->sarea->pfCurrentPage );
+
+   if ( use_back ) {
+	 rmesa->state.color.drawOffset = rmesa->radeonScreen->backOffset;
+	 rmesa->state.color.drawPitch  = rmesa->radeonScreen->backPitch;
+   } else {
+	 rmesa->state.color.drawOffset = rmesa->radeonScreen->frontOffset;
+	 rmesa->state.color.drawPitch  = rmesa->radeonScreen->frontPitch;
+   }
+   /* Flag the ctx state atom first, then store the new offset/pitch in it */
+   RADEON_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = rmesa->state.color.drawOffset;
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH]  = rmesa->state.color.drawPitch;
+}
+
+
 
 /* Update the hardware state.  This is called if another context has
  * grabbed the hardware lock, which includes the X server.  This
@@ -73,6 +108,7 @@ void radeonGetLock( radeonContextPtr rmesa, GLuint flags )
    DRI_VALIDATE_DRAWABLE_INFO( rmesa->dri.display, sPriv, dPriv );
 
    if ( rmesa->lastStamp != dPriv->lastStamp ) {
+      radeonUpdatePageFlipping( rmesa );
       radeonSetCliprects( rmesa, rmesa->glCtx->Color.DriverDrawBuffer );
       radeonUpdateViewportOffset( rmesa->glCtx );
       rmesa->lastStamp = dPriv->lastStamp;
@@ -81,24 +117,8 @@ void radeonGetLock( radeonContextPtr rmesa, GLuint flags )
    if ( sarea->ctxOwner != rmesa->dri.hwContext ) {
       sarea->ctxOwner = rmesa->dri.hwContext;
 
-      rmesa->upload_cliprects = 1;
-      if ( rmesa->store.statenr ) {
-	 rmesa->store.state[0].dirty = RADEON_UPLOAD_CONTEXT_ALL;
-	 if ( rmesa->store.texture[0][0] )
-	    rmesa->store.state[0].dirty |= RADEON_UPLOAD_TEX0;
-	 if ( rmesa->store.texture[1][0] )
-	    rmesa->store.state[0].dirty |= RADEON_UPLOAD_TEX1;
-      }
-      else {
-	 rmesa->state.hw.dirty = RADEON_UPLOAD_CONTEXT_ALL;
-	 if ( rmesa->state.texture.unit[0].texobj )
-	    rmesa->state.hw.dirty |= RADEON_UPLOAD_TEX0;
-	 if ( rmesa->state.texture.unit[1].texobj )
-	    rmesa->state.hw.dirty |= RADEON_UPLOAD_TEX1;
-      }
-
       for ( i = 0 ; i < rmesa->texture.numHeaps ; i++ ) {
-	 if ( sarea->texAge[i] != rmesa->texture.age[i] ) {
+	 if ( rmesa->texture.heap[i] && sarea->texAge[i] != rmesa->texture.age[i] ) {
 	    radeonAgeTextures( rmesa, i );
 	 }
       }
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_lock.h b/xc/lib/GL/mesa/src/drv/radeon/radeon_lock.h
index 9cb77646a..5402df83c 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_lock.h
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_lock.h
@@ -1,4 +1,4 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_lock.h,v 1.2 2002/02/22 21:45:00 dawes Exp $ */
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_lock.h,v 1.3 2002/10/30 12:51:55 alanh Exp $ */
 /**************************************************************************
 
 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
@@ -85,17 +85,18 @@ extern int prevLockLine;
  * do not do any drawing !!!
  */
 
+
 /* Lock the hardware and validate our state.
  */
-#define LOCK_HARDWARE( rmesa )						\
-   do {									\
-      char __ret = 0;							\
-      DEBUG_CHECK_LOCK();						\
-      DRM_CAS( rmesa->dri.hwLock, rmesa->dri.hwContext,			\
-	       (DRM_LOCK_HELD | rmesa->dri.hwContext), __ret );		\
-      if ( __ret )							\
-	 radeonGetLock( rmesa, 0 );					\
-      DEBUG_LOCK();							\
+#define LOCK_HARDWARE( rmesa )					\
+   do {								\
+      char __ret = 0;						\
+      DEBUG_CHECK_LOCK();					\
+      DRM_CAS( rmesa->dri.hwLock, rmesa->dri.hwContext,		\
+	       (DRM_LOCK_HELD | rmesa->dri.hwContext), __ret );	\
+      if ( __ret )						\
+	 radeonGetLock( rmesa, 0 );				\
+      DEBUG_LOCK();						\
    } while (0)
 
 /* Unlock the hardware.
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_maos.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_maos.c
new file mode 100644
index 000000000..c62edd715
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_maos.c
@@ -0,0 +1,12 @@
+
+
+/* If using new packets, can choose either verts or arrays.
+ * Otherwise, must use verts.
+ */
+#include "radeon_context.h"
+#define RADEON_MAOS_VERTS 1
+#if (RADEON_MAOS_VERTS) || (RADEON_OLD_PACKETS)
+#include "radeon_maos_verts.c"
+#else
+#include "radeon_maos_arrays.c"
+#endif
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_maos.h b/xc/lib/GL/mesa/src/drv/radeon/radeon_maos.h
new file mode 100644
index 000000000..f4907fe15
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_maos.h
@@ -0,0 +1,47 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_maos.h,v 1.1 2002/10/30 12:51:55 alanh Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Austin, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#ifndef __RADEON_MAOS_H__
+#define __RADEON_MAOS_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+#include "radeon_context.h"
+
+extern void radeonEmitArrays( GLcontext *ctx, GLuint inputs );
+extern void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs );
+
+#endif
+#endif
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_arrays.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_arrays.c
new file mode 100644
index 000000000..08375d673
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_arrays.c
@@ -0,0 +1,592 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_maos_arrays.c,v 1.1 2002/10/30 12:51:55 alanh Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#include "glheader.h"
+#include "mtypes.h"
+#include "colormac.h"
+#include "mem.h"
+#include "mmath.h"
+#include "macros.h"
+
+#include "swrast_setup/swrast_setup.h"
+#include "math/m_translate.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_imm_debug.h"
+
+#include "radeon_context.h"
+#include "radeon_ioctl.h"
+#include "radeon_state.h"
+#include "radeon_swtcl.h"
+#include "radeon_maos.h"
+
+/* Usage:
+ *   - from radeon_tcl_render
+ *   - call radeonEmitArrays to ensure up-to-date arrays in dma
+ *   - emit primitives (new type?) which reference the data
+ *       -- need to use elts for lineloop, quads, quadstrip/flat
+ *       -- other primitives are all well-formed (need tristrip-1,fake-poly)
+ *
+ */
+static void emit_ubyte_rgba3( GLcontext *ctx,
+		       struct radeon_dma_region *rvb,
+		       char *data,
+		       int stride,
+		       int count )
+{
+   int i;
+   radeon_color_t *out = (radeon_color_t *)(rvb->start + rvb->address);
+   /* Expand 'count' 3-byte colors into radeon_color_t entries at rvb->start + rvb->address. */
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d out %p\n",
+	      __FUNCTION__, count, stride, out);
+   /* data[0..2] become red/green/blue, alpha is forced to 0xFF; data advances by 'stride' bytes. */
+   for (i = 0; i < count; i++) {
+      out->red   = *data;
+      out->green = *(data+1);
+      out->blue  = *(data+2);
+      out->alpha = 0xFF;
+      out++;
+      data += stride;
+   }
+}
+
+/* COPY_DWORDS(dst, src, nr): copy nr 32-bit words from src to dst and leave dst advanced past them. */
+#if defined(USE_X86_ASM)
+#define COPY_DWORDS( dst, src, nr )					\
+do {									\
+	int __tmp;							\
+	__asm__ __volatile__( "rep ; movsl"				\
+			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
+			      : "0" (nr),				\
+			        "D" ((long)(dst)),			\
+			        "S" ((long)(src)) );			\
+} while (0)
+#else
+#define COPY_DWORDS( dst, src, nr )		\
+do {						\
+   int j;					\
+   for ( j = 0 ; j < (nr) ; j++ )		\
+      (dst)[j] = ((int *)(src))[j];		\
+   (dst) += (nr);				\
+} while (0)
+#endif
+
+/* Pack 'count' 4-byte RGBA colors from 'data' (advancing 'stride' bytes per
+ * element) into the dma region at rvb->address + rvb->start. */
+static void emit_ubyte_rgba4( GLcontext *ctx,
+			      struct radeon_dma_region *rvb,
+			      char *data,
+			      int stride,
+			      int count )
+{
+   int i;
+   int *out = (int *)(rvb->address + rvb->start);
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d\n",
+	      __FUNCTION__, count, stride);
+   /* NOTE(review): only the strided path applies LE32_TO_CPU; confirm the raw copy matches on big-endian. */
+   if (stride == 4)
+       COPY_DWORDS( out, data, count );
+   else
+      for (i = 0; i < count; i++) {
+	 *out++ = LE32_TO_CPU(*(int *)data);
+	 data += stride;
+      }
+}
+
+/* Allocate dma space for a packed-color array and fill it with the size-3 or size-4 emitter. */
+static void emit_ubyte_rgba( GLcontext *ctx,
+			     struct radeon_dma_region *rvb,
+			     char *data,
+			     int size,
+			     int stride,
+			     int count )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
+
+   assert (!rvb->buf);
+   /* A zero stride emits a single 4-byte element with aos_stride 0; otherwise 4 bytes per element. */
+   if (stride == 0) {
+      radeonAllocDmaRegion( rmesa, rvb, 4, 4 );
+      count = 1;
+      rvb->aos_start = GET_START(rvb);
+      rvb->aos_stride = 0;
+      rvb->aos_size = 1;
+   }
+   else {
+      radeonAllocDmaRegion( rmesa, rvb, 4 * count, 4 );	/* alignment? */
+      rvb->aos_start = GET_START(rvb);
+      rvb->aos_stride = 1;
+      rvb->aos_size = 1;
+   }
+
+   /* Emit the data
+    */
+   switch (size) {
+   case 3:
+      emit_ubyte_rgba3( ctx, rvb, data, stride, count );
+      break;
+   case 4:
+      emit_ubyte_rgba4( ctx, rvb, data, stride, count );
+      break;
+   default:
+      assert(0);
+      exit(1);
+      break;
+   }
+}
+
+
+/* Copy 'count' two-dword elements from 'data' (stepping 'stride' bytes) to
+ * rvb->address + rvb->start; a stride of 8 is copied as one contiguous run. */
+static void emit_vec8( GLcontext *ctx,
+		       struct radeon_dma_region *rvb,
+		       char *data,
+		       int stride,
+		       int count )
+{
+   int i;
+   int *out = (int *)(rvb->address + rvb->start);
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d\n",
+	      __FUNCTION__, count, stride);
+
+   if (stride == 8)
+      COPY_DWORDS( out, data, count*2 );
+   else
+      for (i = 0; i < count; i++) {
+	 out[0] = *(int *)data;
+	 out[1] = *(int *)(data+4);
+	 out += 2;
+	 data += stride;
+      }
+}
+
+static void emit_vec12( GLcontext *ctx,
+		       struct radeon_dma_region *rvb,
+		       char *data,
+		       int stride,
+		       int count )
+{
+   int i;
+   int *out = (int *)(rvb->address + rvb->start);
+   /* Three dwords per element; a stride of 12 is copied as one contiguous run. */
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+	      __FUNCTION__, count, stride, out, data);
+
+   if (stride == 12)
+      COPY_DWORDS( out, data, count*3 );
+   else
+      for (i = 0; i < count; i++) {
+	 out[0] = *(int *)data;
+	 out[1] = *(int *)(data+4);
+	 out[2] = *(int *)(data+8);
+	 out += 3;
+	 data += stride;
+      }
+}
+
+static void emit_vec16( GLcontext *ctx,
+			struct radeon_dma_region *rvb,
+			char *data,
+			int stride,
+			int count )
+{
+   int i;
+   int *out = (int *)(rvb->address + rvb->start);
+   /* Four dwords per element; a stride of 16 is copied as one contiguous run. */
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d\n",
+	      __FUNCTION__, count, stride);
+
+   if (stride == 16)
+      COPY_DWORDS( out, data, count*4 );
+   else
+      for (i = 0; i < count; i++) {
+	 out[0] = *(int *)data;
+	 out[1] = *(int *)(data+4);
+	 out[2] = *(int *)(data+8);
+	 out[3] = *(int *)(data+12);
+	 out += 4;
+	 data += stride;
+      }
+}
+
+/* Allocate dma space for an array of 'size'-dword elements and copy it in with emit_vec8/12/16. */
+static void emit_vector( GLcontext *ctx,
+			 struct radeon_dma_region *rvb,
+			 char *data,
+			 int size,
+			 int stride,
+			 int count )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
+
+   assert (!rvb->buf);
+   /* A zero stride emits a single element with aos_stride 0; otherwise 'count' elements are emitted. */
+   if (stride == 0) {
+      radeonAllocDmaRegion( rmesa, rvb, size * 4, 4 );
+      count = 1;
+      rvb->aos_start = GET_START(rvb);
+      rvb->aos_stride = 0;
+      rvb->aos_size = size;
+   }
+   else {
+      radeonAllocDmaRegion( rmesa, rvb, size * count * 4, 4 );	/* alignment? */
+      rvb->aos_start = GET_START(rvb);
+      rvb->aos_stride = size;
+      rvb->aos_size = size;
+   }
+
+   /* Emit the data
+    */
+   switch (size) {
+   case 2:
+      emit_vec8( ctx, rvb, data, stride, count );
+      break;
+   case 3:
+      emit_vec12( ctx, rvb, data, stride, count );
+      break;
+   case 4:
+      emit_vec16( ctx, rvb, data, stride, count );
+      break;
+   default:
+      assert(0);
+      exit(1);
+      break;
+   }
+
+}
+
+/* Emit 'count' elements of two dwords each: the first source dword followed
+ * by a zero dword; 'data' advances by 'stride' bytes per element. */
+static void emit_s0_vec( GLcontext *ctx,
+			 struct radeon_dma_region *rvb,
+			 char *data,
+			 int stride,
+			 int count )
+{
+   int i;
+   int *out = (int *)(rvb->address + rvb->start);
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d\n",
+	      __FUNCTION__, count, stride);
+
+   for (i = 0; i < count; i++) {
+      out[0] = *(int *)data;
+      out[1] = 0;
+      out += 2;
+      data += stride;
+   }
+}
+
+static void emit_stq_vec( GLcontext *ctx,
+			 struct radeon_dma_region *rvb,
+			 char *data,
+			 int stride,
+			 int count )
+{
+   int i;
+   int *out = (int *)(rvb->address + rvb->start);
+   /* Three dwords per element: source dwords at byte offsets 0, 4 and 12 (offset 8 is skipped). */
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d\n",
+	      __FUNCTION__, count, stride);
+
+   for (i = 0; i < count; i++) {
+      out[0] = *(int *)data;
+      out[1] = *(int *)(data+4);
+      out[2] = *(int *)(data+12);
+      out += 3;
+      data += stride;
+   }
+}
+
+
+/* Allocate dma space for a texcoord array and emit it.  Size-4 input is
+ * emitted as three dwords per element, every other size as two. */
+static void emit_tex_vector( GLcontext *ctx,
+			     struct radeon_dma_region *rvb,
+			     char *data,
+			     int size,
+			     int stride,
+			     int count )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   int emitsize;
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
+
+   assert (!rvb->buf);
+
+   switch (size) {
+   case 4: emitsize = 3; break;
+   default: emitsize = 2; break;
+   }
+
+   /* A zero stride emits a single element with aos_stride 0. */
+   if (stride == 0) {
+      radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize, 4 );
+      count = 1;
+      rvb->aos_start = GET_START(rvb);
+      rvb->aos_stride = 0;
+      rvb->aos_size = emitsize;
+   }
+   else {
+      radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize * count, 4 );
+      rvb->aos_start = GET_START(rvb);
+      rvb->aos_stride = emitsize;
+      rvb->aos_size = emitsize;
+   }
+
+
+   /* Emit the data
+    */
+   switch (size) {
+   case 1:
+      emit_s0_vec( ctx, rvb, data, stride, count ); 
+      break;
+   case 2:
+      emit_vec8( ctx, rvb, data, stride, count );
+      break;
+   case 3:
+      emit_vec8( ctx, rvb, data, stride, count );	/* only the first two dwords are emitted */
+      break;
+   case 4:
+      emit_stq_vec( ctx, rvb, data, stride, count );
+      break;
+   default:
+      assert(0);
+      exit(1);
+      break;
+   }
+}
+
+
+
+
+/* Emit any changed arrays to new agp memory, then record the component
+ * list and vertex format in rmesa->tcl and refresh the Q0/Q1 bits of
+ * TCL_OUTPUT_VTXFMT.  Arrays whose dma region already has a buffer are reused. */
+void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
+   struct radeon_dma_region **component = rmesa->tcl.aos_components;
+   GLuint nr = 0;
+   GLuint vfmt = 0;
+   GLuint count = VB->Count;
+   GLuint vtx;
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      _tnl_print_vert_flags( __FUNCTION__, inputs );
+   /* Object coordinates are always emitted, whatever 'inputs' says. */
+   if (1) {
+      if (!rmesa->tcl.obj.buf)
+	 emit_vector( ctx,
+		      &rmesa->tcl.obj,
+		      (char *)VB->ObjPtr->data,
+		      VB->ObjPtr->size,
+		      VB->ObjPtr->stride,
+		      count);
+
+      switch( VB->ObjPtr->size ) {
+      case 4: vfmt |= RADEON_CP_VC_FRMT_W0;	/* fall through */
+      case 3: vfmt |= RADEON_CP_VC_FRMT_Z;	/* fall through */
+      case 2: vfmt |= RADEON_CP_VC_FRMT_XY;	/* fall through */
+      default: break;	/* a label must be followed by a statement */
+      }
+      component[nr++] = &rmesa->tcl.obj;
+   }
+
+
+   if (inputs & VERT_NORM) {
+      if (!rmesa->tcl.norm.buf)
+	 emit_vector( ctx,
+		      &(rmesa->tcl.norm),
+		      (char *)VB->NormalPtr->data,
+		      3,
+		      VB->NormalPtr->stride,
+		      count);
+
+      vfmt |= RADEON_CP_VC_FRMT_N0;
+      component[nr++] = &rmesa->tcl.norm;
+   }
+
+   if (inputs & VERT_RGBA) {
+      if (VB->ColorPtr[0]->Type == GL_UNSIGNED_BYTE) {
+	 if (!rmesa->tcl.rgba.buf)
+	    emit_ubyte_rgba( ctx,
+			     &rmesa->tcl.rgba,
+			     (char *)VB->ColorPtr[0]->Ptr,
+			     VB->ColorPtr[0]->Size,
+			     VB->ColorPtr[0]->StrideB,
+			     count);
+
+	 vfmt |= RADEON_CP_VC_FRMT_PKCOLOR;
+      }
+      else {
+	 int emitsize;
+	 /* Emit alpha only for size-4 colors with a non-zero stride or a first alpha other than 1.0. */
+	 if (VB->ColorPtr[0]->Size == 4 &&
+	     (VB->ColorPtr[0]->StrideB != 0 ||
+	      ((GLfloat *)VB->ColorPtr[0]->Ptr)[3] != 1.0)) {
+	    vfmt |= RADEON_CP_VC_FRMT_FPCOLOR | RADEON_CP_VC_FRMT_FPALPHA;
+	    emitsize = 4;
+	 }
+	 else {
+	    vfmt |= RADEON_CP_VC_FRMT_FPCOLOR;
+	    emitsize = 3;
+	 }
+
+
+	 if (!rmesa->tcl.rgba.buf)
+	    emit_vector( ctx,
+			 &(rmesa->tcl.rgba),
+			 (char *)VB->ColorPtr[0]->Ptr,
+			 emitsize,
+			 VB->ColorPtr[0]->StrideB,
+			 count);
+      }
+
+      component[nr++] = &rmesa->tcl.rgba;
+   }
+
+
+   if (inputs & VERT_SPEC_RGB) {
+      if (!rmesa->tcl.spec.buf) {
+	 if (VB->SecondaryColorPtr[0]->Type != GL_UNSIGNED_BYTE)
+	    radeon_import_float_spec_colors( ctx );
+
+	 emit_ubyte_rgba( ctx,
+			  &rmesa->tcl.spec,
+			  (char *)VB->SecondaryColorPtr[0]->Ptr,
+			  3,
+			  VB->SecondaryColorPtr[0]->StrideB,
+			  count);
+      }
+
+      vfmt |= RADEON_CP_VC_FRMT_PKSPEC;
+      component[nr++] = &rmesa->tcl.spec;
+   }
+   /* Start from the current output vertex format with both Q bits cleared. */
+   vtx = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &
+	  ~(RADEON_TCL_VTX_Q0|RADEON_TCL_VTX_Q1));
+
+   if (inputs & VERT_TEX0) {
+      if (!rmesa->tcl.tex[0].buf)
+	 emit_tex_vector( ctx,
+			  &(rmesa->tcl.tex[0]),
+			  (char *)VB->TexCoordPtr[0]->data,
+			  VB->TexCoordPtr[0]->size,
+			  VB->TexCoordPtr[0]->stride,
+			  count );
+
+      switch( VB->TexCoordPtr[0]->size ) {
+      case 4:
+	 vtx |= RADEON_TCL_VTX_Q0;
+	 vfmt |= RADEON_CP_VC_FRMT_Q0;	/* fall through */
+      default:
+	 vfmt |= RADEON_CP_VC_FRMT_ST0;
+      }
+      component[nr++] = &rmesa->tcl.tex[0];
+   }
+
+   if (inputs & VERT_TEX1) {
+      if (!rmesa->tcl.tex[1].buf)
+	 emit_tex_vector( ctx,
+			  &(rmesa->tcl.tex[1]),
+			  (char *)VB->TexCoordPtr[1]->data,
+			  VB->TexCoordPtr[1]->size,
+			  VB->TexCoordPtr[1]->stride,
+			  count );
+
+      switch( VB->TexCoordPtr[1]->size ) {
+      case 4:
+	 vtx |= RADEON_TCL_VTX_Q1;
+	 vfmt |= RADEON_CP_VC_FRMT_Q1;	/* fall through */
+      default:
+	 vfmt |= RADEON_CP_VC_FRMT_ST1;
+      }
+      component[nr++] = &rmesa->tcl.tex[1];
+   }
+   /* Only flag a tcl state change when the Q bits actually differ. */
+   if (vtx != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT]) {
+      RADEON_STATECHANGE( rmesa, tcl );
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = vtx;
+   }
+
+   rmesa->tcl.nr_aos_components = nr;
+   rmesa->tcl.vertex_format = vfmt;
+}
+
+/* Release the dma region of every array whose VERT_* bit is set in 'newinputs'. */
+void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+
+   if (RADEON_DEBUG & DEBUG_VERTS) 
+      _tnl_print_vert_flags( __FUNCTION__, newinputs );
+
+   if (newinputs & VERT_OBJ) 
+     radeonReleaseDmaRegion( rmesa, &rmesa->tcl.obj, __FUNCTION__ );
+
+   if (newinputs & VERT_NORM) 
+      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.norm, __FUNCTION__ );
+
+   if (newinputs & VERT_RGBA) 
+      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.rgba, __FUNCTION__ );
+
+   if (newinputs & VERT_SPEC_RGB) 
+      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.spec, __FUNCTION__ );
+
+   if (newinputs & VERT_TEX0)
+      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[0], __FUNCTION__ );
+
+   if (newinputs & VERT_TEX1)
+      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[1], __FUNCTION__ );
+}
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_vbtmp.h b/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_vbtmp.h
new file mode 100644
index 000000000..8ce767774
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_vbtmp.h
@@ -0,0 +1,371 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef LOCALVARS
+#define LOCALVARS
+#endif
+/* TCL_DEBUG always ends up 0 here: the #undef makes the #ifndef that follows unconditionally true. */
+#undef TCL_DEBUG
+#ifndef TCL_DEBUG
+#define TCL_DEBUG 0
+#endif
+
+static void TAG(emit)( GLcontext *ctx,
+		       GLuint start, GLuint end,
+		       void *dest )
+{
+   LOCALVARS
+      struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLuint (*tc0)[4], (*tc1)[4];
+   GLfloat *fog;
+   GLuint (*tc2)[4], (*norm)[3];
+   GLubyte (*col)[4], (*spec)[4];
+   GLuint tc0_stride, tc1_stride, col_stride, spec_stride, fog_stride;
+   GLuint tc2_stride, norm_stride;
+   GLuint (*coord)[4];
+   GLuint coord_stride;
+   GLubyte dummy[4];
+   int i;
+
+   union emit_union *v = (union emit_union *)dest;
+   /* Write vertices start..end-1 of the TNL vertex buffer to 'dest' as interleaved
+    * emit_union slots; the DO_* tests on IND select which attributes are written. */
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s\n", __FUNCTION__); 
+
+   /* The vertex code expects Obj to be clean to element 3.  To fix
+    * this, add more vertex code (for obj-2, obj-3) or preferably move
+    * to maos.  
+    */
+   if (VB->ObjPtr->size < 3) {
+      if (VB->ObjPtr->flags & VEC_NOT_WRITEABLE) {
+	 VB->import_data( ctx, VERT_OBJ, VEC_NOT_WRITEABLE );
+      }
+      _mesa_vector4f_clean_elem( VB->ObjPtr, VB->Count, 2 );
+   }
+
+   if (DO_W && VB->ObjPtr->size < 4) {
+      if (VB->ObjPtr->flags & VEC_NOT_WRITEABLE) {
+	 VB->import_data( ctx, VERT_OBJ, VEC_NOT_WRITEABLE );
+      }
+      _mesa_vector4f_clean_elem( VB->ObjPtr, VB->Count, 3 );
+   }
+
+   coord = (GLuint (*)[4])VB->ObjPtr->data;
+   coord_stride = VB->ObjPtr->stride;
+
+   if (DO_TEX2) {
+      const GLuint t2 = GET_TEXSOURCE(2);
+      tc2 = (GLuint (*)[4])VB->TexCoordPtr[t2]->data;
+      tc2_stride = VB->TexCoordPtr[t2]->stride;
+      if (DO_PTEX && VB->TexCoordPtr[t2]->size < 4) {
+	 if (VB->TexCoordPtr[t2]->flags & VEC_NOT_WRITEABLE) {
+	    VB->import_data( ctx, VERT_TEX2, VEC_NOT_WRITEABLE );
+	 }
+	 _mesa_vector4f_clean_elem( VB->TexCoordPtr[t2], VB->Count, 3 );
+      }
+   }
+
+   if (DO_TEX1) {
+      if (VB->TexCoordPtr[1]) {
+	 const GLuint t1 = GET_TEXSOURCE(1);
+	 tc1 = (GLuint (*)[4])VB->TexCoordPtr[t1]->data;
+	 tc1_stride = VB->TexCoordPtr[t1]->stride;
+	 if (DO_PTEX && VB->TexCoordPtr[t1]->size < 4) {
+	    if (VB->TexCoordPtr[t1]->flags & VEC_NOT_WRITEABLE) {
+	       VB->import_data( ctx, VERT_TEX1, VEC_NOT_WRITEABLE );
+	    }
+	    _mesa_vector4f_clean_elem( VB->TexCoordPtr[t1], VB->Count, 3 );
+	 }
+      } else {
+	 tc1 = (GLuint (*)[4])&ctx->Current.Texcoord[1]; /* could be anything, really */
+	 tc1_stride = 0;
+      }
+   }
+
+   if (DO_TEX0) {
+      if (VB->TexCoordPtr[0]) {
+	 const GLuint t0 = GET_TEXSOURCE(0);
+	 tc0_stride = VB->TexCoordPtr[t0]->stride;
+	 tc0 = (GLuint (*)[4])VB->TexCoordPtr[t0]->data;
+	 if (DO_PTEX && VB->TexCoordPtr[t0]->size < 4) {
+	    if (VB->TexCoordPtr[t0]->flags & VEC_NOT_WRITEABLE) {
+	       VB->import_data( ctx, VERT_TEX0, VEC_NOT_WRITEABLE );
+	    }
+	    _mesa_vector4f_clean_elem( VB->TexCoordPtr[t0], VB->Count, 3 );
+	 }
+      } else {
+	 tc0 = (GLuint (*)[4])&ctx->Current.Texcoord[0]; /* could be anything, really */
+	 tc0_stride = 0;
+      }
+	 
+   }
+
+   if (DO_NORM) {
+      if (VB->NormalPtr) {
+	 norm_stride = VB->NormalPtr->stride;
+	 norm = (GLuint (*)[3])VB->NormalPtr->data;
+      } else {
+	 norm_stride = 0;
+	 norm = (GLuint (*)[3])&ctx->Current.Normal;
+      }
+   }
+
+   if (DO_RGBA) {
+      if (VB->ColorPtr[0]) {
+	 /* This is incorrect when colormaterial is enabled:
+	  */
+	 if (VB->ColorPtr[0]->Type != GL_UNSIGNED_BYTE) {
+	    if (0) fprintf(stderr, "IMPORTING FLOAT COLORS\n");
+	    IMPORT_FLOAT_COLORS( ctx );
+	 }
+	 col = (GLubyte (*)[4])VB->ColorPtr[0]->Ptr;
+	 col_stride = VB->ColorPtr[0]->StrideB;
+      } else {
+	 col = &dummy; /* any old memory is fine */
+	 col_stride = 0;
+      }
+      
+   }
+
+   if (DO_SPEC) {
+      if (VB->SecondaryColorPtr[0]) {
+	 if (VB->SecondaryColorPtr[0]->Type != GL_UNSIGNED_BYTE)
+	    IMPORT_FLOAT_SPEC_COLORS( ctx );
+	 spec = (GLubyte (*)[4])VB->SecondaryColorPtr[0]->Ptr;
+	 spec_stride = VB->SecondaryColorPtr[0]->StrideB;
+      } else {
+	 spec = &dummy;
+	 spec_stride = 0;
+      }
+	 
+   }
+
+   if (DO_FOG) {
+      if (VB->FogCoordPtr) {
+	 fog = VB->FogCoordPtr->data;
+	 fog_stride = VB->FogCoordPtr->stride;
+      } else {
+	 fog = (GLfloat *)&dummy; *fog = 0;
+	 fog_stride = 0;
+      }
+	      
+   }
+   
+   /* With VB->importable_data set, step each source pointer by its byte stride; otherwise index the arrays with i. */
+   if (VB->importable_data) {
+      if (start) {
+	 coord =  (GLuint (*)[4])((GLubyte *)coord + start * coord_stride);
+	 if (DO_TEX0)
+	    tc0 =  (GLuint (*)[4])((GLubyte *)tc0 + start * tc0_stride);
+	 if (DO_TEX1) 
+	    tc1 =  (GLuint (*)[4])((GLubyte *)tc1 + start * tc1_stride);
+	 if (DO_TEX2) 
+	    tc2 =  (GLuint (*)[4])((GLubyte *)tc2 + start * tc2_stride);
+	 if (DO_NORM) 
+	    norm =  (GLuint (*)[3])((GLubyte *)norm + start * norm_stride);
+	 if (DO_RGBA) 
+	    STRIDE_4UB(col, start * col_stride);
+	 if (DO_SPEC)
+	    STRIDE_4UB(spec, start * spec_stride);
+	 if (DO_FOG)
+	    STRIDE_F(fog, start * fog_stride);
+      }
+
+      for (i=start; i < end; i++) {
+	 v[0].ui = coord[0][0];
+	 v[1].ui = coord[0][1];
+	 v[2].ui = coord[0][2];
+	 if (TCL_DEBUG) fprintf(stderr, "%d: %.2f %.2f %.2f ", i, v[0].f, v[1].f, v[2].f);
+	 if (DO_W) {
+	    v[3].ui = coord[0][3];
+	    if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[3].f);
+	    v += 4;
+	 } 
+	 else
+	    v += 3;
+	 coord =  (GLuint (*)[4])((GLubyte *)coord +  coord_stride);
+
+	 if (DO_NORM) {
+	    v[0].ui = norm[0][0];
+	    v[1].ui = norm[0][1];
+	    v[2].ui = norm[0][2];
+	    if (TCL_DEBUG) fprintf(stderr, "norm: %.2f %.2f %.2f ", v[0].f, v[1].f, v[2].f);
+	    v += 3;
+	    norm =  (GLuint (*)[3])((GLubyte *)norm +  norm_stride);
+	 }
+	 if (DO_RGBA) {
+	    v[0].ui = LE32_TO_CPU(*(GLuint *)&col[0]);
+	    STRIDE_4UB(col, col_stride);
+	    if (TCL_DEBUG) fprintf(stderr, "%x ", v[0].ui);
+	    v++;
+	 }
+	 if (DO_SPEC || DO_FOG) {
+	    if (DO_SPEC) {
+	       v[0].specular.red   = spec[0][0];
+	       v[0].specular.green = spec[0][1];
+	       v[0].specular.blue  = spec[0][2];
+	       STRIDE_4UB(spec, spec_stride);
+	    }
+	    if (DO_FOG) {
+	       v[0].specular.alpha = fog[0] * 255.0;
+	       STRIDE_F(fog, fog_stride);
+	    }
+	    if (TCL_DEBUG) fprintf(stderr, "%x ", v[0].ui);
+	    v++;
+	 }
+	 if (DO_TEX0) {
+	    v[0].ui = tc0[0][0];
+	    v[1].ui = tc0[0][1];
+	    if (TCL_DEBUG) fprintf(stderr, "t0: %.2f %.2f ", v[0].f, v[1].f);
+	    if (DO_PTEX) {
+	       v[2].ui = tc0[0][3];
+	       if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[2].f);
+	       v += 3;
+	    } 
+	    else
+	       v += 2;
+	    tc0 =  (GLuint (*)[4])((GLubyte *)tc0 +  tc0_stride);
+	 }
+	 if (DO_TEX1) {
+	    v[0].ui = tc1[0][0];
+	    v[1].ui = tc1[0][1];
+	    if (TCL_DEBUG) fprintf(stderr, "t1: %.2f %.2f ", v[0].f, v[1].f);
+	    if (DO_PTEX) {
+	       v[2].ui = tc1[0][3];
+	       if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[2].f);
+	       v += 3;
+	    } 
+	    else
+	       v += 2;
+	    tc1 =  (GLuint (*)[4])((GLubyte *)tc1 +  tc1_stride);
+	 } 
+	 if (DO_TEX2) {
+	    v[0].ui = tc2[0][0];
+	    v[1].ui = tc2[0][1];
+	    if (DO_PTEX) {
+	       v[2].ui = tc2[0][3];
+	       v += 3;
+	    } 
+	    else
+	       v += 2;
+	    tc2 =  (GLuint (*)[4])((GLubyte *)tc2 +  tc2_stride);
+	 } 
+	 if (TCL_DEBUG) fprintf(stderr, "\n");
+      }
+   } else {
+      for (i=start; i < end; i++) {
+	 v[0].ui = coord[i][0];
+	 v[1].ui = coord[i][1];
+	 v[2].ui = coord[i][2];
+	 if (DO_W) {
+	    v[3].ui = coord[i][3];
+	    v += 4;
+	 } 
+	 else
+	    v += 3;
+
+	 if (DO_NORM) {
+	    v[0].ui = norm[i][0];
+	    v[1].ui = norm[i][1];
+	    v[2].ui = norm[i][2];
+	    v += 3;
+	 }
+	 if (DO_RGBA) {
+	    v[0].ui = LE32_TO_CPU(*(GLuint *)&col[i]);
+	    v++;
+	 }
+	 if (DO_SPEC || DO_FOG) {
+	    if (DO_SPEC) {
+	       v[0].specular.red   = spec[i][0];
+	       v[0].specular.green = spec[i][1];
+	       v[0].specular.blue  = spec[i][2];
+	    }
+	    if (DO_FOG) {
+	       v[0].specular.alpha = fog[i] * 255.0;
+	    }
+	    v++;
+	 }
+	 if (DO_TEX0) {
+	    v[0].ui = tc0[i][0];
+	    v[1].ui = tc0[i][1];
+	    if (DO_PTEX) {
+	       v[2].ui = tc0[i][3];
+	       v += 3;
+	    } 
+	    else
+	       v += 2;
+	 }
+	 if (DO_TEX1) {
+	    v[0].ui = tc1[i][0];
+	    v[1].ui = tc1[i][1];
+	    if (DO_PTEX) {
+	       v[2].ui = tc1[i][3];
+	       v += 3;
+	    } 
+	    else
+	       v += 2;
+	 } 
+	 if (DO_TEX2) {
+	    v[0].ui = tc2[i][0];
+	    v[1].ui = tc2[i][1];
+	    if (DO_PTEX) {
+	       v[2].ui = tc2[i][3];
+	       v += 3;
+	    } 
+	    else
+	       v += 2;
+	 } 
+      }
+   }
+}
+
+/* Fill setup_tab[IDX] for this instantiation: the emit function, the format
+ * bits (IND) and the number of emit_union slots one vertex occupies. */
+static void TAG(init)( void )
+{
+   int sz = 3;
+   if (DO_W) sz++;
+   if (DO_NORM) sz += 3;
+   if (DO_RGBA) sz++;
+   if (DO_SPEC || DO_FOG) sz++;
+   if (DO_TEX0) sz += 2;
+   if (DO_TEX0 && DO_PTEX) sz++;
+   if (DO_TEX1) sz += 2;
+   if (DO_TEX1 && DO_PTEX) sz++;
+   if (DO_TEX2) sz += 2;
+   if (DO_TEX2 && DO_PTEX) sz++;
+
+   setup_tab[IDX].emit = TAG(emit);
+   setup_tab[IDX].vertex_format = IND;
+   setup_tab[IDX].vertex_size = sz;
+}
+
+
+#undef IND
+#undef TAG
+#undef IDX
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_verts.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_verts.c
new file mode 100644
index 000000000..d91375489
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_verts.c
@@ -0,0 +1,336 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_maos_verts.c,v 1.1 2002/10/30 12:51:55 alanh Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Austin, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#include "radeon_context.h"
+#include "radeon_state.h"
+#include "radeon_ioctl.h"
+#include "radeon_tex.h"
+#include "radeon_tcl.h"
+#include "radeon_swtcl.h"
+#include "radeon_maos.h"
+
+#include "mmath.h"
+#include "mtypes.h"
+#include "enums.h"
+#include "colormac.h"
+#include "light.h"
+
+#include "array_cache/acache.h"
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+#include "tnl/t_imm_debug.h"
+
+#define RADEON_TCL_MAX_SETUP 13
+/* One slot of a vertex as written by the generated emit functions. */
+union emit_union { float f; GLuint ui; radeon_color_t specular; };
+/* One entry per generated vertex layout, filled in by the matching init function. */
+static struct {
+   void   (*emit)( GLcontext *, GLuint, GLuint, void * );
+   GLuint vertex_size;
+   GLuint vertex_format;
+} setup_tab[RADEON_TCL_MAX_SETUP];
+/* Feature tests used by radeon_maos_vbtmp.h; IND is the format mask defined before each include. */
+#define DO_W    (IND & RADEON_CP_VC_FRMT_W0)
+#define DO_RGBA (IND & RADEON_CP_VC_FRMT_PKCOLOR)
+#define DO_SPEC (IND & RADEON_CP_VC_FRMT_PKSPEC)
+#define DO_FOG  (IND & RADEON_CP_VC_FRMT_PKSPEC)	/* NOTE(review): same bit as DO_SPEC -- confirm intended */
+#define DO_TEX0 (IND & RADEON_CP_VC_FRMT_ST0)
+#define DO_TEX1 (IND & RADEON_CP_VC_FRMT_ST1)
+#define DO_PTEX (IND & RADEON_CP_VC_FRMT_Q0)	/* tests Q0 only; Q1 is not consulted */
+#define DO_NORM (IND & RADEON_CP_VC_FRMT_N0)
+
+#define DO_TEX2 0
+#define DO_TEX3 0
+
+#define GET_TEXSOURCE(n)  n
+#define GET_UBYTE_COLOR_STORE() &RADEON_CONTEXT(ctx)->UbyteColor
+#define GET_UBYTE_SPEC_COLOR_STORE() &RADEON_CONTEXT(ctx)->UbyteSecondaryColor
+
+#define IMPORT_FLOAT_COLORS radeon_import_float_colors
+#define IMPORT_FLOAT_SPEC_COLORS radeon_import_float_spec_colors
+
+/***********************************************************************
+ *             Generate vertex emit functions               *
+ ***********************************************************************/
+
+
+/* Defined roughly in order of increasing vertex size; radeonEmitArrays
+ * uses the first entry whose format covers all the requested bits. */
+#define IDX 0
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR)
+#define TAG(x) x##_rgba
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 1
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_n
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 2
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_ST0)
+#define TAG(x) x##_rgba_st
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 3
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_rgba_n
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 4
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_st_n
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 5
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_ST1)
+#define TAG(x) x##_rgba_st_st
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 6
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_rgba_st_n
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 7
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_PKSPEC|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_ST1)
+#define TAG(x) x##_rgba_spec_st_st
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 8
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_st_st_n
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 9
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_PKSPEC|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_rgpa_spec_st_st_n
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 10
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_Q0)
+#define TAG(x) x##_rgba_stq
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 11
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_Q1|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_Q0)
+#define TAG(x) x##_rgba_stq_stq
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 12
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_W0|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_PKSPEC|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_Q0|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_Q1|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_w_rgpa_spec_stq_stq_n
+#include "radeon_maos_vbtmp.h"
+
+
+
+
+
+/***********************************************************************
+ *                         Initialization 
+ ***********************************************************************/
+
+/* Run every generated init_*() so that all RADEON_TCL_MAX_SETUP slots of setup_tab are filled in. */
+static void init_tcl_verts( void )
+{
+   init_rgba();
+   init_n();
+   init_rgba_n();
+   init_rgba_st();
+   init_st_n();
+   init_rgba_st_st();
+   init_rgba_st_n();
+   init_rgba_spec_st_st();
+   init_st_st_n();
+   init_rgpa_spec_st_st_n();
+   init_rgba_stq();
+   init_rgba_stq_stq();
+   init_w_rgpa_spec_stq_stq_n();
+}
+
+/* Pick the first setup_tab layout covering the requested inputs and emit the whole vertex buffer into one dma region. */
+void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLuint req = 0;
+   GLuint vtx = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &
+		 ~(RADEON_TCL_VTX_Q0|RADEON_TCL_VTX_Q1));
+   int i;
+   static int firsttime = 1;
+
+   if (firsttime) {
+      init_tcl_verts();
+      firsttime = 0;
+   }
+   /* Collect in 'req' the format bits this draw needs; 'vtx' gathers the matching Q0/Q1 output bits. */
+   if (1) {
+      req |= RADEON_CP_VC_FRMT_Z;
+      if (VB->ObjPtr->size == 4) {
+	 req |= RADEON_CP_VC_FRMT_W0;
+      }
+   }
+
+   if (inputs & VERT_NORM) {
+      req |= RADEON_CP_VC_FRMT_N0;
+   }
+   
+   if (inputs & VERT_RGBA) {
+      req |= RADEON_CP_VC_FRMT_PKCOLOR;
+   }
+
+   if (inputs & VERT_SPEC_RGB) {
+      req |= RADEON_CP_VC_FRMT_PKSPEC;
+   }
+
+   if (inputs & VERT_TEX0) {
+      req |= RADEON_CP_VC_FRMT_ST0;
+
+      if (VB->TexCoordPtr[0]->size == 4) {
+	 req |= RADEON_CP_VC_FRMT_Q0;
+	 vtx |= RADEON_TCL_VTX_Q0;
+      }
+   }
+
+   if (inputs & VERT_TEX1) {
+      req |= RADEON_CP_VC_FRMT_ST1;
+
+      if (VB->TexCoordPtr[1]->size == 4) {
+	 req |= RADEON_CP_VC_FRMT_Q1;
+	 vtx |= RADEON_TCL_VTX_Q1;
+      }
+   }
+
+   if (vtx != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT]) {
+      RADEON_STATECHANGE( rmesa, tcl );
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = vtx;
+   }
+   /* Table entry 12 carries every bit that can be set in 'req', so the search below always stops in range. */
+   for (i = 0 ; i < RADEON_TCL_MAX_SETUP ; i++) 
+      if ((setup_tab[i].vertex_format & req) == req) 
+	 break;
+
+   if (rmesa->tcl.vertex_format == setup_tab[i].vertex_format &&
+       rmesa->tcl.indexed_verts.buf)
+      return;
+   /* Format changed (or nothing emitted yet): drop any old buffer and emit all VB->Count vertices afresh. */
+   if (rmesa->tcl.indexed_verts.buf)
+      radeonReleaseArrays( ctx, ~0 );
+
+   radeonAllocDmaRegionVerts( rmesa, 
+			      &rmesa->tcl.indexed_verts, 
+			      VB->Count,
+			      setup_tab[i].vertex_size * 4, 
+			      4);
+
+   setup_tab[i].emit( ctx, 0, VB->Count, 
+		      rmesa->tcl.indexed_verts.address + 
+		      rmesa->tcl.indexed_verts.start );
+
+   rmesa->tcl.vertex_format = setup_tab[i].vertex_format;
+   rmesa->tcl.indexed_verts.aos_start = GET_START( &rmesa->tcl.indexed_verts );
+   rmesa->tcl.indexed_verts.aos_size = setup_tab[i].vertex_size;
+   rmesa->tcl.indexed_verts.aos_stride = setup_tab[i].vertex_size;
+
+   rmesa->tcl.aos_components[0] = &rmesa->tcl.indexed_verts;
+   rmesa->tcl.nr_aos_components = 1;
+}
+
+/* Release the single interleaved vertex region when any bit of 'newinputs'
+ * is set; individual bits are not distinguished here. */
+void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+
+   if (RADEON_DEBUG & DEBUG_VERTS) 
+      _tnl_print_vert_flags( __FUNCTION__, newinputs );
+
+   if (newinputs) 
+     radeonReleaseDmaRegion( rmesa, &rmesa->tcl.indexed_verts, __FUNCTION__ );
+}
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_sanity.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_sanity.c
new file mode 100644
index 000000000..51c1d45c5
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_sanity.c
@@ -0,0 +1,979 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_sanity.c,v 1.1 2002/10/30 12:51:55 alanh Exp $ */
+/**************************************************************************
+
+Copyright 2002 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc, Cedar Park, TX.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#include "radeon_context.h"
+#include "radeon_ioctl.h"
+#include "radeon_sanity.h"
+
+/* Set this to '1' to get more verbiage: NORMAL output is then always on and
+ * VERBOSE output follows DEBUG_VERBOSE; with '0' only NORMAL follows it. */
+#define MORE_VERBOSE 1
+
+#if MORE_VERBOSE
+#define VERBOSE (RADEON_DEBUG & DEBUG_VERBOSE)
+#define NORMAL  (1)
+#else
+#define VERBOSE 0
+#define NORMAL  (RADEON_DEBUG & DEBUG_VERBOSE)
+#endif
+
+
+/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
+ * 1.3 cmdbuffers allow all previous state to be updated as well as
+ * the tcl scalar and vector areas.  
+ */
+static struct { 
+   int start;    /* offset of the first register the packet writes */
+   int len;      /* number of consecutive dwords in the packet */
+   const char *name;   /* label used in the log output */
+} packet[RADEON_MAX_STATE_PACKETS] = {
+   { RADEON_PP_MISC,7,"RADEON_PP_MISC" },
+   { RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
+   { RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
+   { RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
+   { RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
+   { RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
+   { RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
+   { RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
+   { RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
+   { RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
+   { RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
+   { RADEON_RE_MISC,1,"RADEON_RE_MISC" },
+   { RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
+   { RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
+   { RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
+   { RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
+   { RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
+   { RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
+   { RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
+   { RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
+   { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
+};
+
+struct reg_names {
+   int idx;             /* register offset or table address the name belongs to */
+   const char *name;    /* printable label */
+};
+
+static struct reg_names reg_names[] = {   /* labels for the registers written by state packets */
+   { RADEON_PP_MISC, "RADEON_PP_MISC" },
+   { RADEON_PP_FOG_COLOR, "RADEON_PP_FOG_COLOR" },
+   { RADEON_RE_SOLID_COLOR, "RADEON_RE_SOLID_COLOR" },
+   { RADEON_RB3D_BLENDCNTL, "RADEON_RB3D_BLENDCNTL" },
+   { RADEON_RB3D_DEPTHOFFSET, "RADEON_RB3D_DEPTHOFFSET" },
+   { RADEON_RB3D_DEPTHPITCH, "RADEON_RB3D_DEPTHPITCH" },
+   { RADEON_RB3D_ZSTENCILCNTL, "RADEON_RB3D_ZSTENCILCNTL" },
+   { RADEON_PP_CNTL, "RADEON_PP_CNTL" },
+   { RADEON_RB3D_CNTL, "RADEON_RB3D_CNTL" },
+   { RADEON_RB3D_COLOROFFSET, "RADEON_RB3D_COLOROFFSET" },
+   { RADEON_RB3D_COLORPITCH, "RADEON_RB3D_COLORPITCH" },
+   { RADEON_SE_CNTL, "RADEON_SE_CNTL" },
+   { RADEON_SE_COORD_FMT, "RADEON_SE_COORD_FMT" },
+   { RADEON_SE_CNTL_STATUS, "RADEON_SE_CNTL_STATUS" },
+   { RADEON_RE_LINE_PATTERN, "RADEON_RE_LINE_PATTERN" },
+   { RADEON_RE_LINE_STATE, "RADEON_RE_LINE_STATE" },
+   { RADEON_SE_LINE_WIDTH, "RADEON_SE_LINE_WIDTH" },
+   { RADEON_RB3D_STENCILREFMASK, "RADEON_RB3D_STENCILREFMASK" },
+   { RADEON_RB3D_ROPCNTL, "RADEON_RB3D_ROPCNTL" },
+   { RADEON_RB3D_PLANEMASK, "RADEON_RB3D_PLANEMASK" },
+   { RADEON_SE_VPORT_XSCALE, "RADEON_SE_VPORT_XSCALE" },
+   { RADEON_SE_VPORT_XOFFSET, "RADEON_SE_VPORT_XOFFSET" },
+   { RADEON_SE_VPORT_YSCALE, "RADEON_SE_VPORT_YSCALE" },
+   { RADEON_SE_VPORT_YOFFSET, "RADEON_SE_VPORT_YOFFSET" },
+   { RADEON_SE_VPORT_ZSCALE, "RADEON_SE_VPORT_ZSCALE" },
+   { RADEON_SE_VPORT_ZOFFSET, "RADEON_SE_VPORT_ZOFFSET" },
+   { RADEON_RE_MISC, "RADEON_RE_MISC" },
+   { RADEON_PP_TXFILTER_0, "RADEON_PP_TXFILTER_0" },
+   { RADEON_PP_TXFILTER_1, "RADEON_PP_TXFILTER_1" },
+   { RADEON_PP_TXFILTER_2, "RADEON_PP_TXFILTER_2" },
+   { RADEON_PP_TXFORMAT_0, "RADEON_PP_TXFORMAT_0" },
+   { RADEON_PP_TXFORMAT_1, "RADEON_PP_TXFORMAT_1" },
+   { RADEON_PP_TXFORMAT_2, "RADEON_PP_TXFORMAT_2" },
+   { RADEON_PP_TXOFFSET_0, "RADEON_PP_TXOFFSET_0" },
+   { RADEON_PP_TXOFFSET_1, "RADEON_PP_TXOFFSET_1" },
+   { RADEON_PP_TXOFFSET_2, "RADEON_PP_TXOFFSET_2" },
+   { RADEON_PP_TXCBLEND_0, "RADEON_PP_TXCBLEND_0" },
+   { RADEON_PP_TXCBLEND_1, "RADEON_PP_TXCBLEND_1" },
+   { RADEON_PP_TXCBLEND_2, "RADEON_PP_TXCBLEND_2" },
+   { RADEON_PP_TXABLEND_0, "RADEON_PP_TXABLEND_0" },
+   { RADEON_PP_TXABLEND_1, "RADEON_PP_TXABLEND_1" },
+   { RADEON_PP_TXABLEND_2, "RADEON_PP_TXABLEND_2" },
+   { RADEON_PP_TFACTOR_0, "RADEON_PP_TFACTOR_0" },
+   { RADEON_PP_TFACTOR_1, "RADEON_PP_TFACTOR_1" },
+   { RADEON_PP_TFACTOR_2, "RADEON_PP_TFACTOR_2" },
+   { RADEON_PP_BORDER_COLOR_0, "RADEON_PP_BORDER_COLOR_0" },
+   { RADEON_PP_BORDER_COLOR_1, "RADEON_PP_BORDER_COLOR_1" },
+   { RADEON_PP_BORDER_COLOR_2, "RADEON_PP_BORDER_COLOR_2" },
+   { RADEON_SE_ZBIAS_FACTOR, "RADEON_SE_ZBIAS_FACTOR" },
+   { RADEON_SE_ZBIAS_CONSTANT, "RADEON_SE_ZBIAS_CONSTANT" },
+   { RADEON_SE_TCL_OUTPUT_VTX_FMT, "RADEON_SE_TCL_OUTPUT_VTXFMT" },
+   { RADEON_SE_TCL_OUTPUT_VTX_SEL, "RADEON_SE_TCL_OUTPUT_VTXSEL" },
+   { RADEON_SE_TCL_MATRIX_SELECT_0, "RADEON_SE_TCL_MATRIX_SELECT_0" },
+   { RADEON_SE_TCL_MATRIX_SELECT_1, "RADEON_SE_TCL_MATRIX_SELECT_1" },
+   { RADEON_SE_TCL_UCP_VERT_BLEND_CTL, "RADEON_SE_TCL_UCP_VERT_BLEND_CTL" },
+   { RADEON_SE_TCL_TEXTURE_PROC_CTL, "RADEON_SE_TCL_TEXTURE_PROC_CTL" },
+   { RADEON_SE_TCL_LIGHT_MODEL_CTL, "RADEON_SE_TCL_LIGHT_MODEL_CTL" },
+   { RADEON_SE_TCL_PER_LIGHT_CTL_0, "RADEON_SE_TCL_PER_LIGHT_CTL_0" },
+   { RADEON_SE_TCL_PER_LIGHT_CTL_1, "RADEON_SE_TCL_PER_LIGHT_CTL_1" },
+   { RADEON_SE_TCL_PER_LIGHT_CTL_2, "RADEON_SE_TCL_PER_LIGHT_CTL_2" },
+   { RADEON_SE_TCL_PER_LIGHT_CTL_3, "RADEON_SE_TCL_PER_LIGHT_CTL_3" },
+   { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, "RADEON_SE_TCL_EMMISSIVE_RED" },
+   { RADEON_SE_TCL_MATERIAL_EMMISSIVE_GREEN, "RADEON_SE_TCL_EMMISSIVE_GREEN" },
+   { RADEON_SE_TCL_MATERIAL_EMMISSIVE_BLUE, "RADEON_SE_TCL_EMMISSIVE_BLUE" },
+   { RADEON_SE_TCL_MATERIAL_EMMISSIVE_ALPHA, "RADEON_SE_TCL_EMMISSIVE_ALPHA" },
+   { RADEON_SE_TCL_MATERIAL_AMBIENT_RED, "RADEON_SE_TCL_AMBIENT_RED" },
+   { RADEON_SE_TCL_MATERIAL_AMBIENT_GREEN, "RADEON_SE_TCL_AMBIENT_GREEN" },
+   { RADEON_SE_TCL_MATERIAL_AMBIENT_BLUE, "RADEON_SE_TCL_AMBIENT_BLUE" },
+   { RADEON_SE_TCL_MATERIAL_AMBIENT_ALPHA, "RADEON_SE_TCL_AMBIENT_ALPHA" },
+   { RADEON_SE_TCL_MATERIAL_DIFFUSE_RED, "RADEON_SE_TCL_DIFFUSE_RED" },
+   { RADEON_SE_TCL_MATERIAL_DIFFUSE_GREEN, "RADEON_SE_TCL_DIFFUSE_GREEN" },
+   { RADEON_SE_TCL_MATERIAL_DIFFUSE_BLUE, "RADEON_SE_TCL_DIFFUSE_BLUE" },
+   { RADEON_SE_TCL_MATERIAL_DIFFUSE_ALPHA, "RADEON_SE_TCL_DIFFUSE_ALPHA" },
+   { RADEON_SE_TCL_MATERIAL_SPECULAR_RED, "RADEON_SE_TCL_SPECULAR_RED" },
+   { RADEON_SE_TCL_MATERIAL_SPECULAR_GREEN, "RADEON_SE_TCL_SPECULAR_GREEN" },
+   { RADEON_SE_TCL_MATERIAL_SPECULAR_BLUE, "RADEON_SE_TCL_SPECULAR_BLUE" },
+   { RADEON_SE_TCL_MATERIAL_SPECULAR_ALPHA, "RADEON_SE_TCL_SPECULAR_ALPHA" },
+   { RADEON_SE_TCL_SHININESS, "RADEON_SE_TCL_SHININESS" },
+   { RADEON_SE_COORD_FMT, "RADEON_SE_COORD_FMT" }   /* repeats the entry above; lookups hit the first */
+};
+
+static struct reg_names scalar_names[] = {   /* labels for addresses in the scalar table */
+   { RADEON_SS_LIGHT_DCD_ADDR, "LIGHT_DCD" },
+   { RADEON_SS_LIGHT_SPOT_EXPONENT_ADDR, "LIGHT_SPOT_EXPONENT" },
+   { RADEON_SS_LIGHT_SPOT_CUTOFF_ADDR, "LIGHT_SPOT_CUTOFF" },
+   { RADEON_SS_LIGHT_SPECULAR_THRESH_ADDR, "LIGHT_SPECULAR_THRESH" },
+   { RADEON_SS_LIGHT_RANGE_CUTOFF_ADDR, "LIGHT_RANGE_CUTOFF" },
+   { RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR, "VERT_GUARD_CLIP" },
+   { RADEON_SS_VERT_GUARD_DISCARD_ADJ_ADDR, "VERT_GUARD_DISCARD" },
+   { RADEON_SS_HORZ_GUARD_CLIP_ADJ_ADDR, "HORZ_GUARD_CLIP" },
+   { RADEON_SS_HORZ_GUARD_DISCARD_ADJ_ADDR, "HORZ_GUARD_DISCARD" },
+   { RADEON_SS_SHININESS, "SHININESS" },
+   { 1000, "" },   /* sentinel: larger than any scalars[] index, never matched */
+};
+
+/* Vector (4-dword) addresses, unscaled: init_regs() and get_reg_name()
+ * convert between these and the dword indices used by vectors[]. */
+static struct reg_names vector_names[] = {
+   { RADEON_VS_MATRIX_0_ADDR, "MATRIX_0" },
+   { RADEON_VS_MATRIX_1_ADDR, "MATRIX_1" },
+   { RADEON_VS_MATRIX_2_ADDR, "MATRIX_2" },
+   { RADEON_VS_MATRIX_3_ADDR, "MATRIX_3" },
+   { RADEON_VS_MATRIX_4_ADDR, "MATRIX_4" },
+   { RADEON_VS_MATRIX_5_ADDR, "MATRIX_5" },
+   { RADEON_VS_MATRIX_6_ADDR, "MATRIX_6" },
+   { RADEON_VS_MATRIX_7_ADDR, "MATRIX_7" },
+   { RADEON_VS_MATRIX_8_ADDR, "MATRIX_8" },
+   { RADEON_VS_MATRIX_9_ADDR, "MATRIX_9" },
+   { RADEON_VS_MATRIX_10_ADDR, "MATRIX_10" },
+   { RADEON_VS_MATRIX_11_ADDR, "MATRIX_11" },
+   { RADEON_VS_MATRIX_12_ADDR, "MATRIX_12" },
+   { RADEON_VS_MATRIX_13_ADDR, "MATRIX_13" },
+   { RADEON_VS_MATRIX_14_ADDR, "MATRIX_14" },
+   { RADEON_VS_MATRIX_15_ADDR, "MATRIX_15" },
+   { RADEON_VS_LIGHT_AMBIENT_ADDR, "LIGHT_AMBIENT" },
+   { RADEON_VS_LIGHT_DIFFUSE_ADDR, "LIGHT_DIFFUSE" },
+   { RADEON_VS_LIGHT_SPECULAR_ADDR, "LIGHT_SPECULAR" },
+   { RADEON_VS_LIGHT_DIRPOS_ADDR, "LIGHT_DIRPOS" },
+   { RADEON_VS_LIGHT_HWVSPOT_ADDR, "LIGHT_HWVSPOT" },
+   { RADEON_VS_LIGHT_ATTENUATION_ADDR, "LIGHT_ATTENUATION" },
+   { RADEON_VS_MATRIX_EYE2CLIP_ADDR, "MATRIX_EYE2CLIP" },
+   { RADEON_VS_UCP_ADDR, "UCP" },
+   { RADEON_VS_GLOBAL_AMBIENT_ADDR, "GLOBAL_AMBIENT" },
+   { RADEON_VS_FOG_PARAM_ADDR, "FOG_PARAM" },
+   { RADEON_VS_EYE_VECTOR_ADDR, "EYE_VECTOR" },
+   { 1000, "" },   /* sentinel: beyond every vectors[] index, never matched */
+};
+
+union fi { float f; int i; };   /* one dword, readable as float or as int */
+
+#define ISVEC   1   /* entry belongs to the vectors[] table */
+#define ISFLOAT 2   /* value is logged and compared as a float */
+#define TOUCHED 4   /* entry has been written at least once */
+
+struct reg {
+   int idx;                    /* register offset (regs[]) or table index; -1 ends a table */
+   struct reg_names *closest;  /* name entry this register is reported relative to */
+   int flags;                  /* ISVEC | ISFLOAT | TOUCHED */
+   union fi current;           /* last value written */
+   union fi *values;           /* distinct integer values seen so far */
+   int nvalues;                /* entries used in values[] */
+   int nalloc;                 /* entries allocated for values[] */
+   float vmin, vmax;           /* smallest / largest float value logged */
+};
+
+
+static struct reg regs[Elements(reg_names)+1];   /* +1: slot for the idx == -1 terminator */
+static struct reg scalars[512+1];
+static struct reg vectors[512*4+1];
+
+static int total, total_changed, bufs;   /* counters: dwords logged, dwords changed, DMA discards */
+/* Fill regs[], scalars[] and vectors[] and end each table with idx == -1. */
+static void init_regs( void )
+{
+   struct reg_names *tmp;
+   int i;
+   /* regs[] has one slot more than reg_names[]; only copy the named ones. */
+   for (i = 0 ; i < Elements(reg_names) ; i++) {
+      regs[i].idx = reg_names[i].idx;
+      regs[i].closest = &reg_names[i];
+      regs[i].flags = 0;
+   }
+
+   for (i = 0, tmp = scalar_names ; i < Elements(scalars) ; i++) {
+      if (tmp[1].idx == i) tmp++;
+      scalars[i].idx = i;
+      scalars[i].closest = tmp;
+      scalars[i].flags = ISFLOAT;
+   }
+
+   for (i = 0, tmp = vector_names ; i < Elements(vectors) ; i++) {
+      if (tmp[1].idx*4 == i) tmp++;
+      vectors[i].idx = i;
+      vectors[i].closest = tmp;
+      vectors[i].flags = ISFLOAT|ISVEC;
+   }
+
+   regs[Elements(regs)-1].idx = -1;
+   scalars[Elements(scalars)-1].idx = -1;
+   vectors[Elements(vectors)-1].idx = -1;
+}
+/* Record 'val' for 'reg'; returns 1 if it was seen before, otherwise 0. */
+static int find_or_add_value( struct reg *reg, int val )
+{
+   int j;
+
+   for ( j = 0 ; j < reg->nvalues ; j++)
+      if ( val == reg->values[j].i )
+	 return 1;
+
+   if (j == reg->nalloc) {   /* table full: grow to (nalloc + 5) * 2 entries */
+      int n = (reg->nalloc + 5) * 2;
+      union fi *v = (union fi *) realloc( reg->values, n * sizeof(union fi) );
+      if (!v) return 0;   /* out of memory: keep the old table, report as new */
+      reg->values = v; reg->nalloc = n;
+   }
+
+   reg->values[reg->nvalues++].i = val;
+   return 0;
+}
+/* Linear search of a table ended by idx == -1; logs and returns 0 on a miss. */
+static struct reg *lookup_reg( struct reg *tab, int reg )
+{
+   struct reg *entry;
+
+   for (entry = tab ; entry->idx != -1 ; entry++) {
+      if (entry->idx == reg)
+	 return entry;
+   }
+
+   fprintf(stderr, "*** unknown reg 0x%x\n", reg);
+   return 0;
+}
+
+/* Name of 'reg' relative to its closest named entry ("NAME+off[comp]").
+ * May return a static buffer that the next call overwrites.
+ */
+static const char *get_reg_name( struct reg *reg )
+{
+   static char tmp[80];
+   const struct reg_names *base = reg->closest;
+
+   if (reg->idx == base->idx) 
+      return base->name;
+
+   if (reg->flags & ISVEC) {
+      /* Vector entries are indexed by dword: idx/4 selects the vector and
+       * idx%4 the component within it. */
+      if (reg->idx/4 != base->idx)
+	 sprintf(tmp, "%s+%d[%d]", base->name, (reg->idx/4) - base->idx,
+		 reg->idx%4);
+      else
+	 sprintf(tmp, "%s[%d]", base->name, reg->idx%4);
+   }
+   else {
+      /* idx != base->idx is guaranteed by the early return above. */
+      sprintf(tmp, "%s+%d", base->name, reg->idx - base->idx);
+   }
+
+   return tmp;
+}
+/* Log an integer register write; returns nonzero if the value changed. */
+static int print_int_reg_assignment( struct reg *reg, int data )
+{
+   const int changed = (reg->current.i != data);
+   const int is_new = !find_or_add_value( reg, data );
+   const int show = VERBOSE || (NORMAL && (changed || is_new));
+
+   if (show)
+       fprintf(stderr, "   %s <-- 0x%x", get_reg_name(reg), data);
+
+   if (NORMAL && is_new)
+      fprintf(stderr, " *** BRAND NEW VALUE");
+   else if (NORMAL && changed)
+      fprintf(stderr, " *** CHANGED"); 
+
+   /* Remember the value for the next comparison. */
+   reg->current.i = data;
+
+   if (show)
+      fprintf(stderr, "\n");
+
+   return changed;
+}
+/* Log a float register write and track the range of values seen; returns
+ * nonzero if the value differs from the previous one. */
+static int print_float_reg_assignment( struct reg *reg, float data )
+{
+   const int changed = (reg->current.f != data);
+   const int below = (data < reg->vmin);
+   const int above = (data > reg->vmax);
+   const int show = VERBOSE || (NORMAL && (below || above || changed));
+
+   if (show)
+      fprintf(stderr, "   %s <-- %.3f", get_reg_name(reg), data);
+
+   /* vmin/vmax are only updated while NORMAL output is enabled. */
+   if (NORMAL && below) {
+      fprintf(stderr, " *** NEW MIN (prev %.3f)", reg->vmin);
+      reg->vmin = data;
+   }
+   else if (NORMAL && above) {
+      fprintf(stderr, " *** NEW MAX (prev %.3f)", reg->vmax);
+      reg->vmax = data;
+   }
+   else if (NORMAL && changed)
+      fprintf(stderr, " *** CHANGED");
+
+   /* Remember the value for the next comparison. */
+   reg->current.f = data;
+
+   if (show)
+      fprintf(stderr, "\n");
+
+   return changed;
+}
+/* Log one dword written to 'reg'; returns nonzero if its value changed. */
+static int print_reg_assignment( struct reg *reg, int data )
+{
+   if (!reg) return 0;   /* unknown register, already reported by lookup_reg() */
+   reg->flags |= TOUCHED;
+   if (reg->flags & ISFLOAT)
+      return print_float_reg_assignment( reg, *(float *)&data );
+   return print_int_reg_assignment( reg, data );
+}
+/* Print the current value of 'reg' if it has ever been written. */
+static void print_reg( struct reg *reg )
+{
+   if (!(reg->flags & TOUCHED))
+      return;
+
+   if (reg->flags & ISFLOAT)
+      fprintf(stderr, "   %s == %f\n", get_reg_name(reg), reg->current.f);
+   else
+      fprintf(stderr, "   %s == 0x%x\n", get_reg_name(reg), reg->current.i);
+}
+
+/* Print every register, scalar and vector entry that has been written. */
+static void dump_state( void )
+{
+   struct reg *r;
+
+   for (r = regs ; r < regs + Elements(regs) ; r++) 
+      print_reg( r );
+
+   for (r = scalars ; r < scalars + Elements(scalars) ; r++) 
+      print_reg( r );
+
+   for (r = vectors ; r < vectors + Elements(vectors) ; r++) 
+      print_reg( r );
+}
+
+/* Decode one RADEON_CMD_PACKET: validate the packet id, log each register
+ * write and advance cmdbuf past the payload.  Returns 0 or -EINVAL. */
+static int radeon_emit_packets( 
+   drmRadeonCmdHeader header,
+   drmRadeonCmdBuffer *cmdbuf )
+{
+   int id = (int)header.packet.packet_id;
+   int *data = (int *)cmdbuf->buf;
+   int sz, i;
+
+   /* Validate the id before it is used to index packet[]. */
+   if (id < 0 || id >= RADEON_MAX_STATE_PACKETS || !packet[id].name) {
+      fprintf(stderr, "*** Unknown packet 0 nr %d\n", id );
+      return -EINVAL;
+   }
+
+   sz = packet[id].len;
+   if (sz * sizeof(int) > cmdbuf->bufsz) {
+      fprintf(stderr, "Packet overflows cmdbuf\n");      
+      return -EINVAL;
+   }
+
+   if (VERBOSE) 
+      fprintf(stderr, "Packet 0 reg %s nr %d\n", packet[id].name, sz );
+
+   for ( i = 0 ; i < sz ; i++) {
+      struct reg *reg = lookup_reg( regs, packet[id].start + i*4 );
+      if (reg && print_reg_assignment( reg, data[i] ))   /* NULL: unnamed offset */
+	 total_changed++;
+      total++;
+   }
+
+   cmdbuf->buf += sz * sizeof(int);
+   cmdbuf->bufsz -= sz * sizeof(int);
+   return 0;
+}
+/* Decode one RADEON_CMD_SCALARS: log 'count' dwords written to the scalar
+ * table starting at 'offset' with step 'stride'.  Always returns 0. */
+static int radeon_emit_scalars( 
+   drmRadeonCmdHeader header,
+   drmRadeonCmdBuffer *cmdbuf )
+{
+   int sz = header.scalars.count;
+   int *data = (int *)cmdbuf->buf;
+   int start = header.scalars.offset;
+   int stride = header.scalars.stride;
+   int i;
+
+   if (VERBOSE)
+      fprintf(stderr, "emit scalars, start %d stride %d nr %d (end %d)\n",
+	      start, stride, sz, start + stride * sz);
+
+   /* lookup_reg() logs and returns NULL for addresses outside scalars[]. */
+   for (i = 0 ; i < sz ; i++, start += stride) {
+      struct reg *reg = lookup_reg( scalars, start );
+      if (reg && print_reg_assignment( reg, data[i] ))
+	 total_changed++;
+      total++;
+   }
+
+   cmdbuf->buf += sz * sizeof(int);
+   cmdbuf->bufsz -= sz * sizeof(int);
+   return 0;
+}
+/* Decode one RADEON_CMD_SCALARS2: like the plain scalars command, but the
+ * table offset is biased by 0x100.  Returns 0, or -1 on overflow. */
+static int radeon_emit_scalars2( 
+   drmRadeonCmdHeader header,
+   drmRadeonCmdBuffer *cmdbuf )
+{
+   const int count = header.scalars.count;
+   const int step = header.scalars.stride;
+   const int *payload = (int *)cmdbuf->buf;
+   int addr = header.scalars.offset + 0x100;
+   int n;
+
+   if (VERBOSE)
+      fprintf(stderr, "emit scalars2, start %d stride %d nr %d (end %d)\n",
+	      addr, step, count, addr + step * count);
+
+   if (addr + step * count > 257) {
+      fprintf(stderr, "emit scalars OVERFLOW %d/%d/%d\n", addr, step, count);
+      return -1;
+   }
+
+   for (n = 0 ; n < count ; n++) {
+      if (print_reg_assignment( lookup_reg( scalars, addr ), payload[n] ))
+	 total_changed++;
+      total++;
+      addr += step;
+   }
+
+   cmdbuf->buf += count * sizeof(int);
+   cmdbuf->bufsz -= count * sizeof(int);
+   return 0;
+}
+/* Decode one RADEON_CMD_VECTORS: log 'count' dwords, four per vector, written
+ * from vector 'offset' onwards in steps of 'stride'.  Always returns 0.
+ * Still unchecked: inf/nan/extreme values; table start, end, nr, etc.
+ */
+static int radeon_emit_vectors( 
+   drmRadeonCmdHeader header,
+   drmRadeonCmdBuffer *cmdbuf )
+{
+   int sz = header.vectors.count;
+   int *data = (int *)cmdbuf->buf;
+   int start = header.vectors.offset;
+   int stride = header.vectors.stride;
+   int i,j;
+
+   if (VERBOSE)
+      fprintf(stderr, "emit vectors, start %d stride %d nr %d (end %d) (0x%x)\n",
+	      start, stride, sz, start + stride * sz, header.i);
+
+   /* start/stride are not range checked here: lookup_reg() logs and
+    * returns NULL for addresses outside vectors[].  The inner loop stops
+    * at 'sz' so a count that is not a multiple of four stays in the payload.
+    */
+
+   for (i = 0 ; i < sz ;  start += stride) {
+      int changed = 0;
+      for (j = 0 ; j < 4 && i < sz ; i++,j++) {
+	 struct reg *reg = lookup_reg( vectors, start*4+j );
+	 if (reg && print_reg_assignment( reg, data[i] ))
+	    changed = 1;
+      }
+      if (changed)
+	 total_changed += 4;
+      total += 4;
+   }
+
+
+   cmdbuf->buf += sz * sizeof(int);
+   cmdbuf->bufsz -= sz * sizeof(int);
+   return 0;
+}
+
+/* Log the vertex format word as a list of component names; returns 0. */
+static int print_vertex_format( int vfmt )
+{
+   if (NORMAL) {
+      fprintf(stderr, "   %s(%x): %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+	      "vertex format",
+	      vfmt,
+	      "xy,",
+	      (vfmt & RADEON_CP_VC_FRMT_Z) ? "z," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_W0) ? "w0," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_FPCOLOR) ? "fpcolor," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_FPALPHA) ? "fpalpha," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_PKCOLOR) ? "pkcolor," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_FPSPEC) ? "fpspec," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_FPFOG) ? "fpfog," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_PKSPEC) ? "pkspec," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_ST0) ? "st0," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_ST1) ? "st1," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_Q1) ? "q1," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_ST2) ? "st2," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_Q2) ? "q2," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_ST3) ? "st3," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_Q3) ? "q3," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_Q0) ? "q0," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_N0) ? "n0," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_XY1) ? "xy1," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_Z1) ? "z1," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_W1) ? "w1," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_N1) ? "n1," : "");
+
+   
+/*       if (!find_or_add_value( &others[V_VTXFMT], vfmt )) */
+/* 	 fprintf(stderr, " *** NEW VALUE"); */
+
+      fprintf(stderr, "\n");
+   }
+
+   return 0;
+}
+
+static char *primname[0xf] = {   /* indexed by (prim & 0xf); slots past the list are NULL */
+   "NONE",
+   "POINT",
+   "LINE",
+   "LINE_STRIP",
+   "TRI_LIST",
+   "TRI_FAN",
+   "TRI_STRIP",
+   "TRI_TYPE_2",
+   "RECT_LIST",
+   "3VRT_POINT_LIST",
+   "3VRT_LINE_LIST",
+};
+/* Log a draw packet's primitive word; -1 on a bad type or vertex count. */
+static int print_prim_and_flags( int prim )
+{
+   int numverts;
+   
+   if (NORMAL)
+      fprintf(stderr, "   %s(%x): %s%s%s%s%s%s%s\n",
+	      "prim flags",
+	      prim,
+	      ((prim & 0x30) == RADEON_CP_VC_CNTL_PRIM_WALK_IND) ? "IND," : "",
+	      ((prim & 0x30) == RADEON_CP_VC_CNTL_PRIM_WALK_LIST) ? "LIST," : "",
+	      ((prim & 0x30) == RADEON_CP_VC_CNTL_PRIM_WALK_RING) ? "RING," : "",
+	      (prim & RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA) ? "RGBA," : "BGRA, ",
+	      (prim & RADEON_CP_VC_CNTL_MAOS_ENABLE) ? "MAOS," : "",
+	      (prim & RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE) ? "RADEON," : "",
+	      (prim & RADEON_CP_VC_CNTL_TCL_ENABLE) ? "TCL," : "");
+
+   if ((prim & 0xf) > RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_LINE_LIST) {
+      fprintf(stderr, "   *** Bad primitive: %x\n", prim & 0xf);
+      return -1;
+   }
+
+   numverts = (int)(((unsigned int)prim) >> 16);   /* unsigned shift: count is never negative */
+   
+   if (NORMAL)
+      fprintf(stderr, "   prim: %s numverts %d\n", primname[prim&0xf], numverts);
+
+   switch (prim & 0xf) {
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_NONE:
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_POINT:
+      if (numverts < 1) {
+	 fprintf(stderr, "Bad nr verts for point %d\n", numverts);
+	 return -1;
+      }
+      break;
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_LINE:
+      if ((numverts & 1) || numverts == 0) {
+	 fprintf(stderr, "Bad nr verts for line %d\n", numverts);
+	 return -1;
+      }
+      break;
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP:
+      if (numverts < 2) {
+	 fprintf(stderr, "Bad nr verts for line_strip %d\n", numverts);
+	 return -1;
+      }
+      break;
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST:
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_POINT_LIST:
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_LINE_LIST:
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST:
+      if (numverts % 3 || numverts == 0) {
+	 fprintf(stderr, "Bad nr verts for tri %d\n", numverts);
+	 return -1;
+      }
+      break;
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN:
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP:
+      if (numverts < 3) {
+	 fprintf(stderr, "Bad nr verts for strip/fan %d\n", numverts);
+	 return -1;
+      }
+      break;
+   default:
+      fprintf(stderr, "Bad primitive\n");
+      return -1;
+   }	
+   return 0;
+}
+
+/* Decode one type-3 CP packet at cmdbuf->buf, log it (with built-in knowledge
+ * of some packet types) and advance cmdbuf.  Returns 0 or -EINVAL. */
+static int radeon_emit_packet3( drmRadeonCmdBuffer *cmdbuf )
+{
+   int cmdsz;
+   int *cmd = (int *)cmdbuf->buf;
+   int *tmp;
+   int i, stride, size, start;
+   /* Total packet length in dwords: the header's count field plus two. */
+   cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
+
+   if ((cmd[0] & RADEON_CP_PACKET_MASK) != RADEON_CP_PACKET3 ||
+       cmdsz * 4 > cmdbuf->bufsz ||
+       cmdsz > RADEON_CP_PACKET_MAX_DWORDS) {
+      fprintf(stderr, "Bad packet\n");
+      return -EINVAL;
+   }
+
+   switch( cmd[0] & ~RADEON_CP_PACKET_COUNT_MASK ) {
+   case RADEON_CP_PACKET3_NOP:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_NOP, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_NEXT_CHAR:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_NEXT_CHAR, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_PLY_NEXTSCAN:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_PLY_NEXTSCAN, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_SET_SCISSORS:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_SET_SCISSORS, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_3D_RNDR_GEN_INDX_PRIM, %d dwords\n",
+	      cmdsz);
+      break;
+   case RADEON_CP_PACKET3_LOAD_MICROCODE:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_LOAD_MICROCODE, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_WAIT_FOR_IDLE:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_WAIT_FOR_IDLE, %d dwords\n", cmdsz);
+      break;
+
+   case RADEON_CP_PACKET3_3D_DRAW_VBUF:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_3D_DRAW_VBUF, %d dwords\n", cmdsz);
+      print_vertex_format(cmd[1]);
+      print_prim_and_flags(cmd[2]);   /* NOTE(review): return value is ignored */
+      break;
+
+   case RADEON_CP_PACKET3_3D_DRAW_IMMD:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_3D_DRAW_IMMD, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_3D_DRAW_INDX: {
+      int neltdwords;
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_3D_DRAW_INDX, %d dwords\n", cmdsz);
+      print_vertex_format(cmd[1]);
+      print_prim_and_flags(cmd[2]);
+      neltdwords = cmd[2]>>16;        /* element count from the primitive word */
+      neltdwords += neltdwords & 1;   /* round up to an even count ... */
+      neltdwords /= 2;                /* ... i.e. two elements per dword */
+      if (neltdwords + 3 != cmdsz)
+	 fprintf(stderr, "Mismatch in DRAW_INDX, %d vs cmdsz %d\n",
+		 neltdwords, cmdsz);
+      break;
+   }
+   case RADEON_CP_PACKET3_LOAD_PALETTE:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_LOAD_PALETTE, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_3D_LOAD_VBPNTR:
+      if (NORMAL) {
+	 fprintf(stderr, "PACKET3_3D_LOAD_VBPNTR, %d dwords\n", cmdsz);
+	 fprintf(stderr, "   nr arrays: %d\n", cmd[1]);
+      }
+
+      if (cmd[1]/2 + cmd[1]%2 != cmdsz - 3) {
+	 fprintf(stderr, "  ****** MISMATCH %d/%d *******\n",
+		 cmd[1]/2 + cmd[1]%2 + 3, cmdsz);
+	 return -EINVAL;
+      }
+      /* Two arrays share one attribute dword, followed by both start dwords. */
+      if (NORMAL) {
+	 tmp = cmd+2;
+	 for (i = 0 ; i < cmd[1] ; i++) {
+	    if (i & 1) {
+	       stride = (tmp[0]>>24) & 0xff;
+	       size = (tmp[0]>>16) & 0xff;
+	       start = tmp[2];
+	       tmp += 3;
+	    }
+	    else {
+	       stride = (tmp[0]>>8) & 0xff;
+	       size = (tmp[0]) & 0xff;
+	       start = tmp[1];
+	    }
+	    fprintf(stderr, "   array %d: start 0x%x vsize %d vstride %d\n",
+		    i, start, size, stride );
+	 }
+      }
+      break;
+   case RADEON_CP_PACKET3_CNTL_PAINT:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_PAINT, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_BITBLT:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_BITBLT, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_SMALLTEXT:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_SMALLTEXT, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_HOSTDATA_BLT, %d dwords\n", 
+	      cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_POLYLINE:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_POLYLINE, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_POLYSCANLINES:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_POLYSCANLINES, %d dwords\n", 
+	      cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_PAINT_MULTI:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_PAINT_MULTI, %d dwords\n", 
+	      cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_BITBLT_MULTI:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_BITBLT_MULTI, %d dwords\n", 
+	      cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_TRANS_BITBLT:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_TRANS_BITBLT, %d dwords\n", 
+	      cmdsz);
+      break;
+   default:
+      fprintf(stderr, "UNKNOWN PACKET, %d dwords\n", cmdsz);
+      break;
+   }
+      
+   cmdbuf->buf += cmdsz * 4;
+   cmdbuf->bufsz -= cmdsz * 4;
+   return 0;
+}
+
+
+/* Log the cliprects (no bounds are checked here), then decode the packet
+ * with radeon_emit_packet3() and return its result. */
+static int radeon_emit_packet3_cliprect( drmRadeonCmdBuffer *cmdbuf )
+{   
+   XF86DRIClipRectRec *boxes = (XF86DRIClipRectRec *)cmdbuf->boxes;
+   int i;
+
+   /* Dump the accumulated state before a draw, but only in VERBOSE mode. */
+   if (VERBOSE && total_changed) {
+      dump_state();
+      total_changed = 0;
+   }
+   else if (VERBOSE)
+      fprintf(stderr, "total_changed zero\n");
+
+   if (NORMAL) {
+      for (i = 0 ; i < cmdbuf->nbox ; i++)
+	 fprintf(stderr, "Emit box %d/%d %d,%d %d,%d\n",
+		 i, cmdbuf->nbox,
+		 boxes[i].x1, boxes[i].y1, boxes[i].x2, boxes[i].y2);
+   }
+
+   /* NOTE(review): a lone cliprect is cleared here; reason not visible. */
+   if (cmdbuf->nbox == 1)
+      cmdbuf->nbox = 0;
+
+   return radeon_emit_packet3( cmdbuf );
+}
+/* Walk the context's pending command buffer, decoding and logging each
+ * command; returns 0, or -EINVAL as soon as a command fails to decode. */
+int radeonSanityCmdBuffer( radeonContextPtr rmesa,
+			   int nbox,
+			   XF86DRIClipRectRec *boxes )
+{
+   int idx;
+   drmRadeonCmdBuffer cmdbuf;
+   drmRadeonCmdHeader header;
+   static int inited = 0;
+
+   if (!inited) {
+      init_regs();
+      inited = 1;
+   }
+
+   cmdbuf.buf = rmesa->store.cmd_buf;
+   cmdbuf.bufsz = rmesa->store.cmd_used;
+   cmdbuf.boxes = (drmClipRect *)boxes;
+   cmdbuf.nbox = nbox;
+
+   /* Signed compare: a decoder that over-consumes leaves bufsz negative. */
+   while ( cmdbuf.bufsz >= (int)sizeof(header) ) {
+      /* Consume the command header; each decoder then eats its own payload. */
+      header.i = *(int *)cmdbuf.buf;
+      cmdbuf.buf += sizeof(header);
+      cmdbuf.bufsz -= sizeof(header);
+
+      switch (header.header.cmd_type) {
+      case RADEON_CMD_PACKET: 
+	 if (radeon_emit_packets( header, &cmdbuf )) {
+	    fprintf(stderr,"radeon_emit_packets failed\n");
+	    return -EINVAL;
+	 }
+	 break;
+
+      case RADEON_CMD_SCALARS:
+	 if (radeon_emit_scalars( header, &cmdbuf )) {
+	    fprintf(stderr,"radeon_emit_scalars failed\n");
+	    return -EINVAL;
+	 }
+	 break;
+
+      case RADEON_CMD_SCALARS2:
+	 if (radeon_emit_scalars2( header, &cmdbuf )) {
+	    fprintf(stderr,"radeon_emit_scalars2 failed\n");
+	    return -EINVAL;
+	 }
+	 break;
+
+      case RADEON_CMD_VECTORS:
+	 if (radeon_emit_vectors( header, &cmdbuf )) {
+	    fprintf(stderr,"radeon_emit_vectors failed\n");
+	    return -EINVAL;
+	 }
+	 break;
+
+      case RADEON_CMD_DMA_DISCARD:
+	 idx = header.dma.buf_idx;
+	 if (NORMAL)
+	    fprintf(stderr, "RADEON_CMD_DMA_DISCARD buf %d\n", idx);
+	 bufs++;
+	 break;
+
+      case RADEON_CMD_PACKET3:
+	 if (radeon_emit_packet3( &cmdbuf )) {
+	    fprintf(stderr,"radeon_emit_packet3 failed\n");
+	    return -EINVAL;
+	 }
+	 break;
+
+      case RADEON_CMD_PACKET3_CLIP:
+	 if (radeon_emit_packet3_cliprect( &cmdbuf )) {
+	    fprintf(stderr,"radeon_emit_packet3_clip failed\n");
+	    return -EINVAL;
+	 }
+	 break;
+
+      case RADEON_CMD_WAIT:
+	 break;
+
+      default:
+	 fprintf(stderr,"bad cmd_type %d at %p\n", 
+		   header.header.cmd_type,
+		   (void *)(cmdbuf.buf - sizeof(header)));
+	 return -EINVAL;
+      }
+   }
+
+   if (0)   /* statistics report, compiled in but disabled */
+   {
+      static int n = 0;
+      n++;
+      if (n == 10) {
+	 fprintf(stderr, "Bufs %d Total emitted %d real changes %d (%.2f%%)\n",
+		 bufs,
+		 total, total_changed, 
+		 ((float)total_changed/(float)total*100.0));
+	 fprintf(stderr, "Total emitted per buf: %.2f\n",
+		 (float)total/(float)bufs);
+	 fprintf(stderr, "Real changes per buf: %.2f\n",
+		 (float)total_changed/(float)bufs);
+
+	 bufs = n = total = total_changed = 0;
+      }
+   }
+
+   return 0;
+}
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_sanity.h b/xc/lib/GL/mesa/src/drv/radeon/radeon_sanity.h
new file mode 100644
index 000000000..58e8335dd
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_sanity.h
@@ -0,0 +1,8 @@
+#ifndef RADEON_SANITY_H
+#define RADEON_SANITY_H
+/* Decodes and logs the command buffer held in rmesa->store. */
+extern int radeonSanityCmdBuffer( radeonContextPtr rmesa,
+				  int nbox,
+				  XF86DRIClipRectRec *boxes );
+
+#endif
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_screen.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_screen.c
index 443cdfc3a..a45974624 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_screen.c
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_screen.c
@@ -1,4 +1,4 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_screen.c,v 1.4 2002/02/22 21:45:00 dawes Exp $ */
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_screen.c,v 1.6 2002/12/16 16:18:58 dawes Exp $ */
 /**************************************************************************
 
 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
@@ -35,11 +35,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #include "radeon_screen.h"
-#include "radeon_context.h"
-#include "radeon_ioctl.h"
-
 #include "mem.h"
 
+
 #if 1
 /* Including xf86PciInfo.h introduces a bunch of errors...
  */
@@ -47,6 +45,16 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define PCI_CHIP_RADEON_QE	0x5145
 #define PCI_CHIP_RADEON_QF	0x5146
 #define PCI_CHIP_RADEON_QG	0x5147
+
+#define PCI_CHIP_RADEON_QY	0x5159
+#define PCI_CHIP_RADEON_QZ	0x515A
+
+#define PCI_CHIP_RADEON_LW	0x4C57 /* mobility 7 - has tcl */
+
+#define PCI_CHIP_RADEON_LY	0x4C59
+#define PCI_CHIP_RADEON_LZ	0x4C5A
+
+#define PCI_CHIP_RV200_QW	0x5157 /* a confusing name for a radeon */
 #endif
 
 
@@ -62,7 +70,9 @@ radeonScreenPtr radeonCreateScreen( __DRIscreenPrivate *sPriv )
       int major, minor, patch;
       if ( XF86DRIQueryVersion( sPriv->display, &major, &minor, &patch ) ) {
          if ( major != 4 || minor < 0 ) {
-            __driUtilMessage( "Radeon DRI driver expected DRI version 4.0.x but got version %d.%d.%d", major, minor, patch );
+            __driUtilMessage( "Radeon DRI driver expected DRI version 4.0.x "
+			      "but got version %d.%d.%d",
+			      major, minor, patch );
             return NULL;
          }
       }
@@ -71,29 +81,78 @@ radeonScreenPtr radeonCreateScreen( __DRIscreenPrivate *sPriv )
    /* Check that the DDX driver version is compatible */
    if ( sPriv->ddxMajor != 4 ||
 	sPriv->ddxMinor < 0 ) {
-      __driUtilMessage( "Radeon DRI driver expected DDX driver version 4.0.x but got version %d.%d.%d", sPriv->ddxMajor, sPriv->ddxMinor, sPriv->ddxPatch );
+      __driUtilMessage( "Radeon DRI driver expected DDX driver version 4.0.x "
+			"but got version %d.%d.%d", 
+			sPriv->ddxMajor, sPriv->ddxMinor, sPriv->ddxPatch );
       return NULL;
    }
 
    /* Check that the DRM driver version is compatible */
+   /* KW:  Check minor number here too -- compatibility mode is broken
+    * atm. 
+    */
    if ( sPriv->drmMajor != 1 ||
-	sPriv->drmMinor < 2 ) {
-      __driUtilMessage( "Radeon DRI driver expected DRM driver version 1.2.x but got version %d.%d.%d", sPriv->drmMajor, sPriv->drmMinor, sPriv->drmPatch );
+	sPriv->drmMinor < 3) {
+      __driUtilMessage( "Radeon DRI driver expected DRM driver version 1.3.x "
+			"or newer but got version %d.%d.%d", 
+			sPriv->drmMajor, sPriv->drmMinor, sPriv->drmPatch );
       return NULL;
    }
 
+
    /* Allocate the private area */
    radeonScreen = (radeonScreenPtr) CALLOC( sizeof(*radeonScreen) );
    if ( !radeonScreen ) {
-      __driUtilMessage("radeonCreateScreen(): CALLOC radeonScreen struct failed");
+      __driUtilMessage("%s: CALLOC radeonScreen struct failed",
+		       __FUNCTION__);
       return NULL;
    }
 
+   if ( sPriv->drmMinor < 3 ||
+        getenv("RADEON_COMPAT")) {
+	   fprintf( stderr, "Radeon DRI driver:\n\t"
+		    "Compatibility mode for DRM driver version %d.%d.%d\n\t"
+		    "TCL will be disabled, expect reduced performance\n\t"
+		    "(prefer DRM radeon.o 1.3.x or newer)\n\t", 
+		    sPriv->drmMajor, sPriv->drmMinor, sPriv->drmPatch ); 
+   }
+
+
    /* This is first since which regions we map depends on whether or
     * not we are using a PCI card.
     */
    radeonScreen->IsPCI = radeonDRIPriv->IsPCI;
 
+   if (sPriv->drmMinor >= 3) {
+      int ret;
+      drmRadeonGetParam gp;
+
+      gp.param = RADEON_PARAM_AGP_BUFFER_OFFSET;
+      gp.value = &radeonScreen->agp_buffer_offset;
+
+      ret = drmCommandWriteRead( sPriv->fd, DRM_RADEON_GETPARAM,
+				 &gp, sizeof(gp));
+      if (ret) {
+	 FREE( radeonScreen );
+	 fprintf(stderr, "drmRadeonGetParam (RADEON_PARAM_AGP_BUFFER_OFFSET): %d\n", ret);
+	 return NULL;
+      }
+
+      if (sPriv->drmMinor >= 6) {
+	 gp.param = RADEON_PARAM_IRQ_NR;
+	 gp.value = &radeonScreen->irq;
+
+	 ret = drmCommandWriteRead( sPriv->fd, DRM_RADEON_GETPARAM,
+				    &gp, sizeof(gp));
+	 if (ret) {
+	    FREE( radeonScreen );
+	    fprintf(stderr, "drmRadeonGetParam (RADEON_PARAM_IRQ_NR): %d\n", ret);
+	    return NULL;
+	 }
+      }
+
+   }
+
    radeonScreen->mmio.handle = radeonDRIPriv->registerHandle;
    radeonScreen->mmio.size   = radeonDRIPriv->registerSize;
    if ( drmMap( sPriv->fd,
@@ -144,16 +203,21 @@ radeonScreenPtr radeonCreateScreen( __DRIscreenPrivate *sPriv )
       }
    }
 
-
+   radeonScreen->chipset = 0;
    switch ( radeonDRIPriv->deviceID ) {
+   default:
+      fprintf(stderr, "unknown chip id, assuming full radeon support\n");
    case PCI_CHIP_RADEON_QD:
    case PCI_CHIP_RADEON_QE:
    case PCI_CHIP_RADEON_QF:
    case PCI_CHIP_RADEON_QG:
-      radeonScreen->chipset = RADEON_CARD_TYPE_RADEON;
-      break;
-   default:
-      radeonScreen->chipset = RADEON_CARD_TYPE_RADEON;
+   case PCI_CHIP_RV200_QW:
+   case PCI_CHIP_RADEON_LW:
+      radeonScreen->chipset |= RADEON_CHIPSET_TCL;
+   case PCI_CHIP_RADEON_QY:
+   case PCI_CHIP_RADEON_QZ:
+   case PCI_CHIP_RADEON_LY:
+   case PCI_CHIP_RADEON_LZ:
       break;
    }
 
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_screen.h b/xc/lib/GL/mesa/src/drv/radeon/radeon_screen.h
index b5f11bcad..12cecd0dc 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_screen.h
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_screen.h
@@ -1,4 +1,4 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_screen.h,v 1.3 2002/02/22 21:45:01 dawes Exp $ */
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_screen.h,v 1.5 2002/12/16 16:18:58 dawes Exp $ */
 /**************************************************************************
 
 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
@@ -39,24 +39,33 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #ifdef GLX_DIRECT_RENDERING
 
-#include <X11/Xlibint.h>
+/*
+ * IMPORTS: these headers contain all the DRI, X and kernel-related
+ * definitions that we need.
+ */
 #include "dri_util.h"
-#include "xf86drm.h"
-#include "xf86drmRadeon.h"
+#include "radeon_common.h"
+#include "radeon_dri.h"
+#include "radeon_reg.h"
 #include "radeon_sarea.h"
 
+
 typedef struct {
    drmHandle handle;			/* Handle to the DRM region */
    drmSize size;			/* Size of the DRM region */
    drmAddress map;			/* Mapping of the DRM region */
 } radeonRegionRec, *radeonRegionPtr;
 
+/* chipset features */
+#define RADEON_CHIPSET_TCL	(1 << 0)
+
 typedef struct {
 
    int chipset;
    int cpp;
    int IsPCI;				/* Current card is a PCI card */
    int AGPMode;
+   unsigned int irq;			/* IRQ number (0 means none) */
 
    unsigned int frontOffset;
    unsigned int frontPitch;
@@ -82,6 +91,7 @@ typedef struct {
 
    __DRIscreenPrivate *driScreen;
    unsigned int sarea_priv_offset;
+   unsigned int agp_buffer_offset;	/* offset in card memory space */
 } radeonScreenRec, *radeonScreenPtr;
 
 extern radeonScreenPtr radeonCreateScreen( __DRIscreenPrivate *sPriv );
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_span.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_span.c
index 41a2668df..044073320 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_span.c
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_span.c
@@ -1,4 +1,4 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_span.c,v 1.5 2002/02/22 21:45:01 dawes Exp $ */
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_span.c,v 1.6 2002/10/30 12:51:56 alanh Exp $ */
 /**************************************************************************
 
 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
@@ -31,7 +31,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
  * Authors:
  *   Kevin E. Martin <martin@valinux.com>
  *   Gareth Hughes <gareth@valinux.com>
- *   Keith Whitwell <keithw@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
  *
  */
 
@@ -292,12 +292,22 @@ static void radeonSetReadBuffer( GLcontext *ctx,
 
    switch ( mode ) {
    case GL_FRONT_LEFT:
-      rmesa->state.pixel.readOffset = rmesa->radeonScreen->frontOffset;
-      rmesa->state.pixel.readPitch  = rmesa->radeonScreen->frontPitch;
+      if ( rmesa->sarea->pfCurrentPage == 1 ) {
+        rmesa->state.pixel.readOffset = rmesa->radeonScreen->backOffset;
+        rmesa->state.pixel.readPitch  = rmesa->radeonScreen->backPitch;
+      } else {
+      	rmesa->state.pixel.readOffset = rmesa->radeonScreen->frontOffset;
+      	rmesa->state.pixel.readPitch  = rmesa->radeonScreen->frontPitch;
+      }
       break;
    case GL_BACK_LEFT:
-      rmesa->state.pixel.readOffset = rmesa->radeonScreen->backOffset;
-      rmesa->state.pixel.readPitch  = rmesa->radeonScreen->backPitch;
+      if ( rmesa->sarea->pfCurrentPage == 1 ) {
+      	rmesa->state.pixel.readOffset = rmesa->radeonScreen->frontOffset;
+      	rmesa->state.pixel.readPitch  = rmesa->radeonScreen->frontPitch;
+      } else {
+        rmesa->state.pixel.readOffset = rmesa->radeonScreen->backOffset;
+        rmesa->state.pixel.readPitch  = rmesa->radeonScreen->backPitch;
+      }
       break;
    default:
       assert(0);
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_state.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_state.c
index 9077ee43f..fd921f8f9 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_state.c
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_state.c
@@ -1,4 +1,4 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_state.c,v 1.5 2002/09/16 18:05:20 eich Exp $ */
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_state.c,v 1.8 2002/12/16 16:18:58 dawes Exp $ */
 /*
  * Copyright 2000, 2001 VA Linux Systems Inc., Fremont, California.
  *
@@ -25,19 +25,23 @@
  *
  * Authors:
  *    Gareth Hughes <gareth@valinux.com>
- *    Keith Whitwell <keithw@valinux.com>
+ *    Keith Whitwell <keith@tungstengraphics.com>
  */
 
 #include "radeon_context.h"
-#include "radeon_state.h"
 #include "radeon_ioctl.h"
-#include "radeon_tris.h"
-#include "radeon_vb.h"
+#include "radeon_state.h"
+#include "radeon_tcl.h"
 #include "radeon_tex.h"
+#include "radeon_swtcl.h"
+#include "radeon_vtxfmt.h"
 
+#include "mem.h"
 #include "mmath.h"
 #include "enums.h"
 #include "colormac.h"
+#include "light.h"
+#include "api_arrayelt.h"
 
 #include "swrast/swrast.h"
 #include "array_cache/acache.h"
@@ -46,6 +50,14 @@
 #include "swrast_setup/swrast_setup.h"
 
 
+#define MODEL_PROJ 0
+#define MODEL      1
+#define MODEL_IT   2
+#define TEXMAT_0   3
+#define TEXMAT_1   4
+#define TEXMAT_2   5
+
+
 /* =============================================================
  * Alpha blending
  */
@@ -53,55 +65,56 @@
 static void radeonAlphaFunc( GLcontext *ctx, GLenum func, GLchan ref )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   int pp_misc = rmesa->hw.ctx.cmd[CTX_PP_MISC];	/* edit a local copy; stored back at the end */
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
+   RADEON_STATECHANGE( rmesa, ctx );
 
-   rmesa->state.hw.context.pp_misc &= ~(RADEON_ALPHA_TEST_OP_MASK |
-					RADEON_REF_ALPHA_MASK);
+   pp_misc &= ~(RADEON_ALPHA_TEST_OP_MASK | RADEON_REF_ALPHA_MASK);	/* no default case below: an unlisted func leaves the op bits cleared */
+   pp_misc |= (ref & RADEON_REF_ALPHA_MASK);	/* merge the reference value before picking the test op */
 
    switch ( func ) {
    case GL_NEVER:
-      rmesa->state.hw.context.pp_misc |= RADEON_ALPHA_TEST_FAIL;
+      pp_misc |= RADEON_ALPHA_TEST_FAIL;
       break;
    case GL_LESS:
-      rmesa->state.hw.context.pp_misc |= RADEON_ALPHA_TEST_LESS;
+      pp_misc |= RADEON_ALPHA_TEST_LESS;
       break;
    case GL_EQUAL:
-      rmesa->state.hw.context.pp_misc |= RADEON_ALPHA_TEST_EQUAL;
+      pp_misc |= RADEON_ALPHA_TEST_EQUAL;
       break;
    case GL_LEQUAL:
-      rmesa->state.hw.context.pp_misc |= RADEON_ALPHA_TEST_LEQUAL;
+      pp_misc |= RADEON_ALPHA_TEST_LEQUAL;
       break;
    case GL_GREATER:
-      rmesa->state.hw.context.pp_misc |= RADEON_ALPHA_TEST_GREATER;
+      pp_misc |= RADEON_ALPHA_TEST_GREATER;
       break;
    case GL_NOTEQUAL:
-      rmesa->state.hw.context.pp_misc |= RADEON_ALPHA_TEST_NEQUAL;
+      pp_misc |= RADEON_ALPHA_TEST_NEQUAL;
       break;
    case GL_GEQUAL:
-      rmesa->state.hw.context.pp_misc |= RADEON_ALPHA_TEST_GEQUAL;
+      pp_misc |= RADEON_ALPHA_TEST_GEQUAL;
       break;
    case GL_ALWAYS:
-      rmesa->state.hw.context.pp_misc |= RADEON_ALPHA_TEST_PASS;
+      pp_misc |= RADEON_ALPHA_TEST_PASS;
       break;
    }
 
-   rmesa->state.hw.context.pp_misc |= (ref & RADEON_REF_ALPHA_MASK);
+   rmesa->hw.ctx.cmd[CTX_PP_MISC] = pp_misc;	/* single write-back of the edited word */
 }
 
 static void radeonBlendEquation( GLcontext *ctx, GLenum mode )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLuint b = rmesa->state.hw.context.rb3d_blendcntl & ~RADEON_COMB_FCN_MASK;
+   GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & ~RADEON_COMB_FCN_MASK;
    GLboolean fallback = GL_FALSE;
 
    switch ( mode ) {
-   case GL_FUNC_ADD_EXT:
+   case GL_FUNC_ADD:
    case GL_LOGIC_OP:
       b |= RADEON_COMB_FCN_ADD_CLAMP;
       break;
 
-   case GL_FUNC_SUBTRACT_EXT:
+   case GL_FUNC_SUBTRACT:
       b |= RADEON_COMB_FCN_SUB_CLAMP;
       break;
 
@@ -112,12 +125,12 @@ static void radeonBlendEquation( GLcontext *ctx, GLenum mode )
 
    FALLBACK( rmesa, RADEON_FALLBACK_BLEND_EQ, fallback );
    if ( !fallback ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
-      rmesa->state.hw.context.rb3d_blendcntl = b;
+      RADEON_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = b;
       if ( ctx->Color.ColorLogicOpEnabled ) {
-	 rmesa->state.hw.context.rb3d_cntl |=  RADEON_ROP_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_ROP_ENABLE;
       } else {
-	 rmesa->state.hw.context.rb3d_cntl &= ~RADEON_ROP_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE;
       }
    }
 }
@@ -125,8 +138,8 @@ static void radeonBlendEquation( GLcontext *ctx, GLenum mode )
 static void radeonBlendFunc( GLcontext *ctx, GLenum sfactor, GLenum dfactor )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLuint b = rmesa->state.hw.context.rb3d_blendcntl & ~(RADEON_SRC_BLEND_MASK |
-							 RADEON_DST_BLEND_MASK);
+   GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & 
+      ~(RADEON_SRC_BLEND_MASK | RADEON_DST_BLEND_MASK);
    GLboolean fallback = GL_FALSE;
 
    switch ( ctx->Color.BlendSrcRGB ) {
@@ -142,6 +155,12 @@ static void radeonBlendFunc( GLcontext *ctx, GLenum sfactor, GLenum dfactor )
    case GL_ONE_MINUS_DST_COLOR:
       b |= RADEON_SRC_BLEND_GL_ONE_MINUS_DST_COLOR;
       break;
+   case GL_SRC_COLOR:
+      b |= RADEON_SRC_BLEND_GL_SRC_COLOR;
+      break;
+   case GL_ONE_MINUS_SRC_COLOR:
+      b |= RADEON_SRC_BLEND_GL_ONE_MINUS_SRC_COLOR;
+      break;
    case GL_SRC_ALPHA:
       b |= RADEON_SRC_BLEND_GL_SRC_ALPHA;
       break;
@@ -184,6 +203,12 @@ static void radeonBlendFunc( GLcontext *ctx, GLenum sfactor, GLenum dfactor )
    case GL_ONE_MINUS_SRC_ALPHA:
       b |= RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA;
       break;
+   case GL_DST_COLOR:
+      b |= RADEON_DST_BLEND_GL_DST_COLOR;
+      break;
+   case GL_ONE_MINUS_DST_COLOR:
+      b |= RADEON_DST_BLEND_GL_ONE_MINUS_DST_COLOR;
+      break;
    case GL_DST_ALPHA:
       b |= RADEON_DST_BLEND_GL_DST_ALPHA;
       break;
@@ -200,8 +225,8 @@ static void radeonBlendFunc( GLcontext *ctx, GLenum sfactor, GLenum dfactor )
 
    FALLBACK( rmesa, RADEON_FALLBACK_BLEND_FUNC, fallback );
    if ( !fallback ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
-      rmesa->state.hw.context.rb3d_blendcntl = b;
+      RADEON_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = b;
    }
 }
 
@@ -221,33 +246,33 @@ static void radeonDepthFunc( GLcontext *ctx, GLenum func )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
-   rmesa->state.hw.context.rb3d_zstencilcntl &= ~RADEON_Z_TEST_MASK;
+   RADEON_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_Z_TEST_MASK;
 
    switch ( ctx->Depth.Func ) {
    case GL_NEVER:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_Z_TEST_NEVER;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_NEVER;
       break;
    case GL_LESS:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_Z_TEST_LESS;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_LESS;
       break;
    case GL_EQUAL:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_Z_TEST_EQUAL;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_EQUAL;
       break;
    case GL_LEQUAL:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_Z_TEST_LEQUAL;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_LEQUAL;
       break;
    case GL_GREATER:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_Z_TEST_GREATER;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_GREATER;
       break;
    case GL_NOTEQUAL:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_Z_TEST_NEQUAL;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_NEQUAL;
       break;
    case GL_GEQUAL:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_Z_TEST_GEQUAL;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_GEQUAL;
       break;
    case GL_ALWAYS:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_Z_TEST_ALWAYS;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_ALWAYS;
       break;
    }
 }
@@ -256,19 +281,19 @@ static void radeonDepthFunc( GLcontext *ctx, GLenum func )
 static void radeonDepthMask( GLcontext *ctx, GLboolean flag )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
+   RADEON_STATECHANGE( rmesa, ctx );	/* note: 'flag' is not read; ctx->Depth.Mask is tested instead */
 
    if ( ctx->Depth.Mask ) {
-      rmesa->state.hw.context.rb3d_zstencilcntl |=  RADEON_Z_WRITE_ENABLE;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |=  RADEON_Z_WRITE_ENABLE;
    } else {
-      rmesa->state.hw.context.rb3d_zstencilcntl &= ~RADEON_Z_WRITE_ENABLE;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_Z_WRITE_ENABLE;
    }
 }
 
 static void radeonClearDepth( GLcontext *ctx, GLclampd d )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLuint format = (rmesa->state.hw.context.rb3d_zstencilcntl &
+   GLuint format = (rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &
 		    RADEON_DEPTH_FORMAT_MASK);
 
    switch ( format ) {
@@ -286,22 +311,157 @@ static void radeonClearDepth( GLcontext *ctx, GLclampd d )
  * Fog
  */
 
+
 static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLchan c[4];
+   union { int i; float f; } c, d;	/* float / raw-bits views of the FOG_C and FOG_D words */
+   GLchan col[4];
+
+   c.i = rmesa->hw.fog.cmd[FOG_C];	/* start from the current words so untouched pnames change nothing */
+   d.i = rmesa->hw.fog.cmd[FOG_D];
+
+   switch (pname) {
+   case GL_FOG_MODE:
+      if (!ctx->Fog.Enabled)	/* mode bits are only programmed while fog is enabled */
+	 return;
+      RADEON_STATECHANGE(rmesa, tcl);
+      rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_TCL_FOG_MASK;
+      switch (ctx->Fog.Mode) {
+      case GL_LINEAR:
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_LINEAR;
+	 if (ctx->Fog.Start == ctx->Fog.End) {	/* degenerate range: avoid dividing by zero */
+	    c.f = 1.0F;
+	    d.f = 1.0F;
+	 }
+	 else {
+	    c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start);
+	    d.f = 1.0/(ctx->Fog.End-ctx->Fog.Start);
+	 }
+	 break;
+      case GL_EXP:
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_EXP;
+	 c.f = 0.0;
+	 d.f = ctx->Fog.Density;
+	 break;
+      case GL_EXP2:
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_EXP2;
+	 c.f = 0.0;
+	 d.f = -(ctx->Fog.Density * ctx->Fog.Density);
+	 break;
+      default:
+	 return;
+      }
+      break;
+   case GL_FOG_DENSITY:
+      switch (ctx->Fog.Mode) {	/* same EXP/EXP2 constants as under GL_FOG_MODE; linear mode ignores density */
+      case GL_EXP:
+	 c.f = 0.0;
+	 d.f = ctx->Fog.Density;
+	 break;
+      case GL_EXP2:
+	 c.f = 0.0;
+	 d.f = -(ctx->Fog.Density * ctx->Fog.Density);
+	 break;
+      default:
+	 break;
+      }
+      break;
+   case GL_FOG_START:
+   case GL_FOG_END:
+      if (ctx->Fog.Mode == GL_LINEAR) {	/* start/end only matter for linear fog */
+	 if (ctx->Fog.Start == ctx->Fog.End) {
+	    c.f = 1.0F;
+	    d.f = 1.0F;
+	 } else {
+	    c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start);
+	    d.f = 1.0/(ctx->Fog.End-ctx->Fog.Start);
+	 }
+      }
+      break;
+   case GL_FOG_COLOR: 
+      RADEON_STATECHANGE( rmesa, ctx );
+      UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color );
+      rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] =
+	 radeonPackColor( 4, col[0], col[1], col[2], 0 );
+      break;
+   case GL_FOG_COORDINATE_SOURCE_EXT: 
+      /* What to do?
+       */
+      break;
+   default:
+      return;
+   }
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
-   UNCLAMPED_FLOAT_TO_RGB_CHAN( c, ctx->Fog.Color );
-   rmesa->state.hw.context.pp_fog_color =
-      radeonPackColor( 4, c[0], c[1], c[2], 0 );
+   if (c.i != rmesa->hw.fog.cmd[FOG_C] || d.i != rmesa->hw.fog.cmd[FOG_D]) {	/* skip the fog state change when neither word moved */
+      RADEON_STATECHANGE( rmesa, fog );
+      rmesa->hw.fog.cmd[FOG_C] = c.i;
+      rmesa->hw.fog.cmd[FOG_D] = d.i;
+   }
 }
 
 
 /* =============================================================
- * Clipping
+ * Scissoring
  */
 
+
+/* Clip *a against *b into *out; GL_FALSE means the overlap is empty. */
+static GLboolean intersect_rect( XF86DRIClipRectPtr out,
+				 XF86DRIClipRectPtr a,
+				 XF86DRIClipRectPtr b )
+{
+   *out = *a;
+
+   if ( out->x1 < b->x1 ) out->x1 = b->x1;
+   if ( out->y1 < b->y1 ) out->y1 = b->y1;
+   if ( out->x2 > b->x2 ) out->x2 = b->x2;
+   if ( out->y2 > b->y2 ) out->y2 = b->y2;
+   return ( out->x1 < out->x2 && out->y1 < out->y2 ) ? GL_TRUE : GL_FALSE;
+}
+
+
+/* Rebuild state.scissor.pClipRects: each window cliprect clipped to the scissor rect. */
+void radeonRecalcScissorRects( radeonContextPtr rmesa )
+{
+   XF86DRIClipRectPtr out;
+   int i;
+
+   /* Grow cliprect store?  Old contents are dropped, not copied. */
+   if (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
+      while (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
+	 rmesa->state.scissor.numAllocedClipRects += 1;	/* zero case */
+	 rmesa->state.scissor.numAllocedClipRects *= 2;
+      }
+
+      if (rmesa->state.scissor.pClipRects)
+	 FREE(rmesa->state.scissor.pClipRects);
+
+      rmesa->state.scissor.pClipRects =
+	 MALLOC( rmesa->state.scissor.numAllocedClipRects *
+		 sizeof(XF86DRIClipRectRec) );
+      /* Test the pointer: the count was just grown, so it cannot flag a failed MALLOC. */
+      if (!rmesa->state.scissor.pClipRects) {
+/*  	 FALLBACK( rmesa, RADEON_FALLBACK_MEMORY, GL_TRUE ); */
+	 rmesa->state.scissor.numAllocedClipRects = 0;
+	 rmesa->state.scissor.numClipRects = 0;	/* no store left: drop any stale count */
+	 return;
+      }
+   }
+
+   out = rmesa->state.scissor.pClipRects;
+   rmesa->state.scissor.numClipRects = 0;
+
+   for ( i = 0 ; i < rmesa->numClipRects ;  i++ ) {
+      if ( intersect_rect( out,
+			   &rmesa->pClipRects[i],
+			   &rmesa->state.scissor.rect ) ) {
+	 rmesa->state.scissor.numClipRects++;	/* keep this slot only when the overlap is non-empty */
+	 out++;
+      }
+   }
+}
+
+
 static void radeonUpdateScissor( GLcontext *ctx )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
@@ -319,8 +479,7 @@ static void radeonUpdateScissor( GLcontext *ctx )
       rmesa->state.scissor.rect.x2 = w + dPriv->x + 1;
       rmesa->state.scissor.rect.y2 = h + dPriv->y + 1;
 
-      if ( ctx->Scissor.Enabled )
-	 rmesa->upload_cliprects = 1;
+      radeonRecalcScissorRects( rmesa );
    }
 }
 
@@ -330,10 +489,11 @@ static void radeonScissor( GLcontext *ctx,
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-   if ( ctx->Scissor.Enabled )
+   if ( ctx->Scissor.Enabled ) {
       RADEON_FIREVERTICES( rmesa );	/* don't pipeline cliprect changes */
+      radeonUpdateScissor( ctx );
+   }
 
-   radeonUpdateScissor( ctx );
 }
 
 
@@ -344,27 +504,37 @@ static void radeonScissor( GLcontext *ctx,
 static void radeonCullFace( GLcontext *ctx, GLenum unused )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLuint s = rmesa->state.hw.setup1.se_cntl;
+   GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL];
+   GLuint t = rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL];	/* second word that receives the same cull selection */
 
    s |= RADEON_FFACE_SOLID | RADEON_BFACE_SOLID;
+   t &= ~(RADEON_CULL_FRONT | RADEON_CULL_BACK);	/* start from "cull nothing"; bits are added per mode below */
 
    if ( ctx->Polygon.CullFlag ) {
       switch ( ctx->Polygon.CullFaceMode ) {
       case GL_FRONT:
 	 s &= ~RADEON_FFACE_SOLID;
+	 t |= RADEON_CULL_FRONT;
 	 break;
       case GL_BACK:
 	 s &= ~RADEON_BFACE_SOLID;
+	 t |= RADEON_CULL_BACK;
 	 break;
       case GL_FRONT_AND_BACK:
 	 s &= ~(RADEON_FFACE_SOLID | RADEON_BFACE_SOLID);
+	 t |= (RADEON_CULL_FRONT | RADEON_CULL_BACK);
 	 break;
       }
    }
 
-   if ( rmesa->state.hw.setup1.se_cntl != s ) {
-      RADEON_STATECHANGE(rmesa, RADEON_UPLOAD_SETUP);
-      rmesa->state.hw.setup1.se_cntl = s;
+   if ( rmesa->hw.set.cmd[SET_SE_CNTL] != s ) {
+      RADEON_STATECHANGE(rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_CNTL] = s;
+   }
+
+   if ( rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] != t ) {	/* each word is written only when its value actually changed */
+      RADEON_STATECHANGE(rmesa, tcl );
+      rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] = t;
    }
 }
 
@@ -372,15 +542,19 @@ static void radeonFrontFace( GLcontext *ctx, GLenum mode )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_SETUP );
-   rmesa->state.hw.setup1.se_cntl &= ~RADEON_FFACE_CULL_DIR_MASK;
+   RADEON_STATECHANGE( rmesa, set );
+   rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_FFACE_CULL_DIR_MASK;
+
+   RADEON_STATECHANGE( rmesa, tcl );
+   rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_CULL_FRONT_IS_CCW;
 
    switch ( mode ) {
    case GL_CW:
-      rmesa->state.hw.setup1.se_cntl |= RADEON_FFACE_CULL_CW;
+      rmesa->hw.set.cmd[SET_SE_CNTL] |= RADEON_FFACE_CULL_CW;
       break;
    case GL_CCW:
-      rmesa->state.hw.setup1.se_cntl |= RADEON_FFACE_CULL_CCW;
+      rmesa->hw.set.cmd[SET_SE_CNTL] |= RADEON_FFACE_CULL_CCW;
+      rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_CULL_FRONT_IS_CCW;
       break;
    }
 }
@@ -393,15 +567,16 @@ static void radeonLineWidth( GLcontext *ctx, GLfloat widthf )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_LINE | RADEON_UPLOAD_SETUP );
+   RADEON_STATECHANGE( rmesa, lin );
+   RADEON_STATECHANGE( rmesa, set );
 
    /* Line width is stored in U6.4 format.
     */
-   rmesa->state.hw.line.se_line_width = (GLuint)(widthf * 16.0);
+   rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] = (GLuint)(widthf * 16.0);
    if ( widthf > 1.0 ) {
-      rmesa->state.hw.setup1.se_cntl |=  RADEON_WIDELINE_ENABLE;
+      rmesa->hw.set.cmd[SET_SE_CNTL] |=  RADEON_WIDELINE_ENABLE;
    } else {
-      rmesa->state.hw.setup1.se_cntl &= ~RADEON_WIDELINE_ENABLE;
+      rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_WIDELINE_ENABLE;
    }
 }
 
@@ -409,10 +584,9 @@ static void radeonLineStipple( GLcontext *ctx, GLint factor, GLushort pattern )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_LINE );
-
-   rmesa->state.hw.line.re_line_pattern = ((((GLuint)factor & 0xff) << 16) |
-					((GLuint)pattern));
+   RADEON_STATECHANGE( rmesa, lin );
+   rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = 
+      ((((GLuint)factor & 0xff) << 16) | ((GLuint)pattern));
 }
 
 
@@ -430,9 +604,9 @@ static void radeonColorMask( GLcontext *ctx,
 				  ctx->Color.ColorMask[BCOMP],
 				  ctx->Color.ColorMask[ACOMP] );
 
-   if ( rmesa->state.hw.mask.rb3d_planemask != mask ) {
-      RADEON_STATECHANGE( rmesa,  RADEON_UPLOAD_MASKS );
-      rmesa->state.hw.mask.rb3d_planemask = mask;
+   if ( rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] != mask ) {
+      RADEON_STATECHANGE( rmesa, msk );
+      rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] = mask;
    }
 }
 
@@ -447,15 +621,16 @@ static void radeonPolygonOffset( GLcontext *ctx,
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    GLfloat constant = units * rmesa->state.depth.scale;
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_ZBIAS );
-   rmesa->state.hw.zbias.se_zbias_factor   = *(GLuint *)&factor;
-   rmesa->state.hw.zbias.se_zbias_constant = *(GLuint *)&constant;
+   RADEON_STATECHANGE( rmesa, zbs );
+   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_FACTOR]   = *(GLuint *)&factor;
+   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = *(GLuint *)&constant;
 }
 
 static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    GLuint i;
+   drmRadeonStipple stipple;
 
    /* Must flip pattern upside down.
     */
@@ -463,15 +638,34 @@ static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask )
       rmesa->state.stipple.mask[31 - i] = ((GLuint *) mask)[i];
    }
 
+   /* TODO: push this into cmd mechanism
+    */
    RADEON_FIREVERTICES( rmesa );
    LOCK_HARDWARE( rmesa );
 
    /* FIXME: Use window x,y offsets into stipple RAM.
     */
-   drmRadeonPolygonStipple( rmesa->dri.fd, rmesa->state.stipple.mask );
+   stipple.mask = rmesa->state.stipple.mask;
+   drmCommandWrite( rmesa->dri.fd, DRM_RADEON_STIPPLE, 
+                    &stipple, sizeof(drmRadeonStipple) );
    UNLOCK_HARDWARE( rmesa );
 }
 
+static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLboolean flag = (ctx->_TriangleCaps & DD_TRI_UNFILLED) != 0;
+
+   /* face/mode are not read here; ctx->_TriangleCaps decides.  Can't
+    * generally do unfilled via tcl, but some good special cases work.
+    */
+   TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_UNFILLED, flag);
+   if (rmesa->TclFallback) {	/* presumably a mask of all active TCL fallbacks, not just this one -- confirm */
+      radeonChooseRenderState( ctx );
+      radeonChooseVertexState( ctx );
+   }
+}
+
 
 /* =============================================================
  * Rendering attributes
@@ -487,34 +681,478 @@ static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask )
 static void radeonUpdateSpecular( GLcontext *ctx )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   CARD32 p = rmesa->state.hw.context.pp_cntl;
+   CARD32 p = rmesa->hw.ctx.cmd[CTX_PP_CNTL];
 
-   if ( ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR &&
-        ctx->Light.Enabled) {
+   if ( ctx->_TriangleCaps & DD_SEPARATE_SPECULAR ) {
       p |=  RADEON_SPECULAR_ENABLE;
    } else {
       p &= ~RADEON_SPECULAR_ENABLE;
    }
 
-   if ( rmesa->state.hw.context.pp_cntl != p ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
-      rmesa->state.hw.context.pp_cntl = p;
+   if ( rmesa->hw.ctx.cmd[CTX_PP_CNTL] != p ) {
+      RADEON_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] = p;
+   }
+
+   /* Bizarre: have to leave lighting enabled to get fog.
+    */
+   RADEON_STATECHANGE( rmesa, tcl );
+   if ((ctx->Light.Enabled &&
+	ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR)) {
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
+   }
+   else if (ctx->Fog.Enabled) {
+      if (ctx->Light.Enabled) {
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR;
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE;
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC;
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
+      } else {
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR;
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_DIFFUSE;
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC;
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
+      }
+   }
+   else if (ctx->Light.Enabled) {
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_SPECULAR;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~RADEON_TCL_VTX_PK_SPEC;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
+   } else if (ctx->Fog.ColorSumEnabled ) {
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_SPECULAR;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LIGHTING_ENABLE;
+   } else {
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_SPECULAR;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~RADEON_TCL_VTX_PK_SPEC;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LIGHTING_ENABLE;
+   }
+
+   /* Update vertex/render formats
+    */
+   if (rmesa->TclFallback) { 
+      radeonChooseRenderState( ctx );
+      radeonChooseVertexState( ctx );
+   }
+}
+
+
+/* =============================================================
+ * Materials
+ */
+
+
+/* Recompute the global-ambient (glt) state.  Update on colormaterial,
+ * material emissive/ambient, lightmodel.globalambient.
+ */
+static void update_global_ambient( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   float *fcmd = (float *)RADEON_DB_STATE( glt );
+
+   /* Need to do more if both emissive & ambient are PREMULT (both
+    * source fields zero): fold Material[0] emission/ambient in. */
+   if ((rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &
+       ((3 << RADEON_EMISSIVE_SOURCE_SHIFT) |
+	(3 << RADEON_AMBIENT_SOURCE_SHIFT))) == 0) 
+   {
+      COPY_3V( &fcmd[GLT_RED], 
+	       ctx->Light.Material[0].Emission);
+      ACC_SCALE_3V( &fcmd[GLT_RED],
+		   ctx->Light.Model.Ambient,
+		   ctx->Light.Material[0].Ambient);
+   } 
+   else
+   {
+      COPY_3V( &fcmd[GLT_RED], ctx->Light.Model.Ambient );
+   }
+   
+   RADEON_DB_STATECHANGE(rmesa, &rmesa->hw.glt);
+}
+
+/* Reload light p's colour state (lit[p]).  Update on change to
+ *    - light[p].colors
+ *    - light[p].enabled
+ *    - material,
+ *    - colormaterial enabled
+ *    - colormaterial bitmask
+ */
+static void update_light_colors( GLcontext *ctx, GLuint p )
+{
+   struct gl_light *l = &ctx->Light.Light[p];
+
+/*     fprintf(stderr, "%s\n", __FUNCTION__); */
+
+   if (l->Enabled) {
+      radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+      float *fcmd = (float *)RADEON_DB_STATE( lit[p] );
+      GLuint bitmask = ctx->Light.ColorMaterialBitmask;
+      struct gl_material *mat = &ctx->Light.Material[0];
+
+      COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient );	 
+      COPY_4V( &fcmd[LIT_DIFFUSE_RED], l->Diffuse );
+      COPY_4V( &fcmd[LIT_SPECULAR_RED], l->Specular );
+      /* With colormaterial off, no component is exempt from scaling: */
+      if (!ctx->Light.ColorMaterialEnabled)
+	 bitmask = 0;
+      /* Scale each colour by Material[0] unless its FRONT_* bit is set. */
+      if ((bitmask & FRONT_AMBIENT_BIT) == 0) 
+	 SELF_SCALE_3V( &fcmd[LIT_AMBIENT_RED], mat->Ambient );
+
+      if ((bitmask & FRONT_DIFFUSE_BIT) == 0) 
+	 SELF_SCALE_3V( &fcmd[LIT_DIFFUSE_RED], mat->Diffuse );
+      
+      if ((bitmask & FRONT_SPECULAR_BIT) == 0) 
+	 SELF_SCALE_3V( &fcmd[LIT_SPECULAR_RED], mat->Specular );
+
+      RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] );
+   }
+}
+
+/* Two-sided lighting falls back when Material[0] and Material[1] differ.
+ * Also fallback for asym colormaterial mode in twoside lighting... */
+static void check_twoside_fallback( GLcontext *ctx )
+{
+   GLboolean fallback = GL_FALSE;
+
+   if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) {
+      if (memcmp( &ctx->Light.Material[0],
+		  &ctx->Light.Material[1],
+		  sizeof(struct gl_material)) != 0)
+	 fallback = GL_TRUE;  
+      else if (ctx->Light.ColorMaterialEnabled &&
+	       (ctx->Light.ColorMaterialBitmask & BACK_MATERIAL_BITS) != 
+	       ((ctx->Light.ColorMaterialBitmask & FRONT_MATERIAL_BITS)<<1))
+	 fallback = GL_TRUE;
+   }
+   /* Unconditional: passes GL_FALSE when neither case above applies. */
+   TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_LIGHT_TWOSIDE, fallback );
+}
+
+static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
+{
+   if (ctx->Light.ColorMaterialEnabled) {
+      radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+      GLuint light_model_ctl = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL];
+      GLuint mask = ctx->Light.ColorMaterialBitmask;
+
+      /* Default to PREMULT:
+       */
+      light_model_ctl &= ~((3 << RADEON_EMISSIVE_SOURCE_SHIFT) |
+			   (3 << RADEON_AMBIENT_SOURCE_SHIFT) |
+			   (3 << RADEON_DIFFUSE_SOURCE_SHIFT) |
+			   (3 << RADEON_SPECULAR_SOURCE_SHIFT)); 
+      /* Each FRONT_* bit in the mask selects the vertex-diffuse source: */
+      if (mask & FRONT_EMISSION_BIT) {
+	 light_model_ctl |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE <<
+			     RADEON_EMISSIVE_SOURCE_SHIFT);
+      }
+
+      if (mask & FRONT_AMBIENT_BIT) {
+	 light_model_ctl |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE <<
+			     RADEON_AMBIENT_SOURCE_SHIFT);
+      }
+	 
+      if (mask & FRONT_DIFFUSE_BIT) {
+	 light_model_ctl |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE <<
+			     RADEON_DIFFUSE_SOURCE_SHIFT);
+      }
+   
+      if (mask & FRONT_SPECULAR_BIT) {
+	 light_model_ctl |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE <<
+			     RADEON_SPECULAR_SOURCE_SHIFT);
+      }
+   
+      if (light_model_ctl != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) {
+	 GLuint p;
+	 /* Source selection changed: refresh all lights and global ambient. */
+	 RADEON_STATECHANGE( rmesa, tcl );
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = light_model_ctl;      
+
+	 for (p = 0 ; p < MAX_LIGHTS; p++) 
+	    update_light_colors( ctx, p );
+	 update_global_ambient( ctx );
+      }
+   }
+   /* Done regardless of whether colormaterial is enabled: */
+   check_twoside_fallback( ctx );
+}
+
+void radeonUpdateMaterial( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( mtl );
+   GLuint p;
+   GLuint mask = ~0;
+   /* Components named in the colormaterial bitmask are left untouched: */
+   if (ctx->Light.ColorMaterialEnabled)
+      mask &= ~ctx->Light.ColorMaterialBitmask;
+
+   if (RADEON_DEBUG & DEBUG_STATE)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   /* Copy Material[0] values for the components still in 'mask'. */
+   if (mask & FRONT_EMISSION_BIT) {
+      fcmd[MTL_EMMISSIVE_RED]   = ctx->Light.Material[0].Emission[0];
+      fcmd[MTL_EMMISSIVE_GREEN] = ctx->Light.Material[0].Emission[1];
+      fcmd[MTL_EMMISSIVE_BLUE]  = ctx->Light.Material[0].Emission[2];
+      fcmd[MTL_EMMISSIVE_ALPHA] = ctx->Light.Material[0].Emission[3];
+   }
+   if (mask & FRONT_AMBIENT_BIT) {
+      fcmd[MTL_AMBIENT_RED]     = ctx->Light.Material[0].Ambient[0];
+      fcmd[MTL_AMBIENT_GREEN]   = ctx->Light.Material[0].Ambient[1];
+      fcmd[MTL_AMBIENT_BLUE]    = ctx->Light.Material[0].Ambient[2];
+      fcmd[MTL_AMBIENT_ALPHA]   = ctx->Light.Material[0].Ambient[3];
+   }
+   if (mask & FRONT_DIFFUSE_BIT) {
+      fcmd[MTL_DIFFUSE_RED]     = ctx->Light.Material[0].Diffuse[0];
+      fcmd[MTL_DIFFUSE_GREEN]   = ctx->Light.Material[0].Diffuse[1];
+      fcmd[MTL_DIFFUSE_BLUE]    = ctx->Light.Material[0].Diffuse[2];
+      fcmd[MTL_DIFFUSE_ALPHA]   = ctx->Light.Material[0].Diffuse[3];
+   }
+   if (mask & FRONT_SPECULAR_BIT) {
+      fcmd[MTL_SPECULAR_RED]    = ctx->Light.Material[0].Specular[0];
+      fcmd[MTL_SPECULAR_GREEN]  = ctx->Light.Material[0].Specular[1];
+      fcmd[MTL_SPECULAR_BLUE]   = ctx->Light.Material[0].Specular[2];
+      fcmd[MTL_SPECULAR_ALPHA]  = ctx->Light.Material[0].Specular[3];
+   }
+   if (mask & FRONT_SHININESS_BIT) {
+      fcmd[MTL_SHININESS]       = ctx->Light.Material[0].Shininess;
+   }
+   /* Dependent state is refreshed only if the statechange reports nonzero: */
+   if (RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mtl )) {
+      for (p = 0 ; p < MAX_LIGHTS; p++) 
+	 update_light_colors( ctx, p );
+
+      check_twoside_fallback( ctx );
+      update_global_ambient( ctx );
+   }
+   else if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_STATE))
+      fprintf(stderr, "%s: Elided noop material call\n", __FUNCTION__);
+}
+
+/* _NEW_LIGHT
+ * _NEW_MODELVIEW
+ * _MESA_NEW_NEED_EYE_COORDS
+ *
+ * Uses derived state from mesa:
+ *       _VP_inf_norm
+ *       _h_inf_norm
+ *       _Position
+ *       _NormDirection
+ *       _ModelViewInvScale
+ *       _NeedEyeCoords
+ *       _EyeZDir
+ *
+ * which are calculated in light.c and are correct for the current
+ * lighting space (model or eye), hence dependencies on _NEW_MODELVIEW
+ * and _MESA_NEW_NEED_EYE_COORDS.  
+ */
+static void update_light( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   /* Have to check these, or have an automatic shortcircuit mechanism
+    * to remove noop statechanges. (Or just do a better job on the
+    * front end).
+    */
+   {
+      GLuint tmp = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL];
+
+      if (ctx->_NeedEyeCoords)
+	 tmp &= ~RADEON_LIGHT_IN_MODELSPACE;
+      else
+	 tmp |= RADEON_LIGHT_IN_MODELSPACE;
+      
+
+      /* NOTE(review): this said "Leave this test disabled: (unexplained
+       * q3 lockup) (even with new packets)", yet the test below is
+       * active -- confirm which is intended. */
+      if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) 
+      {
+	 RADEON_STATECHANGE( rmesa, tcl );
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = tmp;
+      }
+   }
+
+   {
+      GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( eye );
+      fcmd[EYE_X] = ctx->_EyeZDir[0];
+      fcmd[EYE_Y] = ctx->_EyeZDir[1];
+      fcmd[EYE_Z] = - ctx->_EyeZDir[2];
+      fcmd[EYE_RESCALE_FACTOR] = ctx->_ModelViewInvScale;
+      RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.eye );
+   }
+
+
+/*     RADEON_STATECHANGE( rmesa, glt ); */
+
+   if (ctx->Light.Enabled) {
+      GLint p;
+      for (p = 0 ; p < MAX_LIGHTS; p++) {
+	 if (ctx->Light.Light[p].Enabled) {
+	    struct gl_light *l = &ctx->Light.Light[p];
+	    GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( lit[p] );
+	    /* EyePosition[3] == 0 selects the _VP_inf_norm/_h_inf_norm path. */
+	    if (l->EyePosition[3] == 0.0) {
+	       COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm ); 
+	       COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm ); 
+	       fcmd[LIT_POSITION_W] = 0;
+	       fcmd[LIT_DIRECTION_W] = 0;
+	    } else {
+	       COPY_4V( &fcmd[LIT_POSITION_X], l->_Position );
+	       fcmd[LIT_DIRECTION_X] = -l->_NormDirection[0];
+	       fcmd[LIT_DIRECTION_Y] = -l->_NormDirection[1];
+	       fcmd[LIT_DIRECTION_Z] = -l->_NormDirection[2];
+	       fcmd[LIT_DIRECTION_W] = 0;
+	    }
+
+	    RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] );
+	 }
+      }
+   }
+}
+
+static void radeonLightfv( GLcontext *ctx, GLenum light,
+			   GLenum pname, const GLfloat *params )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLint p = light - GL_LIGHT0;
+   struct gl_light *l = &ctx->Light.Light[p];
+   GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd;
+   /* fcmd aliases lit[p].cmd; cases call RADEON_STATECHANGE before writing. */
+
+   switch (pname) {
+   case GL_AMBIENT:		
+   case GL_DIFFUSE:
+   case GL_SPECULAR:
+      update_light_colors( ctx, p );
+      break;
+
+   case GL_SPOT_DIRECTION: 
+      /* picked up in update_light */	
+      break;
+
+   case GL_POSITION: {
+      /* positions picked up in update_light, but can do flag here */	
+      GLuint flag = (p&1)? RADEON_LIGHT_1_IS_LOCAL : RADEON_LIGHT_0_IS_LOCAL;
+      GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2;
+
+      RADEON_STATECHANGE(rmesa, tcl);
+      if (l->EyePosition[3] != 0.0F)
+	 rmesa->hw.tcl.cmd[idx] |= flag;
+      else
+	 rmesa->hw.tcl.cmd[idx] &= ~flag;
+      break;
+   }
+
+   case GL_SPOT_EXPONENT:
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_SPOT_EXPONENT] = params[0];
+      break;
+
+   case GL_SPOT_CUTOFF: {
+      GLuint flag = (p&1) ? RADEON_LIGHT_1_IS_SPOT : RADEON_LIGHT_0_IS_SPOT;
+      GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2;
+      /* Stores mesa's derived l->_CosCutoff rather than params[0]: */
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_SPOT_CUTOFF] = l->_CosCutoff;
+
+      RADEON_STATECHANGE(rmesa, tcl);
+      if (l->SpotCutoff != 180.0F)
+	 rmesa->hw.tcl.cmd[idx] |= flag;
+      else
+	 rmesa->hw.tcl.cmd[idx] &= ~flag;
+      break;
+   }
+
+   case GL_CONSTANT_ATTENUATION:
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_ATTEN_CONST] = params[0];
+      break;
+   case GL_LINEAR_ATTENUATION:
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_ATTEN_LINEAR] = params[0];
+      break;
+   case GL_QUADRATIC_ATTENUATION:
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_ATTEN_QUADRATIC] = params[0];
+      break;
+   default:
+      return;
    }
+
 }
 
+		  
+
 
 static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
 				const GLfloat *param )
 {
-   if ( pname == GL_LIGHT_MODEL_COLOR_CONTROL ) {
-      radeonUpdateSpecular(ctx);
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   switch (pname) {
+      case GL_LIGHT_MODEL_AMBIENT: 
+	 update_global_ambient( ctx );
+	 break;
+
+      case GL_LIGHT_MODEL_LOCAL_VIEWER:
+	 RADEON_STATECHANGE( rmesa, tcl );
+	 if (ctx->Light.Model.LocalViewer)
+	    rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LOCAL_VIEWER;
+	 else
+	    rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LOCAL_VIEWER;
+         break;
+
+      case GL_LIGHT_MODEL_TWO_SIDE:
+	 RADEON_STATECHANGE( rmesa, tcl );
+	 if (ctx->Light.Model.TwoSide)
+	    rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_LIGHT_TWOSIDE;
+	 else
+	    rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_LIGHT_TWOSIDE;
+
+	 check_twoside_fallback( ctx );
+
+	 if (rmesa->TclFallback) {
+	    radeonChooseRenderState( ctx );
+	    radeonChooseVertexState( ctx );
+	 }
+         break;
+
+      case GL_LIGHT_MODEL_COLOR_CONTROL:
+	 radeonUpdateSpecular(ctx);
+
+	 RADEON_STATECHANGE( rmesa, tcl );
+	 if (ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR) 
+	    rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= 
+	       ~RADEON_DIFFUSE_SPECULAR_COMBINE;
+	 else
+	    rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= 
+	       RADEON_DIFFUSE_SPECULAR_COMBINE;
+         break;
+
+      default:
+         break;
    }
 }
 
 static void radeonShadeModel( GLcontext *ctx, GLenum mode )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLuint s = rmesa->state.hw.setup1.se_cntl;
+   GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL];
 
    s &= ~(RADEON_DIFFUSE_SHADE_MASK |
 	  RADEON_ALPHA_SHADE_MASK |
@@ -538,9 +1176,45 @@ static void radeonShadeModel( GLcontext *ctx, GLenum mode )
       return;
    }
 
-   if ( rmesa->state.hw.setup1.se_cntl != s ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_SETUP );
-      rmesa->state.hw.setup1.se_cntl = s;
+   if ( rmesa->hw.set.cmd[SET_SE_CNTL] != s ) {
+      RADEON_STATECHANGE( rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_CNTL] = s;
+   }
+}
+
+
+/* =============================================================
+ * User clip planes
+ */
+
+static void radeonClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
+{
+   GLint p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p];
+   /* Copies _ClipUserPlane[p] (not 'eq') word-for-word into the ucp atom. */
+   RADEON_STATECHANGE( rmesa, ucp[p] );
+   rmesa->hw.ucp[p].cmd[UCP_X] = ip[0];
+   rmesa->hw.ucp[p].cmd[UCP_Y] = ip[1];
+   rmesa->hw.ucp[p].cmd[UCP_Z] = ip[2];
+   rmesa->hw.ucp[p].cmd[UCP_W] = ip[3];
+}
+
+static void radeonUpdateClipPlanes( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint p;
+   /* Re-copy _ClipUserPlane[] for every plane with ClipEnabled[p] set. */
+   for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
+      if (ctx->Transform.ClipEnabled[p]) {
+	 GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p];
+
+	 RADEON_STATECHANGE( rmesa, ucp[p] );
+	 rmesa->hw.ucp[p].cmd[UCP_X] = ip[0];
+	 rmesa->hw.ucp[p].cmd[UCP_Y] = ip[1];
+	 rmesa->hw.ucp[p].cmd[UCP_Z] = ip[2];
+	 rmesa->hw.ucp[p].cmd[UCP_W] = ip[3];
+      }
    }
 }
 
@@ -556,50 +1230,50 @@ static void radeonStencilFunc( GLcontext *ctx, GLenum func,
    GLuint refmask = ((ctx->Stencil.Ref << RADEON_STENCIL_REF_SHIFT) |
 		     (ctx->Stencil.ValueMask << RADEON_STENCIL_MASK_SHIFT));
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT | RADEON_UPLOAD_MASKS );
+   RADEON_STATECHANGE( rmesa, ctx );
+   RADEON_STATECHANGE( rmesa, msk );
 
-   rmesa->state.hw.context.rb3d_zstencilcntl &= ~RADEON_STENCIL_TEST_MASK;
-   rmesa->state.hw.mask.rb3d_stencilrefmask &= ~(RADEON_STENCIL_REF_MASK|
-					      RADEON_STENCIL_VALUE_MASK);
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_STENCIL_TEST_MASK;
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~(RADEON_STENCIL_REF_MASK|
+						   RADEON_STENCIL_VALUE_MASK);
 
    switch ( ctx->Stencil.Function ) {
    case GL_NEVER:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_TEST_NEVER;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_NEVER;
       break;
    case GL_LESS:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_TEST_LESS;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_LESS;
       break;
    case GL_EQUAL:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_TEST_EQUAL;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_EQUAL;
       break;
    case GL_LEQUAL:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_TEST_LEQUAL;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_LEQUAL;
       break;
    case GL_GREATER:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_TEST_GREATER;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_GREATER;
       break;
    case GL_NOTEQUAL:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_TEST_NEQUAL;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_NEQUAL;
       break;
    case GL_GEQUAL:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_TEST_GEQUAL;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_GEQUAL;
       break;
    case GL_ALWAYS:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_TEST_ALWAYS;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_ALWAYS;
       break;
    }
 
-   rmesa->state.hw.mask.rb3d_stencilrefmask |= refmask;
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] |= refmask;
 }
 
 static void radeonStencilMask( GLcontext *ctx, GLuint mask )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_MASKS );
-   rmesa->state.hw.mask.rb3d_stencilrefmask &= ~RADEON_STENCIL_WRITE_MASK;
-
-   rmesa->state.hw.mask.rb3d_stencilrefmask |=
+   RADEON_STATECHANGE( rmesa, msk );
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~RADEON_STENCIL_WRITE_MASK;
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] |=
       (ctx->Stencil.WriteMask << RADEON_STENCIL_WRITEMASK_SHIFT);
 }
 
@@ -608,71 +1282,71 @@ static void radeonStencilOp( GLcontext *ctx, GLenum fail,
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
-   rmesa->state.hw.context.rb3d_zstencilcntl &= ~(RADEON_STENCIL_FAIL_MASK |
+   RADEON_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~(RADEON_STENCIL_FAIL_MASK |
 					       RADEON_STENCIL_ZFAIL_MASK |
 					       RADEON_STENCIL_ZPASS_MASK);
 
    switch ( ctx->Stencil.FailFunc ) {
    case GL_KEEP:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_FAIL_KEEP;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_KEEP;
       break;
    case GL_ZERO:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_FAIL_ZERO;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_ZERO;
       break;
    case GL_REPLACE:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_FAIL_REPLACE;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_REPLACE;
       break;
    case GL_INCR:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_FAIL_INC;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_INC;
       break;
    case GL_DECR:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_FAIL_DEC;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_DEC;
       break;
    case GL_INVERT:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_FAIL_INVERT;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_INVERT;
       break;
    }
 
    switch ( ctx->Stencil.ZFailFunc ) {
    case GL_KEEP:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_ZFAIL_KEEP;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_KEEP;
       break;
    case GL_ZERO:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_ZFAIL_ZERO;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_ZERO;
       break;
    case GL_REPLACE:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_ZFAIL_REPLACE;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_REPLACE;
       break;
    case GL_INCR:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_ZFAIL_INC;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_INC;
       break;
    case GL_DECR:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_ZFAIL_DEC;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_DEC;
       break;
    case GL_INVERT:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_ZFAIL_INVERT;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_INVERT;
       break;
    }
 
    switch ( ctx->Stencil.ZPassFunc ) {
    case GL_KEEP:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_ZPASS_KEEP;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_KEEP;
       break;
    case GL_ZERO:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_ZPASS_ZERO;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_ZERO;
       break;
    case GL_REPLACE:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_ZPASS_REPLACE;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_REPLACE;
       break;
    case GL_INCR:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_ZPASS_INC;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_INC;
       break;
    case GL_DECR:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_ZPASS_DEC;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_DEC;
       break;
    case GL_INVERT:
-      rmesa->state.hw.context.rb3d_zstencilcntl |= RADEON_STENCIL_ZPASS_INVERT;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_INVERT;
       break;
    }
 }
@@ -681,9 +1355,10 @@ static void radeonClearStencil( GLcontext *ctx, GLint s )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-   rmesa->state.stencil.clear = ((GLuint) ctx->Stencil.Clear |
-				 (0xff << RADEON_STENCIL_MASK_SHIFT) |
-				 (ctx->Stencil.WriteMask << RADEON_STENCIL_WRITEMASK_SHIFT));
+   rmesa->state.stencil.clear = 
+      ((GLuint) ctx->Stencil.Clear |
+       (0xff << RADEON_STENCIL_MASK_SHIFT) |
+       (ctx->Stencil.WriteMask << RADEON_STENCIL_WRITEMASK_SHIFT));
 }
 
 
@@ -695,6 +1370,7 @@ static void radeonClearStencil( GLcontext *ctx, GLint s )
  * To correctly position primitives:
  */
 #define SUBPIXEL_X 0.125
+#define SUBPIXEL_Y 0.125
 
 void radeonUpdateWindow( GLcontext *ctx )
 {
@@ -707,20 +1383,18 @@ void radeonUpdateWindow( GLcontext *ctx )
    GLfloat sx = v[MAT_SX];
    GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X;
    GLfloat sy = - v[MAT_SY];
-   GLfloat ty = (- v[MAT_TY]) + yoffset;
+   GLfloat ty = (- v[MAT_TY]) + yoffset + SUBPIXEL_Y;
    GLfloat sz = v[MAT_SZ] * rmesa->state.depth.scale;
    GLfloat tz = v[MAT_TZ] * rmesa->state.depth.scale;
-
-/*     fprintf(stderr, "radeonUpdateWindow %d,%d %dx%d\n", */
-/*  	   dPriv->x, dPriv->y, dPriv->w, dPriv->h); */
-
-   RADEON_STATECHANGE(rmesa, RADEON_UPLOAD_VIEWPORT);
-   rmesa->state.hw.viewport.se_vport_xscale  = *(GLuint *)&sx;
-   rmesa->state.hw.viewport.se_vport_xoffset = *(GLuint *)&tx;
-   rmesa->state.hw.viewport.se_vport_yscale  = *(GLuint *)&sy;
-   rmesa->state.hw.viewport.se_vport_yoffset = *(GLuint *)&ty;
-   rmesa->state.hw.viewport.se_vport_zscale  = *(GLuint *)&sz;
-   rmesa->state.hw.viewport.se_vport_zoffset = *(GLuint *)&tz;
+   RADEON_FIREVERTICES( rmesa );
+   RADEON_STATECHANGE( rmesa, vpt );
+
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = *(GLuint *)&sx;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = *(GLuint *)&tx;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE]  = *(GLuint *)&sy;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = *(GLuint *)&ty;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE]  = *(GLuint *)&sz;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = *(GLuint *)&tz;
 }
 
 
@@ -753,29 +1427,19 @@ void radeonUpdateViewportOffset( GLcontext *ctx )
    GLfloat tx = v[MAT_TX] + xoffset;
    GLfloat ty = (- v[MAT_TY]) + yoffset;
 
-   if ( rmesa->state.hw.viewport.se_vport_xoffset != tx ||
-	rmesa->state.hw.viewport.se_vport_yoffset != ty )
+   if ( rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] != *(GLuint *)&tx ||
+	rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] != *(GLuint *)&ty )
    {
-      rmesa->state.hw.viewport.se_vport_xoffset = *(GLuint *)&tx;
-      rmesa->state.hw.viewport.se_vport_yoffset = *(GLuint *)&ty;
-
-      if (rmesa->store.statenr) {
-	 int i;
-	 rmesa->store.state[0].dirty |= RADEON_UPLOAD_VIEWPORT;
-	 /* Note: assume vport x/yoffset are constant over the buffer:
-	  */
-	 for (i = 0 ; i < rmesa->store.statenr ; i++) {
-	    rmesa->store.state[i].viewport.se_vport_xoffset = *(GLuint *)&tx;
-	    rmesa->store.state[i].viewport.se_vport_yoffset = *(GLuint *)&ty;
-	 }
-      } else {
-	 rmesa->state.hw.dirty |= RADEON_UPLOAD_VIEWPORT;
-      }
-
+      /* Note: this should also modify whatever data the context reset
+       * code uses...
+       */
+      rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = *(GLuint *)&tx;
+      rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = *(GLuint *)&ty;
+      
       /* update polygon stipple x/y screen offset */
       {
          GLuint stx, sty;
-         GLuint m = rmesa->state.hw.misc.re_misc;
+         GLuint m = rmesa->hw.msc.cmd[MSC_RE_MISC];
 
          m &= ~(RADEON_STIPPLE_X_OFFSET_MASK |
                 RADEON_STIPPLE_Y_OFFSET_MASK);
@@ -788,9 +1452,9 @@ void radeonUpdateViewportOffset( GLcontext *ctx )
          m |= ((stx << RADEON_STIPPLE_X_OFFSET_SHIFT) |
                (sty << RADEON_STIPPLE_Y_OFFSET_SHIFT));
 
-         if ( rmesa->state.hw.misc.re_misc != m ) {
-            rmesa->state.hw.misc.re_misc = m;
-            RADEON_STATECHANGE(rmesa, RADEON_UPLOAD_MISC);
+         if ( rmesa->hw.msc.cmd[MSC_RE_MISC] != m ) {
+            RADEON_STATECHANGE( rmesa, msc );
+	    rmesa->hw.msc.cmd[MSC_RE_MISC] = m;
          }
       }
    }
@@ -845,8 +1509,8 @@ static void radeonLogicOpCode( GLcontext *ctx, GLenum opcode )
 
    ASSERT( rop < 16 );
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_MASKS );
-   rmesa->state.hw.mask.rb3d_ropcntl = radeon_rop_tab[rop];
+   RADEON_STATECHANGE( rmesa, msk );
+   rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = radeon_rop_tab[rop];
 }
 
 
@@ -860,7 +1524,9 @@ void radeonSetCliprects( radeonContextPtr rmesa, GLenum mode )
       rmesa->pClipRects = (XF86DRIClipRectPtr)dPriv->pClipRects;
       break;
    case GL_BACK_LEFT:
-      if ( dPriv->numBackClipRects == 0 ) {
+      /* Can't ignore 2d windows if we are page flipping.
+       */
+      if ( dPriv->numBackClipRects == 0 || rmesa->doPageFlip ) {
 	 rmesa->numClipRects = dPriv->numClipRects;
 	 rmesa->pClipRects = (XF86DRIClipRectPtr)dPriv->pClipRects;
       }
@@ -870,10 +1536,12 @@ void radeonSetCliprects( radeonContextPtr rmesa, GLenum mode )
       }
       break;
    default:
+      fprintf(stderr, "bad mode in radeonSetCliprects\n");
       return;
    }
 
-   rmesa->upload_cliprects = 1;
+   if (rmesa->state.scissor.enabled)
+      radeonRecalcScissorRects( rmesa );
 }
 
 
@@ -881,19 +1549,37 @@ static void radeonSetDrawBuffer( GLcontext *ctx, GLenum mode )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
+   if (RADEON_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "%s %s\n", __FUNCTION__,
+	      _mesa_lookup_enum_by_nr( mode ));
+
    RADEON_FIREVERTICES(rmesa);	/* don't pipeline cliprect changes */
 
    switch ( mode ) {
    case GL_FRONT_LEFT:
       FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_FALSE );
-      rmesa->state.color.drawOffset = rmesa->radeonScreen->frontOffset;
-      rmesa->state.color.drawPitch  = rmesa->radeonScreen->frontPitch;
+      if ( rmesa->sarea->pfCurrentPage == 1 ) {
+        rmesa->state.color.drawOffset = rmesa->radeonScreen->backOffset;
+        rmesa->state.color.drawPitch  = rmesa->radeonScreen->backPitch;
+      } else {
+      	rmesa->state.color.drawOffset = rmesa->radeonScreen->frontOffset;
+      	rmesa->state.color.drawPitch  = rmesa->radeonScreen->frontPitch;
+      }
+      rmesa->state.pixel.readOffset = rmesa->state.color.drawOffset;
+      rmesa->state.pixel.readPitch = rmesa->state.color.drawPitch;
       radeonSetCliprects( rmesa, GL_FRONT_LEFT );
       break;
    case GL_BACK_LEFT:
       FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_FALSE );
-      rmesa->state.color.drawOffset = rmesa->radeonScreen->backOffset;
-      rmesa->state.color.drawPitch  = rmesa->radeonScreen->backPitch;
+      if ( rmesa->sarea->pfCurrentPage == 1 ) {
+      	rmesa->state.color.drawOffset = rmesa->radeonScreen->frontOffset;
+      	rmesa->state.color.drawPitch  = rmesa->radeonScreen->frontPitch;
+      } else {
+        rmesa->state.color.drawOffset = rmesa->radeonScreen->backOffset;
+        rmesa->state.color.drawPitch  = rmesa->radeonScreen->backPitch;
+      }
+      rmesa->state.pixel.readOffset = rmesa->state.color.drawOffset;
+      rmesa->state.pixel.readPitch = rmesa->state.color.drawPitch;
       radeonSetCliprects( rmesa, GL_BACK_LEFT );
       break;
    default:
@@ -901,10 +1587,10 @@ static void radeonSetDrawBuffer( GLcontext *ctx, GLenum mode )
       return;
    }
 
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
-   rmesa->state.hw.context.rb3d_coloroffset = (rmesa->state.color.drawOffset &
+   RADEON_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = (rmesa->state.color.drawOffset &
 					    RADEON_COLOROFFSET_MASK);
-   rmesa->state.hw.context.rb3d_colorpitch = rmesa->state.color.drawPitch;
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = rmesa->state.color.drawPitch;
 }
 
 
@@ -915,9 +1601,10 @@ static void radeonSetDrawBuffer( GLcontext *ctx, GLenum mode )
 static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint p, flag;
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_API )
-       fprintf( stderr, "%s( %s = %s )\n",__FUNCTION__,
+   if ( RADEON_DEBUG & DEBUG_STATE )
+      fprintf( stderr, "%s( %s = %s )\n", __FUNCTION__,
 	       _mesa_lookup_enum_by_nr( cap ),
 	       state ? "GL_TRUE" : "GL_FALSE" );
 
@@ -930,369 +1617,487 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
       break;
 
    case GL_ALPHA_TEST:
-      RADEON_STATECHANGE(rmesa, RADEON_UPLOAD_CONTEXT);
+      RADEON_STATECHANGE( rmesa, ctx );
       if (state) {
-	 rmesa->state.hw.context.pp_cntl |= RADEON_ALPHA_TEST_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= RADEON_ALPHA_TEST_ENABLE;
       } else {
-	 rmesa->state.hw.context.pp_cntl &= ~RADEON_ALPHA_TEST_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_ALPHA_TEST_ENABLE;
       }
       break;
 
    case GL_BLEND:
-      RADEON_STATECHANGE(rmesa, RADEON_UPLOAD_CONTEXT);
+      RADEON_STATECHANGE( rmesa, ctx );
       if (state) {
-	 rmesa->state.hw.context.rb3d_cntl |=  RADEON_ALPHA_BLEND_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_ALPHA_BLEND_ENABLE;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ALPHA_BLEND_ENABLE;
+      }
+      if ( ctx->Color.ColorLogicOpEnabled ) {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_ROP_ENABLE;
       } else {
-	 rmesa->state.hw.context.rb3d_cntl &= ~RADEON_ALPHA_BLEND_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE;
       }
       break;
 
+   case GL_CLIP_PLANE0:
+   case GL_CLIP_PLANE1:
+   case GL_CLIP_PLANE2:
+   case GL_CLIP_PLANE3:
+   case GL_CLIP_PLANE4:
+   case GL_CLIP_PLANE5: 
+      p = cap-GL_CLIP_PLANE0;
+      RADEON_STATECHANGE( rmesa, tcl );
+      if (state) {
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (RADEON_UCP_ENABLE_0<<p);
+	 radeonClipPlane( ctx, cap, NULL );
+      }
+      else {
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(RADEON_UCP_ENABLE_0<<p);
+      }
+      break;
+
+   case GL_COLOR_MATERIAL:
+      radeonColorMaterial( ctx, 0, 0 );
+      if (!state) 
+	 radeonUpdateMaterial( ctx );
+      break;
+
    case GL_CULL_FACE:
       radeonCullFace( ctx, 0 );
       break;
 
    case GL_DEPTH_TEST:
-      RADEON_STATECHANGE(rmesa, RADEON_UPLOAD_CONTEXT);
+      RADEON_STATECHANGE(rmesa, ctx );
       if ( state ) {
-	 rmesa->state.hw.context.rb3d_cntl |=  RADEON_Z_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_Z_ENABLE;
       } else {
-	 rmesa->state.hw.context.rb3d_cntl &= ~RADEON_Z_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_Z_ENABLE;
       }
       break;
 
    case GL_DITHER:
-      RADEON_STATECHANGE(rmesa, RADEON_UPLOAD_CONTEXT);
+      RADEON_STATECHANGE(rmesa, ctx );
       if ( state ) {
-	 rmesa->state.hw.context.rb3d_cntl |=  RADEON_DITHER_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_DITHER_ENABLE;
       } else {
-	 rmesa->state.hw.context.rb3d_cntl &= ~RADEON_DITHER_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_DITHER_ENABLE;
       }
       break;
 
    case GL_FOG:
-      RADEON_STATECHANGE(rmesa, RADEON_UPLOAD_CONTEXT);
+      RADEON_STATECHANGE(rmesa, ctx );
       if ( state ) {
-	 rmesa->state.hw.context.pp_cntl |=  RADEON_FOG_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= RADEON_FOG_ENABLE;
+	 radeonFogfv( ctx, GL_FOG_MODE, 0 );
       } else {
-	 rmesa->state.hw.context.pp_cntl &= ~RADEON_FOG_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_FOG_ENABLE;
+	 RADEON_STATECHANGE(rmesa, tcl);
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_TCL_FOG_MASK;
       }
+      radeonUpdateSpecular( ctx ); /* for PK_SPEC */
+      if (rmesa->TclFallback) 
+	 radeonChooseVertexState( ctx );
+      break;
+
+   case GL_LIGHT0:
+   case GL_LIGHT1:
+   case GL_LIGHT2:
+   case GL_LIGHT3:
+   case GL_LIGHT4:
+   case GL_LIGHT5:
+   case GL_LIGHT6:
+   case GL_LIGHT7:
+      RADEON_STATECHANGE(rmesa, tcl);
+      p = cap - GL_LIGHT0;
+      if (p&1) 
+	 flag = (RADEON_LIGHT_1_ENABLE |
+		 RADEON_LIGHT_1_ENABLE_AMBIENT | 
+		 RADEON_LIGHT_1_ENABLE_SPECULAR);
+      else
+	 flag = (RADEON_LIGHT_0_ENABLE |
+		 RADEON_LIGHT_0_ENABLE_AMBIENT | 
+		 RADEON_LIGHT_0_ENABLE_SPECULAR);
+
+      if (state)
+	 rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] |= flag;
+      else
+	 rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] &= ~flag;
+
+      /* The enable bits for light p were changed above; have
+       * update_light_colors() process that light. */
+      update_light_colors( ctx, p );
       break;
 
    case GL_LIGHTING:
+      RADEON_STATECHANGE(rmesa, tcl);
+      if (state) {
+/*  	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE; */
+/*  	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE; */
+      }
+      else {
+/*  	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LIGHTING_ENABLE; */
+/*  	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_DIFFUSE; */
+      }
       radeonUpdateSpecular(ctx);
+      check_twoside_fallback( ctx );
       break;
 
    case GL_LINE_SMOOTH:
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
+      RADEON_STATECHANGE( rmesa, ctx );
       if ( state ) {
-	 rmesa->state.hw.context.pp_cntl |=  RADEON_ANTI_ALIAS_LINE;
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=  RADEON_ANTI_ALIAS_LINE;
       } else {
-	 rmesa->state.hw.context.pp_cntl &= ~RADEON_ANTI_ALIAS_LINE;
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_ANTI_ALIAS_LINE;
       }
       break;
 
    case GL_LINE_STIPPLE:
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
+      RADEON_STATECHANGE( rmesa, ctx );
       if ( state ) {
-	 rmesa->state.hw.context.pp_cntl |=  RADEON_PATTERN_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=  RADEON_PATTERN_ENABLE;
       } else {
-	 rmesa->state.hw.context.pp_cntl &= ~RADEON_PATTERN_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_PATTERN_ENABLE;
       }
       break;
 
    case GL_COLOR_LOGIC_OP:
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
+      RADEON_STATECHANGE( rmesa, ctx );
       if ( state ) {
-	 rmesa->state.hw.context.rb3d_cntl |=  RADEON_ROP_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_ROP_ENABLE;
       } else {
-	 rmesa->state.hw.context.rb3d_cntl &= ~RADEON_ROP_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE;
       }
       break;
-
-   case GL_POLYGON_OFFSET_POINT:
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_SETUP );
+      
+   case GL_NORMALIZE:
+      RADEON_STATECHANGE( rmesa, tcl );
       if ( state ) {
-	 rmesa->state.hw.setup1.se_cntl |=  RADEON_ZBIAS_ENABLE_POINT;
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |=  RADEON_NORMALIZE_NORMALS;
       } else {
-	 rmesa->state.hw.setup1.se_cntl &= ~RADEON_ZBIAS_ENABLE_POINT;
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_NORMALIZE_NORMALS;
+      }
+      break;
+
+   case GL_POLYGON_OFFSET_POINT:
+      if (rmesa->dri.drmMinor == 1) {
+	 radeonChooseRenderState( ctx );
+      } 
+      else {
+	 RADEON_STATECHANGE( rmesa, set );
+	 if ( state ) {
+	    rmesa->hw.set.cmd[SET_SE_CNTL] |=  RADEON_ZBIAS_ENABLE_POINT;
+	 } else {
+	    rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_ZBIAS_ENABLE_POINT;
+	 }
       }
       break;
 
    case GL_POLYGON_OFFSET_LINE:
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_SETUP );
-      if ( state ) {
-	 rmesa->state.hw.setup1.se_cntl |=  RADEON_ZBIAS_ENABLE_LINE;
-      } else {
-	 rmesa->state.hw.setup1.se_cntl &= ~RADEON_ZBIAS_ENABLE_LINE;
+      if (rmesa->dri.drmMinor == 1) {
+	 radeonChooseRenderState( ctx );
+      } 
+      else {
+	 RADEON_STATECHANGE( rmesa, set );
+	 if ( state ) {
+	    rmesa->hw.set.cmd[SET_SE_CNTL] |=  RADEON_ZBIAS_ENABLE_LINE;
+	 } else {
+	    rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_ZBIAS_ENABLE_LINE;
+	 }
       }
       break;
 
    case GL_POLYGON_OFFSET_FILL:
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_SETUP );
-      if ( state ) {
-	 rmesa->state.hw.setup1.se_cntl |=  RADEON_ZBIAS_ENABLE_TRI;
-      } else {
-	 rmesa->state.hw.setup1.se_cntl &= ~RADEON_ZBIAS_ENABLE_TRI;
+      if (rmesa->dri.drmMinor == 1) {
+	 radeonChooseRenderState( ctx );
+      } 
+      else {
+	 RADEON_STATECHANGE( rmesa, set );
+	 if ( state ) {
+	    rmesa->hw.set.cmd[SET_SE_CNTL] |=  RADEON_ZBIAS_ENABLE_TRI;
+	 } else {
+	    rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_ZBIAS_ENABLE_TRI;
+	 }
       }
       break;
 
    case GL_POLYGON_SMOOTH:
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
+      RADEON_STATECHANGE( rmesa, ctx );
       if ( state ) {
-	 rmesa->state.hw.context.pp_cntl |=  RADEON_ANTI_ALIAS_POLY;
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=  RADEON_ANTI_ALIAS_POLY;
       } else {
-	 rmesa->state.hw.context.pp_cntl &= ~RADEON_ANTI_ALIAS_POLY;
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_ANTI_ALIAS_POLY;
       }
       break;
 
    case GL_POLYGON_STIPPLE:
-      RADEON_STATECHANGE(rmesa, RADEON_UPLOAD_CONTEXT);
+      RADEON_STATECHANGE(rmesa, ctx );
       if ( state ) {
-	 rmesa->state.hw.context.pp_cntl |=  RADEON_STIPPLE_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=  RADEON_STIPPLE_ENABLE;
       } else {
-	 rmesa->state.hw.context.pp_cntl &= ~RADEON_STIPPLE_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_STIPPLE_ENABLE;
       }
       break;
 
+   case GL_RESCALE_NORMAL_EXT: {
+      GLboolean tmp = ctx->_NeedEyeCoords ? state : !state;
+      RADEON_STATECHANGE( rmesa, tcl );
+      if ( tmp ) {
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |=  RADEON_RESCALE_NORMALS;
+      } else {
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_RESCALE_NORMALS;
+      }
+      break;
+   }
+
    case GL_SCISSOR_TEST:
       RADEON_FIREVERTICES( rmesa );
       rmesa->state.scissor.enabled = state;
-      rmesa->upload_cliprects = 1;
+      radeonUpdateScissor( ctx );
       break;
 
    case GL_STENCIL_TEST:
       if ( rmesa->state.stencil.hwBuffer ) {
-	 RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
+	 RADEON_STATECHANGE( rmesa, ctx );
 	 if ( state ) {
-	    rmesa->state.hw.context.rb3d_cntl |=  RADEON_STENCIL_ENABLE;
+	    rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_STENCIL_ENABLE;
 	 } else {
-	    rmesa->state.hw.context.rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
+	    rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_STENCIL_ENABLE;
 	 }
       } else {
 	 FALLBACK( rmesa, RADEON_FALLBACK_STENCIL, state );
       }
       break;
 
+   case GL_TEXTURE_GEN_Q:
+   case GL_TEXTURE_GEN_R:
+   case GL_TEXTURE_GEN_S:
+   case GL_TEXTURE_GEN_T:
+      /* Picked up in radeonUpdateTextureState.
+       */
+      rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE; 
+      break;
+
+   case GL_COLOR_SUM_EXT:
+      radeonUpdateSpecular ( ctx );
+      break;
+
    default:
       return;
    }
 }
 
 
+static void radeonLightingSpaceChange( GLcontext *ctx )
+{  /* Hooked up as ctx->Driver.LightingSpaceChange in radeonInitStateFuncs(). */
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLboolean tmp;   /* GL_TRUE when RADEON_RESCALE_NORMALS must be set */
+   RADEON_STATECHANGE( rmesa, tcl );
+
+   if (RADEON_DEBUG & DEBUG_STATE)
+      fprintf(stderr, "%s %d\n", __FUNCTION__, ctx->_NeedEyeCoords);
+   /* Rescale bit = RescaleNormals if eye coords are needed, else its inverse. */
+   if (ctx->_NeedEyeCoords)
+      tmp = ctx->Transform.RescaleNormals;
+   else
+      tmp = !ctx->Transform.RescaleNormals;
+   /* Apply the result to the TCL_LIGHT_MODEL_CTL word of the tcl state. */
+   if ( tmp ) {
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |=  RADEON_RESCALE_NORMALS;
+   } else {
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_RESCALE_NORMALS;
+   }
+}
+
 /* =============================================================
- * State initialization, management
+ * Deferred state management - matrices, textures, other?
  */
 
-void radeonPrintDirty( const char *msg, GLuint state )
-{
-   fprintf( stderr,
-	    "%s: (0x%x) %s%s%s%s%s%s%s%s%s%s%s\n",
-	    msg,
-	    state,
-	    (state & RADEON_UPLOAD_CONTEXT)     ? "context, " : "",
-	    (state & RADEON_UPLOAD_LINE)        ? "line, " : "",
-	    (state & RADEON_UPLOAD_BUMPMAP)     ? "bumpmap, " : "",
-	    (state & RADEON_UPLOAD_MASKS)       ? "masks, " : "",
-	    (state & RADEON_UPLOAD_VIEWPORT)    ? "viewport, " : "",
-	    (state & RADEON_UPLOAD_SETUP)       ? "setup, " : "",
-	    (state & RADEON_UPLOAD_TCL)         ? "tcl, " : "",
-	    (state & RADEON_UPLOAD_MISC)        ? "misc, " : "",
-	    (state & RADEON_UPLOAD_TEX0)        ? "tex0, " : "",
-	    (state & RADEON_UPLOAD_TEX1)        ? "tex1, " : "",
-	    (state & RADEON_UPLOAD_TEX2)        ? "tex2, " : "");
-}
 
 
 
+static void upload_matrix( radeonContextPtr rmesa, GLfloat *src, int idx )
+{  /* Write the 16 floats of src into matrix slot idx in transposed order. */
+   float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0;
+   int i;
+   /* Pass i emits src[i], src[i+4], src[i+8], src[i+12] as four consecutive floats. */
 
+   for (i = 0 ; i < 4 ; i++) {
+      *dest++ = src[i];
+      *dest++ = src[i+4];
+      *dest++ = src[i+8];
+      *dest++ = src[i+12];
+   }
 
-static void radeonInvalidateState( GLcontext *ctx, GLuint new_state )
+   RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] ); /* NOTE(review): macro not shown here; presumably flags mat[idx] as changed - confirm */
+}
+
+static void upload_matrix_t( radeonContextPtr rmesa, GLfloat *src, int idx ) /* like upload_matrix(), but src is copied in its given order */
 {
-   _swrast_InvalidateState( ctx, new_state );
-   _swsetup_InvalidateState( ctx, new_state );
-   _ac_InvalidateState( ctx, new_state );
-   _tnl_InvalidateState( ctx, new_state );
-   RADEON_CONTEXT(ctx)->NewGLState |= new_state;
+   float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0;
+   memcpy(dest, src, 16*sizeof(float)); /* straight copy of all 16 floats, no reordering */
+   RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
 }
 
 
+static void update_texturematrix( GLcontext *ctx )
+{  /* Rebuild rmesa->TexMatEnabled, upload matrices for units 0 and 1, then sync two tcl words. */
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   GLuint tpc = rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL]; /* reassigned below before it is read */
+   GLuint vs = rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL];
+   int unit;
 
+   rmesa->TexMatEnabled = 0;
 
-/* Initialize the context's hardware state.
- */
-void radeonInitState( radeonContextPtr rmesa )
-{
-   GLcontext *ctx = rmesa->glCtx;
-   GLuint color_fmt, depth_fmt;
+   for (unit = 0 ; unit < 2; unit++) {
+      if (!ctx->Texture.Unit[unit]._ReallyEnabled) { /* unit not _ReallyEnabled: nothing to upload */
+      }
+      else if (ctx->TextureMatrix[unit].type != MATRIX_IDENTITY) {
+	 GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
+	 
+	 rmesa->TexMatEnabled |= (RADEON_TEXGEN_TEXMAT_0_ENABLE|
+				  RADEON_TEXMAT_0_ENABLE) << unit;
+
+	 if (rmesa->TexGenEnabled & (RADEON_TEXMAT_0_ENABLE << unit)) {
+	    /* Need to preconcatenate any active texgen 
+	     * obj/eyeplane matrices:
+	     */
+	    _math_matrix_mul_matrix( &rmesa->tmpmat, 
+				     &rmesa->TexGenMatrix[unit],
+				     &ctx->TextureMatrix[unit] );
+	    upload_matrix( rmesa, rmesa->tmpmat.m, TEXMAT_0+unit );
+	 } 
+	 else { /* texgen TEXMAT bit clear: select the texcoord input, upload the texture matrix alone */
+	    rmesa->TexMatEnabled |= 
+	       (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
+	    upload_matrix( rmesa, ctx->TextureMatrix[unit].m, 
+			   TEXMAT_0+unit );
+	 }
+      }
+      else if (rmesa->TexGenEnabled & (RADEON_TEXMAT_0_ENABLE << unit)) { /* identity texture matrix: texgen matrix alone */
+	 upload_matrix( rmesa, rmesa->TexGenMatrix[unit].m, 
+			TEXMAT_0+unit );
+      }
+   }
 
-   switch ( rmesa->radeonScreen->cpp ) {
-   case 2:
-      color_fmt = RADEON_COLOR_FORMAT_RGB565;
-      break;
-   case 4:
-      color_fmt = RADEON_COLOR_FORMAT_ARGB8888;
-      break;
-   default:
-      fprintf( stderr, "Error: Unsupported pixel depth... exiting\n" );
-      exit( -1 );
+
+   tpc = (rmesa->TexMatEnabled | rmesa->TexGenEnabled);
+   /* Clear both 4-bit texture output selectors, then choose computed vs. input per unit. */
+   vs &= ~((0xf << RADEON_TCL_TEX_0_OUTPUT_SHIFT) |
+	   (0xf << RADEON_TCL_TEX_1_OUTPUT_SHIFT));
+
+   if (tpc & RADEON_TEXGEN_TEXMAT_0_ENABLE)
+      vs |= RADEON_TCL_TEX_COMPUTED_TEX_0 << RADEON_TCL_TEX_0_OUTPUT_SHIFT;
+   else
+      vs |= RADEON_TCL_TEX_INPUT_TEX_0 << RADEON_TCL_TEX_0_OUTPUT_SHIFT;
+
+   if (tpc & RADEON_TEXGEN_TEXMAT_1_ENABLE)
+      vs |= RADEON_TCL_TEX_COMPUTED_TEX_1 << RADEON_TCL_TEX_1_OUTPUT_SHIFT;
+   else
+      vs |= RADEON_TCL_TEX_INPUT_TEX_1 << RADEON_TCL_TEX_1_OUTPUT_SHIFT;
+   /* Touch the tcl state only when one of the two words actually changed. */
+   if (tpc != rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL] ||
+       vs != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL]) {
+      
+      RADEON_STATECHANGE(rmesa, tcl);
+      rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL] = tpc;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] = vs;
    }
+}
 
-   rmesa->state.color.clear = 0x00000000;
 
-   switch ( ctx->Visual.depthBits ) {
-   case 16:
-      rmesa->state.depth.clear = 0x0000ffff;
-      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffff;
-      depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
-      rmesa->state.stencil.clear = 0x00000000;
-      break;
-   case 24:
-      rmesa->state.depth.clear = 0x00ffffff;
-      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff;
-      depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
-      rmesa->state.stencil.clear = 0xffff0000;
-      break;
-   default:
-      fprintf( stderr, "Error: Unsupported depth %d... exiting\n",
-	       ctx->Visual.depthBits );
-      exit( -1 );
+
+void radeonValidateState( GLcontext *ctx )
+{  /* Act on the dirty bits accumulated in rmesa->NewGLState, then clear them. */
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint new_state = rmesa->NewGLState;
+   /* Texture state goes first; NewGLState is re-read afterwards to pick up added bits. */
+   if (new_state & _NEW_TEXTURE) {
+      radeonUpdateTextureState( ctx );
+      new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
    }
 
-   /* Only have hw stencil when depth buffer is 24 bits deep */
-   rmesa->state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 &&
-				     ctx->Visual.depthBits == 24 );
+   /* Need an event driven matrix update?
+    */
+   if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION)) 
+      upload_matrix( rmesa, ctx->_ModelProjectMatrix.m, MODEL_PROJ );
 
-   rmesa->RenderIndex = ~0;
-   rmesa->Fallback = 0;
-   rmesa->render_primitive = GL_TRIANGLES;
-   rmesa->hw_primitive = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST;
+   /* Need these for lighting (shouldn't upload otherwise)
+    */
+   if (new_state & (_NEW_MODELVIEW)) {
+      upload_matrix( rmesa, ctx->ModelView.m, MODEL );
+      upload_matrix_t( rmesa, ctx->ModelView.inv, MODEL_IT );
+   }
 
-   if ( ctx->Visual.doubleBufferMode ) {
-      rmesa->state.color.drawOffset = rmesa->radeonScreen->backOffset;
-      rmesa->state.color.drawPitch  = rmesa->radeonScreen->backPitch;
-   } else {
-      rmesa->state.color.drawOffset = rmesa->radeonScreen->frontOffset;
-      rmesa->state.color.drawPitch  = rmesa->radeonScreen->frontPitch;
+   /* Does this need to be triggered on e.g. modelview for
+    * texgen-derived objplane/eyeplane matrices?
+    */
+   if (new_state & _NEW_TEXTURE_MATRIX) {
+      update_texturematrix( ctx );
+   }      
+
+   if (new_state & (_NEW_LIGHT|_NEW_MODELVIEW|_MESA_NEW_NEED_EYE_COORDS)) {
+      update_light( ctx );
    }
-   rmesa->state.pixel.readOffset = rmesa->state.color.drawOffset;
-   rmesa->state.pixel.readPitch  = rmesa->state.color.drawPitch;
 
-   /* Harware state:
+   /* emit all active clip planes if projection matrix changes.
     */
-   rmesa->state.hw.context.pp_misc = (RADEON_ALPHA_TEST_PASS |
-				   RADEON_CHROMA_FUNC_FAIL |
-				   RADEON_CHROMA_KEY_NEAREST |
-				   RADEON_SHADOW_FUNC_EQUAL |
-				   RADEON_SHADOW_PASS_1 |
-				   RADEON_RIGHT_HAND_CUBE_OGL);
+   if (new_state & (_NEW_PROJECTION)) {
+      if (ctx->Transform._AnyClip) 
+	 radeonUpdateClipPlanes( ctx );
+   }
 
-   rmesa->state.hw.context.pp_fog_color = ((0x00000000 & RADEON_FOG_COLOR_MASK) |
-					RADEON_FOG_VERTEX |
-					RADEON_FOG_USE_DEPTH);
 
-   rmesa->state.hw.context.re_solid_color = 0x00000000;
+   rmesa->NewGLState = 0; /* everything pending has been handled */
+}
 
-   rmesa->state.hw.context.rb3d_blendcntl = (RADEON_COMB_FCN_ADD_CLAMP |
-					  RADEON_SRC_BLEND_GL_ONE |
-					  RADEON_DST_BLEND_GL_ZERO );
 
-   rmesa->state.hw.context.rb3d_depthoffset = rmesa->radeonScreen->depthOffset;
+static void radeonInvalidateState( GLcontext *ctx, GLuint new_state )
+{  /* Installed as ctx->Driver.UpdateState; forwards new_state to each helper module. */
+   _swrast_InvalidateState( ctx, new_state );
+   _swsetup_InvalidateState( ctx, new_state );
+   _ac_InvalidateState( ctx, new_state );
+   _tnl_InvalidateState( ctx, new_state );
+   _ae_invalidate_state( ctx, new_state );
+   RADEON_CONTEXT(ctx)->NewGLState |= new_state; /* accumulated here, consumed and cleared by radeonValidateState() */
+   radeonVtxfmtInvalidate( ctx );
+}
 
-   rmesa->state.hw.context.rb3d_depthpitch = ((rmesa->radeonScreen->depthPitch &
-					    RADEON_DEPTHPITCH_MASK) |
-					   RADEON_DEPTH_ENDIAN_NO_SWAP);
+static void radeonWrapRunPipeline( GLcontext *ctx )
+{  /* Installed as TNL_CONTEXT(ctx)->Driver.RunPipeline in radeonInitStateFuncs(). */
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
 
-   rmesa->state.hw.context.rb3d_zstencilcntl = (depth_fmt |
-					     RADEON_Z_TEST_LESS |
-					     RADEON_STENCIL_TEST_ALWAYS |
-					     RADEON_STENCIL_FAIL_KEEP |
-					     RADEON_STENCIL_ZPASS_KEEP |
-					     RADEON_STENCIL_ZFAIL_KEEP |
-					     RADEON_Z_WRITE_ENABLE);
+   if (0) /* debug trace, compiled out */
+      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState);
 
-   rmesa->state.hw.context.pp_cntl = (RADEON_SCISSOR_ENABLE |
-				   RADEON_ANTI_ALIAS_NONE);
+   /* Validate state (only when dirty bits are pending):
+    */
+   if (rmesa->NewGLState)
+      radeonValidateState( ctx );
 
-   rmesa->state.hw.context.rb3d_cntl = (RADEON_PLANE_MASK_ENABLE |
-				     color_fmt |
-				     RADEON_ZBLOCK16);
+   if (tnl->vb.Material) { /* turn the MATERIAL TCL fallback on for this run */
+      TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_MATERIAL, GL_TRUE );
+   }
 
-   rmesa->state.hw.context.rb3d_coloroffset = (rmesa->state.color.drawOffset &
-					    RADEON_COLOROFFSET_MASK);
+   /* Run the pipeline.
+    */ 
+   _tnl_run_pipeline( ctx );
 
-   rmesa->state.hw.context.re_width_height = ((0x7ff << RADEON_RE_WIDTH_SHIFT) |
-					   (0x7ff << RADEON_RE_HEIGHT_SHIFT));
-
-   rmesa->state.hw.context.rb3d_colorpitch = ((rmesa->state.color.drawPitch &
-					    RADEON_COLORPITCH_MASK) |
-					   RADEON_COLOR_ENDIAN_NO_SWAP);
-
-   rmesa->state.hw.setup1.se_cntl = (RADEON_FFACE_CULL_CCW |
-				 RADEON_BFACE_SOLID |
-				 RADEON_FFACE_SOLID |
-				 RADEON_FLAT_SHADE_VTX_LAST |
-				 RADEON_DIFFUSE_SHADE_GOURAUD |
-				 RADEON_ALPHA_SHADE_GOURAUD |
-				 RADEON_SPECULAR_SHADE_GOURAUD |
-				 RADEON_FOG_SHADE_GOURAUD |
-				 RADEON_VPORT_XY_XFORM_ENABLE |
-				 RADEON_VPORT_Z_XFORM_ENABLE |
-				 RADEON_VTX_PIX_CENTER_OGL |
-				 RADEON_ROUND_MODE_TRUNC |
-				 RADEON_ROUND_PREC_8TH_PIX);
-
-   rmesa->state.hw.vertex.se_coord_fmt = (
-#if 1
-      RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
-      RADEON_VTX_Z_PRE_MULT_1_OVER_W0 |
-#else
-      RADEON_VTX_W0_IS_NOT_1_OVER_W0 |
-#endif
-      RADEON_TEX1_W_ROUTING_USE_Q1);
-
-   rmesa->state.hw.setup2.se_cntl_status = (RADEON_VC_NO_SWAP |
-					    RADEON_TCL_BYPASS);
-
-   rmesa->state.hw.line.re_line_pattern = ((0x0000 & RADEON_LINE_PATTERN_MASK) |
-					(0 << RADEON_LINE_REPEAT_COUNT_SHIFT) |
-					(0 << RADEON_LINE_PATTERN_START_SHIFT) |
-					RADEON_LINE_PATTERN_LITTLE_BIT_ORDER);
-
-   rmesa->state.hw.line.re_line_state = ((0 << RADEON_LINE_CURRENT_PTR_SHIFT) |
-				      (1 << RADEON_LINE_CURRENT_COUNT_SHIFT));
-
-   rmesa->state.hw.line.se_line_width = (1 << 4);
-
-   rmesa->state.hw.bumpmap.pp_lum_matrix = 0x00000000;
-   rmesa->state.hw.bumpmap.pp_rot_matrix_0 = 0x00000000;
-   rmesa->state.hw.bumpmap.pp_rot_matrix_1 = 0x00000000;
-
-   rmesa->state.hw.mask.rb3d_stencilrefmask = ((0x00 << RADEON_STENCIL_REF_SHIFT) |
-					       (0xff << RADEON_STENCIL_MASK_SHIFT) |
-					       (0xff << RADEON_STENCIL_WRITEMASK_SHIFT));
-
-   rmesa->state.hw.mask.rb3d_ropcntl = RADEON_ROP_COPY;
-   rmesa->state.hw.mask.rb3d_planemask = 0xffffffff;
-
-   rmesa->state.hw.viewport.se_vport_xscale  = 0x00000000;
-   rmesa->state.hw.viewport.se_vport_xoffset = 0x00000000;
-   rmesa->state.hw.viewport.se_vport_yscale  = 0x00000000;
-   rmesa->state.hw.viewport.se_vport_yoffset = 0x00000000;
-   rmesa->state.hw.viewport.se_vport_zscale  = 0x00000000;
-   rmesa->state.hw.viewport.se_vport_zoffset = 0x00000000;
-
-   rmesa->state.hw.misc.re_misc = ((0 << RADEON_STIPPLE_X_OFFSET_SHIFT) |
-				   (0 << RADEON_STIPPLE_Y_OFFSET_SHIFT) |
-				   RADEON_STIPPLE_BIG_BIT_ORDER);
-
-   rmesa->state.hw.dirty = RADEON_UPLOAD_CONTEXT_ALL;
+   if (tnl->vb.Material) { /* ...and turn it off again once the pipeline has run */
+      TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_MATERIAL, GL_FALSE );
+      radeonUpdateMaterial( ctx ); /* not needed any more? */
+   }
 }
 
+
+
+
 /* Initialize the driver's state functions.
  */
 void radeonInitStateFuncs( GLcontext *ctx )
 {
    ctx->Driver.UpdateState		= radeonInvalidateState;
+   ctx->Driver.LightingSpaceChange      = radeonLightingSpaceChange;
 
    ctx->Driver.SetDrawBuffer		= radeonSetDrawBuffer;
 
@@ -1304,6 +2109,7 @@ void radeonInitStateFuncs( GLcontext *ctx )
    ctx->Driver.ClearDepth		= radeonClearDepth;
    ctx->Driver.ClearIndex		= NULL;
    ctx->Driver.ClearStencil		= radeonClearStencil;
+   ctx->Driver.ClipPlane		= radeonClipPlane;
    ctx->Driver.ColorMask		= radeonColorMask;
    ctx->Driver.CullFace			= radeonCullFace;
    ctx->Driver.DepthFunc		= radeonDepthFunc;
@@ -1315,12 +2121,15 @@ void radeonInitStateFuncs( GLcontext *ctx )
    ctx->Driver.Hint			= NULL;
    ctx->Driver.IndexMask		= NULL;
    ctx->Driver.LightModelfv		= radeonLightModelfv;
-   ctx->Driver.Lightfv			= NULL;
+   ctx->Driver.Lightfv			= radeonLightfv;
    ctx->Driver.LineStipple              = radeonLineStipple;
    ctx->Driver.LineWidth                = radeonLineWidth;
    ctx->Driver.LogicOpcode		= radeonLogicOpCode;
-   ctx->Driver.PolygonMode		= NULL;
-   ctx->Driver.PolygonOffset		= radeonPolygonOffset;
+   ctx->Driver.PolygonMode		= radeonPolygonMode;
+
+   if (RADEON_CONTEXT(ctx)->dri.drmMinor > 1)
+      ctx->Driver.PolygonOffset		= radeonPolygonOffset;
+
    ctx->Driver.PolygonStipple		= radeonPolygonStipple;
    ctx->Driver.RenderMode		= radeonRenderMode;
    ctx->Driver.Scissor			= radeonScissor;
@@ -1337,7 +2146,6 @@ void radeonInitStateFuncs( GLcontext *ctx )
    ctx->Driver.CopyPixels               = _swrast_CopyPixels;
    ctx->Driver.DrawPixels               = _swrast_DrawPixels;
    ctx->Driver.ReadPixels               = _swrast_ReadPixels;
-   ctx->Driver.ResizeBuffers            = _swrast_alloc_buffers;
 
    /* Swrast hooks for imaging extensions:
     */
@@ -1345,4 +2153,7 @@ void radeonInitStateFuncs( GLcontext *ctx )
    ctx->Driver.CopyColorSubTable	= _swrast_CopyColorSubTable;
    ctx->Driver.CopyConvolutionFilter1D	= _swrast_CopyConvolutionFilter1D;
    ctx->Driver.CopyConvolutionFilter2D	= _swrast_CopyConvolutionFilter2D;
+
+   TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange = radeonUpdateMaterial;
+   TNL_CONTEXT(ctx)->Driver.RunPipeline = radeonWrapRunPipeline;
 }
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_state.h b/xc/lib/GL/mesa/src/drv/radeon/radeon_state.h
index b34e17133..45a368d95 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_state.h
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_state.h
@@ -1,4 +1,4 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_state.h,v 1.3 2002/09/16 18:05:20 eich Exp $ */
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_state.h,v 1.5 2002/11/05 17:46:09 tsi Exp $ */
 /**************************************************************************
 
 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
@@ -44,20 +44,33 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 extern void radeonInitState( radeonContextPtr rmesa );
 extern void radeonInitStateFuncs( GLcontext *ctx );
 
-extern void radeonUpdateWindow( GLcontext *ctx );
+extern void radeonUpdateMaterial( GLcontext *ctx );
+
 extern void radeonSetCliprects( radeonContextPtr rmesa, GLenum mode );
+extern void radeonRecalcScissorRects( radeonContextPtr rmesa );
 extern void radeonUpdateViewportOffset( GLcontext *ctx );
+extern void radeonUpdateWindow( GLcontext *ctx );
 
-extern void radeonPrintDirty( const char *msg, GLuint state );
+extern void radeonValidateState( GLcontext *ctx );
+
+extern void radeonPrintDirty( radeonContextPtr rmesa,
+			      const char *msg );
 
 
 extern void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode );
 #define FALLBACK( rmesa, bit, mode ) do {				\
-   if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n",__FUNCTION__,	\
-		     bit, mode );					\
+   if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n",		\
+		     __FUNCTION__, bit, mode );				\
    radeonFallback( rmesa->glCtx, bit, mode );				\
 } while (0)
 
 
+#define MODEL_PROJ 0   /* matrix slot: ctx->_ModelProjectMatrix (see radeonValidateState) */
+#define MODEL      1   /* matrix slot: ctx->ModelView.m */
+#define MODEL_IT   2   /* matrix slot: ctx->ModelView.inv, uploaded via upload_matrix_t() */
+#define TEXMAT_0   3   /* matrix slot: texture unit 0, used as TEXMAT_0+unit */
+#define TEXMAT_1   4   /* matrix slot: texture unit 1 (TEXMAT_0+1) */
+#define TEXMAT_2   5   /* NOTE(review): no use visible in this patch excerpt - confirm */
+
 #endif
 #endif
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_state_init.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_state_init.c
new file mode 100644
index 000000000..79c07e863
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_state_init.c
@@ -0,0 +1,556 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_state_init.c,v 1.3 2003/02/22 06:21:11 dawes Exp $ */
+/*
+ * Copyright 2000, 2001 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Gareth Hughes <gareth@valinux.com>
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "radeon_context.h"
+#include "radeon_ioctl.h"
+#include "radeon_state.h"
+#include "radeon_tcl.h"
+#include "radeon_tex.h"
+#include "radeon_swtcl.h"
+#include "radeon_vtxfmt.h"
+
+#include "mem.h"
+#include "mmath.h"
+#include "enums.h"
+#include "colormac.h"
+#include "light.h"
+#include "api_arrayelt.h"
+
+#include "swrast/swrast.h"
+#include "array_cache/acache.h"
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+#include "swrast_setup/swrast_setup.h"
+
+/* =============================================================
+ * State initialization
+ */
+
+/* Debug aid: print msg, then the name of every atom on the hw.dirty list. */
+void radeonPrintDirty( radeonContextPtr rmesa, const char *msg )
+{
+   struct radeon_state_atom *l;
+
+   /* msg is data, not a format string: a '%' in it must print literally. */
+   fprintf(stderr, "%s: ", msg);
+
+   foreach(l, &(rmesa->hw.dirty)) {
+      fprintf(stderr, "%s, ", l->name);
+   }
+   fprintf(stderr, "\n");
+}
+
+static int cmdpkt( int id )	/* header word for a RADEON_CMD_PACKET command */
+{
+   drmRadeonCmdHeader hdr;	/* presumably a union: .i views the fields */
+   hdr.i = 0;			/* zero the int view before setting fields */
+   hdr.packet.packet_id = id;
+   hdr.packet.cmd_type = RADEON_CMD_PACKET;
+   return hdr.i;
+}
+
+/* Header word for a RADEON_CMD_VECTORS command with the given fields. */
+static int cmdvec( int offset, int stride, int count )
+{
+   drmRadeonCmdHeader hdr;	/* presumably a union: .i views the fields */
+   hdr.i = 0;
+   hdr.vectors.count = count;
+   hdr.vectors.stride = stride;
+   hdr.vectors.offset = offset;
+   hdr.vectors.cmd_type = RADEON_CMD_VECTORS; return hdr.i;
+}
+
+/* Header word for a RADEON_CMD_SCALARS command with the given fields. */
+static int cmdscl( int offset, int stride, int count )
+{
+   drmRadeonCmdHeader hdr;	/* presumably a union: .i views the fields */
+   hdr.i = 0;
+   hdr.scalars.count = count;
+   hdr.scalars.stride = stride;
+   hdr.scalars.offset = offset;
+   hdr.scalars.cmd_type = RADEON_CMD_SCALARS; return hdr.i;
+}
+
+#define CHECK( NM, FLAG )			\
+static GLboolean check_##NM( GLcontext *ctx )	\
+{						\
+   return FLAG;					\
+}
+/* CHECK above tests FLAG alone; TCL_CHECK below also needs !TclFallback. */
+#define TCL_CHECK( NM, FLAG )				\
+static GLboolean check_##NM( GLcontext *ctx )		\
+{							\
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);	\
+   return !rmesa->TclFallback && (FLAG);		\
+}
+/* Each line below defines check_<name>(); ALLOC_STATE binds them by name. */
+
+CHECK( always, GL_TRUE )
+CHECK( tex0, ctx->Texture.Unit[0]._ReallyEnabled )
+CHECK( tex1, ctx->Texture.Unit[1]._ReallyEnabled )
+CHECK( fog, ctx->Fog.Enabled )
+TCL_CHECK( tcl, GL_TRUE )
+TCL_CHECK( tcl_tex0, ctx->Texture.Unit[0]._ReallyEnabled )
+TCL_CHECK( tcl_tex1, ctx->Texture.Unit[1]._ReallyEnabled )
+TCL_CHECK( tcl_lighting, ctx->Light.Enabled )
+TCL_CHECK( tcl_eyespace_or_lighting, ctx->_NeedEyeCoords || ctx->Light.Enabled )
+TCL_CHECK( tcl_lit0, ctx->Light.Enabled && ctx->Light.Light[0].Enabled )
+TCL_CHECK( tcl_lit1, ctx->Light.Enabled && ctx->Light.Light[1].Enabled )
+TCL_CHECK( tcl_lit2, ctx->Light.Enabled && ctx->Light.Light[2].Enabled )
+TCL_CHECK( tcl_lit3, ctx->Light.Enabled && ctx->Light.Light[3].Enabled )
+TCL_CHECK( tcl_lit4, ctx->Light.Enabled && ctx->Light.Light[4].Enabled )
+TCL_CHECK( tcl_lit5, ctx->Light.Enabled && ctx->Light.Light[5].Enabled )
+TCL_CHECK( tcl_lit6, ctx->Light.Enabled && ctx->Light.Light[6].Enabled )
+TCL_CHECK( tcl_lit7, ctx->Light.Enabled && ctx->Light.Light[7].Enabled )
+TCL_CHECK( tcl_ucp0, ctx->Transform.ClipEnabled[0] )
+TCL_CHECK( tcl_ucp1, ctx->Transform.ClipEnabled[1] )
+TCL_CHECK( tcl_ucp2, ctx->Transform.ClipEnabled[2] )
+TCL_CHECK( tcl_ucp3, ctx->Transform.ClipEnabled[3] )
+TCL_CHECK( tcl_ucp4, ctx->Transform.ClipEnabled[4] )
+TCL_CHECK( tcl_ucp5, ctx->Transform.ClipEnabled[5] )
+TCL_CHECK( tcl_eyespace_or_fog, ctx->_NeedEyeCoords || ctx->Fog.Enabled ) 
+
+
+
+/* Initialize the context's hardware state: pick buffer formats, allocate the
+ * state atoms on hw.dirty, and fill in packet headers and default values. */
+void radeonInitState( radeonContextPtr rmesa )
+{
+   GLcontext *ctx = rmesa->glCtx;
+   GLuint color_fmt, depth_fmt, i;
+
+   switch ( rmesa->radeonScreen->cpp ) {
+   case 2:
+      color_fmt = RADEON_COLOR_FORMAT_RGB565;
+      break;
+   case 4:
+      color_fmt = RADEON_COLOR_FORMAT_ARGB8888;
+      break;
+   default:
+      fprintf( stderr, "Error: Unsupported pixel depth... exiting\n" );
+      exit( -1 );
+   }
+
+   rmesa->state.color.clear = 0x00000000;
+
+   switch ( ctx->Visual.depthBits ) {
+   case 16:
+      rmesa->state.depth.clear = 0x0000ffff;
+      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffff;
+      depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
+      rmesa->state.stencil.clear = 0x00000000;
+      break;
+   case 24:
+      rmesa->state.depth.clear = 0x00ffffff;
+      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff;
+      depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
+      rmesa->state.stencil.clear = 0xff000000;
+      break;
+   default:
+      fprintf( stderr, "Error: Unsupported depth %d... exiting\n",
+	       ctx->Visual.depthBits );
+      exit( -1 );
+   }
+
+   /* Only have hw stencil when depth buffer is 24 bits deep */
+   rmesa->state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 &&
+				     ctx->Visual.depthBits == 24 );
+
+   rmesa->Fallback = 0;
+
+   if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) {
+      rmesa->state.color.drawOffset = rmesa->radeonScreen->backOffset;
+      rmesa->state.color.drawPitch  = rmesa->radeonScreen->backPitch;
+   } else {
+      rmesa->state.color.drawOffset = rmesa->radeonScreen->frontOffset;
+      rmesa->state.color.drawPitch  = rmesa->radeonScreen->frontPitch;
+   }
+   rmesa->state.pixel.readOffset = rmesa->state.color.drawOffset;
+   rmesa->state.pixel.readPitch  = rmesa->state.color.drawPitch;
+
+   /* Initialize lists: both start empty; ALLOC_STATE below puts every atom
+    * on hw.dirty. */
+   make_empty_list(&(rmesa->hw.dirty));
+   make_empty_list(&(rmesa->hw.clean));
+
+
+#define ALLOC_STATE( ATOM, CHK, SZ, NM, FLAG )				\
+   do {								\
+      rmesa->hw.ATOM.cmd_size = SZ;				\
+      rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int));	\
+      rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int));	\
+      rmesa->hw.ATOM.name = NM;					\
+      rmesa->hw.ATOM.is_tcl = FLAG;					\
+      rmesa->hw.ATOM.check = check_##CHK;				\
+      insert_at_head(&(rmesa->hw.dirty), &(rmesa->hw.ATOM));	\
+   } while (0)
+      
+      
+   /* Allocate state buffers.  Arguments: atom, check_<CHK> predicate, size
+    * in ints, name (printed by radeonPrintDirty), is_tcl flag. */
+   ALLOC_STATE( ctx, always, CTX_STATE_SIZE, "CTX/context", 0 );
+   ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 );
+   ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 );
+   ALLOC_STATE( vpt, always, VPT_STATE_SIZE, "VPT/viewport", 0 );
+   ALLOC_STATE( set, always, SET_STATE_SIZE, "SET/setup", 0 );
+   ALLOC_STATE( msc, always, MSC_STATE_SIZE, "MSC/misc", 0 );
+   ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 );
+   ALLOC_STATE( tcl, always, TCL_STATE_SIZE, "TCL/tcl", 1 );
+   ALLOC_STATE( mtl, tcl_lighting, MTL_STATE_SIZE, "MTL/material", 1 );
+   ALLOC_STATE( grd, always, GRD_STATE_SIZE, "GRD/guard-band", 1 );
+   ALLOC_STATE( fog, fog, FOG_STATE_SIZE, "FOG/fog", 1 );
+   ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 1 );
+   ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 1 );
+   ALLOC_STATE( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0 );
+   ALLOC_STATE( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0 );
+   ALLOC_STATE( mat[0], tcl, MAT_STATE_SIZE, "MAT/modelproject", 1 );
+   ALLOC_STATE( mat[1], tcl_eyespace_or_fog, MAT_STATE_SIZE, "MAT/modelview", 1 );
+   ALLOC_STATE( mat[2], tcl_eyespace_or_lighting, MAT_STATE_SIZE, "MAT/it-modelview", 1 );
+   ALLOC_STATE( mat[3], tcl_tex0, MAT_STATE_SIZE, "MAT/texmat0", 1 );
+   ALLOC_STATE( mat[4], tcl_tex1, MAT_STATE_SIZE, "MAT/texmat1", 1 );
+   ALLOC_STATE( ucp[0], tcl_ucp0, UCP_STATE_SIZE, "UCP/userclip-0", 1 );
+   ALLOC_STATE( ucp[1], tcl_ucp1, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
+   ALLOC_STATE( ucp[2], tcl_ucp2, UCP_STATE_SIZE, "UCP/userclip-2", 1 );
+   ALLOC_STATE( ucp[3], tcl_ucp3, UCP_STATE_SIZE, "UCP/userclip-3", 1 );
+   ALLOC_STATE( ucp[4], tcl_ucp4, UCP_STATE_SIZE, "UCP/userclip-4", 1 );
+   ALLOC_STATE( ucp[5], tcl_ucp5, UCP_STATE_SIZE, "UCP/userclip-5", 1 );
+   ALLOC_STATE( lit[0], tcl_lit0, LIT_STATE_SIZE, "LIT/light-0", 1 );
+   ALLOC_STATE( lit[1], tcl_lit1, LIT_STATE_SIZE, "LIT/light-1", 1 );
+   ALLOC_STATE( lit[2], tcl_lit2, LIT_STATE_SIZE, "LIT/light-2", 1 );
+   ALLOC_STATE( lit[3], tcl_lit3, LIT_STATE_SIZE, "LIT/light-3", 1 );
+   ALLOC_STATE( lit[4], tcl_lit4, LIT_STATE_SIZE, "LIT/light-4", 1 );
+   ALLOC_STATE( lit[5], tcl_lit5, LIT_STATE_SIZE, "LIT/light-5", 1 );
+   ALLOC_STATE( lit[6], tcl_lit6, LIT_STATE_SIZE, "LIT/light-6", 1 );
+   ALLOC_STATE( lit[7], tcl_lit7, LIT_STATE_SIZE, "LIT/light-7", 1 );
+
+
+   /* Fill in the packet headers:
+    */
+   rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(RADEON_EMIT_PP_MISC);
+   rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(RADEON_EMIT_PP_CNTL);
+   rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(RADEON_EMIT_RB3D_COLORPITCH);
+   rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(RADEON_EMIT_RE_LINE_PATTERN);
+   rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(RADEON_EMIT_SE_LINE_WIDTH);
+   rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(RADEON_EMIT_RB3D_STENCILREFMASK);
+   rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(RADEON_EMIT_SE_VPORT_XSCALE);
+   rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(RADEON_EMIT_SE_CNTL);
+   rmesa->hw.set.cmd[SET_CMD_1] = cmdpkt(RADEON_EMIT_SE_CNTL_STATUS);
+   rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(RADEON_EMIT_RE_MISC);
+   rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_0);
+   rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_0);
+   rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_1);
+   rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_1);
+   rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(RADEON_EMIT_SE_ZBIAS_FACTOR);
+   rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT);
+   rmesa->hw.mtl.cmd[MTL_CMD_0] = 
+      cmdpkt(RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED);
+   rmesa->hw.grd.cmd[GRD_CMD_0] = 
+      cmdscl( RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 );
+   rmesa->hw.fog.cmd[FOG_CMD_0] = 
+      cmdvec( RADEON_VS_FOG_PARAM_ADDR, 1, 4 );
+   rmesa->hw.glt.cmd[GLT_CMD_0] = 
+      cmdvec( RADEON_VS_GLOBAL_AMBIENT_ADDR, 1, 4 );
+   rmesa->hw.eye.cmd[EYE_CMD_0] = 
+      cmdvec( RADEON_VS_EYE_VECTOR_ADDR, 1, 4 );
+
+   for (i = 0 ; i < 5; i++) {
+      rmesa->hw.mat[i].cmd[MAT_CMD_0] = 
+	 cmdvec( RADEON_VS_MATRIX_0_ADDR + i*4, 1, 16);
+   }
+
+   for (i = 0 ; i < 8; i++) {
+      rmesa->hw.lit[i].cmd[LIT_CMD_0] = 
+	 cmdvec( RADEON_VS_LIGHT_AMBIENT_ADDR + i, 8, 24 );
+      rmesa->hw.lit[i].cmd[LIT_CMD_1] = 
+	 cmdscl( RADEON_SS_LIGHT_DCD_ADDR + i, 8, 6 );
+   }
+
+   for (i = 0 ; i < 6; i++) {
+      rmesa->hw.ucp[i].cmd[UCP_CMD_0] = 
+	 cmdvec( RADEON_VS_UCP_ADDR + i, 1, 4 );
+   }
+
+   rmesa->last_ReallyEnabled = -1;
+
+   /* Initial hardware state:
+    */
+   rmesa->hw.ctx.cmd[CTX_PP_MISC] = (RADEON_ALPHA_TEST_PASS |
+				     RADEON_CHROMA_FUNC_FAIL |
+				     RADEON_CHROMA_KEY_NEAREST |
+				     RADEON_SHADOW_FUNC_EQUAL |
+				     RADEON_SHADOW_PASS_1 |
+				     RADEON_RIGHT_HAND_CUBE_OGL);
+
+   rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] = (RADEON_FOG_VERTEX |
+					  RADEON_FOG_USE_DEPTH);
+
+   rmesa->hw.ctx.cmd[CTX_RE_SOLID_COLOR] = 0x00000000;
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = (RADEON_COMB_FCN_ADD_CLAMP |
+					    RADEON_SRC_BLEND_GL_ONE |
+					    RADEON_DST_BLEND_GL_ZERO );
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] =
+      rmesa->radeonScreen->depthOffset;
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] = 
+      ((rmesa->radeonScreen->depthPitch &
+	RADEON_DEPTHPITCH_MASK) |
+       RADEON_DEPTH_ENDIAN_NO_SWAP);
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (depth_fmt |
+					       RADEON_Z_TEST_LESS |
+					       RADEON_STENCIL_TEST_ALWAYS |
+					       RADEON_STENCIL_FAIL_KEEP |
+					       RADEON_STENCIL_ZPASS_KEEP |
+					       RADEON_STENCIL_ZFAIL_KEEP |
+					       RADEON_Z_WRITE_ENABLE);
+
+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] = (RADEON_SCISSOR_ENABLE |
+				     RADEON_ANTI_ALIAS_NONE);
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = (RADEON_PLANE_MASK_ENABLE |
+				       color_fmt |
+				       (1<<15));
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_DITHER_ENABLE;
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = (rmesa->state.color.drawOffset &
+					      RADEON_COLOROFFSET_MASK);
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((rmesa->state.color.drawPitch &
+					      RADEON_COLORPITCH_MASK) |
+					     RADEON_COLOR_ENDIAN_NO_SWAP);
+
+   rmesa->hw.set.cmd[SET_SE_CNTL] = (RADEON_FFACE_CULL_CCW |
+				     RADEON_BFACE_SOLID |
+				     RADEON_FFACE_SOLID |
+/*  			     RADEON_BADVTX_CULL_DISABLE | */
+				     RADEON_FLAT_SHADE_VTX_LAST |
+				     RADEON_DIFFUSE_SHADE_GOURAUD |
+				     RADEON_ALPHA_SHADE_GOURAUD |
+				     RADEON_SPECULAR_SHADE_GOURAUD |
+				     RADEON_FOG_SHADE_GOURAUD |
+				     RADEON_VPORT_XY_XFORM_ENABLE |
+				     RADEON_VPORT_Z_XFORM_ENABLE |
+				     RADEON_VTX_PIX_CENTER_OGL |
+				     RADEON_ROUND_MODE_TRUNC |
+				     RADEON_ROUND_PREC_8TH_PIX);
+
+   rmesa->hw.set.cmd[SET_SE_CNTL_STATUS] =
+#ifdef MESA_BIG_ENDIAN
+					    RADEON_VC_32BIT_SWAP;
+#else
+  					    RADEON_VC_NO_SWAP;
+#endif
+
+   if (!(rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL)) {
+     rmesa->hw.set.cmd[SET_SE_CNTL_STATUS] |= RADEON_TCL_BYPASS;
+   }
+
+   rmesa->hw.set.cmd[SET_SE_COORDFMT] = (
+      RADEON_VTX_W0_IS_NOT_1_OVER_W0 |
+      RADEON_TEX1_W_ROUTING_USE_Q1);
+
+
+   rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = ((1 << 16) | 0xffff);
+
+   rmesa->hw.lin.cmd[LIN_RE_LINE_STATE] = 
+      ((0 << RADEON_LINE_CURRENT_PTR_SHIFT) |
+       (1 << RADEON_LINE_CURRENT_COUNT_SHIFT));
+
+   rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] = (1 << 4);
+
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] = 
+      ((0x00 << RADEON_STENCIL_REF_SHIFT) |
+       (0xff << RADEON_STENCIL_MASK_SHIFT) |
+       (0xff << RADEON_STENCIL_WRITEMASK_SHIFT));
+
+   rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = RADEON_ROP_COPY;
+   rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] = 0xffffffff;
+
+   rmesa->hw.msc.cmd[MSC_RE_MISC] = 
+      ((0 << RADEON_STIPPLE_X_OFFSET_SHIFT) |
+       (0 << RADEON_STIPPLE_Y_OFFSET_SHIFT) |
+       RADEON_STIPPLE_BIG_BIT_ORDER);
+
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE]  = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE]  = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = 0x00000000;
+
+   rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] = RADEON_BORDER_MODE_OGL;
+   rmesa->hw.tex[0].cmd[TEX_PP_TXFORMAT] = 
+      (RADEON_TXFORMAT_ENDIAN_NO_SWAP |
+       RADEON_TXFORMAT_PERSPECTIVE_ENABLE |
+       RADEON_TXFORMAT_ST_ROUTE_STQ0 |
+       (2 << RADEON_TXFORMAT_WIDTH_SHIFT) |
+       (2 << RADEON_TXFORMAT_HEIGHT_SHIFT));
+   rmesa->hw.tex[0].cmd[TEX_PP_TXOFFSET] = 0x2000;
+   rmesa->hw.tex[0].cmd[TEX_PP_BORDER_COLOR] = 0;
+   rmesa->hw.tex[0].cmd[TEX_PP_TXCBLEND] =  
+      (RADEON_COLOR_ARG_A_ZERO |
+       RADEON_COLOR_ARG_B_ZERO |
+       RADEON_COLOR_ARG_C_CURRENT_COLOR |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX);
+   rmesa->hw.tex[0].cmd[TEX_PP_TXABLEND] = 
+      (RADEON_ALPHA_ARG_A_ZERO |
+       RADEON_ALPHA_ARG_B_ZERO |
+       RADEON_ALPHA_ARG_C_CURRENT_ALPHA |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX);
+   rmesa->hw.tex[0].cmd[TEX_PP_TFACTOR] = 0;
+
+   rmesa->hw.tex[1].cmd[TEX_PP_TXFILTER] = RADEON_BORDER_MODE_OGL;
+   rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] = 
+      (RADEON_TXFORMAT_ENDIAN_NO_SWAP |
+       RADEON_TXFORMAT_PERSPECTIVE_ENABLE |
+       RADEON_TXFORMAT_ST_ROUTE_STQ1 |
+       (2 << RADEON_TXFORMAT_WIDTH_SHIFT) |
+       (2 << RADEON_TXFORMAT_HEIGHT_SHIFT));
+   rmesa->hw.tex[1].cmd[TEX_PP_TXOFFSET] = 0x8000;
+   rmesa->hw.tex[1].cmd[TEX_PP_BORDER_COLOR] = 0;
+   rmesa->hw.tex[1].cmd[TEX_PP_TXCBLEND] =     
+      (RADEON_COLOR_ARG_A_ZERO |
+       RADEON_COLOR_ARG_B_ZERO |
+       RADEON_COLOR_ARG_C_CURRENT_COLOR |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX);
+   rmesa->hw.tex[1].cmd[TEX_PP_TXABLEND] = 
+      (RADEON_ALPHA_ARG_A_ZERO |
+       RADEON_ALPHA_ARG_B_ZERO |
+       RADEON_ALPHA_ARG_C_CURRENT_ALPHA |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX);
+   rmesa->hw.tex[1].cmd[TEX_PP_TFACTOR] = 0;
+
+   /* Can only add ST1 at the time of doing some multitex but can keep
+    * it after that.  Errors if DIFFUSE is missing.
+    */
+   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = 
+      (RADEON_TCL_VTX_Z0 |
+       RADEON_TCL_VTX_W0 |
+       RADEON_TCL_VTX_PK_DIFFUSE
+	 );	/* need to keep this up to date */
+						   
+   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] =
+      ( RADEON_TCL_COMPUTE_XYZW 	|
+	(RADEON_TCL_TEX_INPUT_TEX_0 << RADEON_TCL_TEX_0_OUTPUT_SHIFT) |
+	(RADEON_TCL_TEX_INPUT_TEX_1 << RADEON_TCL_TEX_1_OUTPUT_SHIFT) |
+	(RADEON_TCL_TEX_INPUT_TEX_2 << RADEON_TCL_TEX_2_OUTPUT_SHIFT));
+
+
+   /* XXX */
+   rmesa->hw.tcl.cmd[TCL_MATRIX_SELECT_0] = 
+      ((MODEL << RADEON_MODELVIEW_0_SHIFT) |
+       (MODEL_IT << RADEON_IT_MODELVIEW_0_SHIFT));
+
+   rmesa->hw.tcl.cmd[TCL_MATRIX_SELECT_1] = 
+      ((MODEL_PROJ << RADEON_MODELPROJECT_0_SHIFT) |
+       (TEXMAT_0 << RADEON_TEXMAT_0_SHIFT) |
+       (TEXMAT_1 << RADEON_TEXMAT_1_SHIFT));
+
+   rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] = 
+      (RADEON_UCP_IN_CLIP_SPACE |
+       RADEON_CULL_FRONT_IS_CCW);
+
+   rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL] = 0; 
+
+   rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = 
+      (RADEON_SPECULAR_LIGHTS |
+       RADEON_DIFFUSE_SPECULAR_COMBINE |
+       RADEON_LOCAL_LIGHT_VEC_GL |
+       (RADEON_LM_SOURCE_STATE_PREMULT << RADEON_EMISSIVE_SOURCE_SHIFT) |
+       (RADEON_LM_SOURCE_STATE_PREMULT << RADEON_AMBIENT_SOURCE_SHIFT) |
+       (RADEON_LM_SOURCE_STATE_PREMULT << RADEON_DIFFUSE_SOURCE_SHIFT) |
+       (RADEON_LM_SOURCE_STATE_PREMULT << RADEON_SPECULAR_SOURCE_SHIFT)); 
+
+   for (i = 0 ; i < 8; i++) {
+      struct gl_light *l = &ctx->Light.Light[i];
+      GLenum p = GL_LIGHT0 + i;
+      *(float *)&(rmesa->hw.lit[i].cmd[LIT_RANGE_CUTOFF]) = FLT_MAX;
+
+      ctx->Driver.Lightfv( ctx, p, GL_AMBIENT, l->Ambient );
+      ctx->Driver.Lightfv( ctx, p, GL_DIFFUSE, l->Diffuse );
+      ctx->Driver.Lightfv( ctx, p, GL_SPECULAR, l->Specular );
+      ctx->Driver.Lightfv( ctx, p, GL_POSITION, 0 );
+      ctx->Driver.Lightfv( ctx, p, GL_SPOT_DIRECTION, 0 );
+      ctx->Driver.Lightfv( ctx, p, GL_SPOT_EXPONENT, &l->SpotExponent );
+      ctx->Driver.Lightfv( ctx, p, GL_SPOT_CUTOFF, &l->SpotCutoff );
+      ctx->Driver.Lightfv( ctx, p, GL_CONSTANT_ATTENUATION,
+			   &l->ConstantAttenuation );
+      ctx->Driver.Lightfv( ctx, p, GL_LINEAR_ATTENUATION, 
+			   &l->LinearAttenuation );
+      ctx->Driver.Lightfv( ctx, p, GL_QUADRATIC_ATTENUATION, 
+		     &l->QuadraticAttenuation );
+   }
+
+   ctx->Driver.LightModelfv( ctx, GL_LIGHT_MODEL_AMBIENT, 
+			     ctx->Light.Model.Ambient );
+
+   TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange( ctx );
+
+   for (i = 0 ; i < 6; i++) {
+      ctx->Driver.ClipPlane( ctx, GL_CLIP_PLANE0 + i, NULL );
+   }
+
+   ctx->Driver.Fogfv( ctx, GL_FOG_MODE, 0 );
+   ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density );
+   ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start );
+   ctx->Driver.Fogfv( ctx, GL_FOG_END, &ctx->Fog.End );
+   ctx->Driver.Fogfv( ctx, GL_FOG_COLOR, ctx->Fog.Color );
+   ctx->Driver.Fogfv( ctx, GL_FOG_COORDINATE_SOURCE_EXT, 0 );
+   
+   
+   /* Set up vector and scalar state commands:
+    */
+/*     upload_matrix( rmesa, ctx->ModelView.m, MODEL ); */
+/*     upload_matrix_t( rmesa, ctx->ModelView.inv, MODEL_IT ); */
+/*     upload_matrix( rmesa, ctx->TextureMatrix[0].m, TEXMAT_0 ); */
+/*     upload_matrix( rmesa, ctx->TextureMatrix[1].m, TEXMAT_1 ); */
+/*     upload_matrix( rmesa, ctx->_ModelProjectMatrix.m, TEXMAT_2 ); */
+
+   rmesa->hw.grd.cmd[GRD_VERT_GUARD_CLIP_ADJ] = IEEE_ONE;
+   rmesa->hw.grd.cmd[GRD_VERT_GUARD_DISCARD_ADJ] = IEEE_ONE;
+   rmesa->hw.grd.cmd[GRD_HORZ_GUARD_CLIP_ADJ] = IEEE_ONE;
+   rmesa->hw.grd.cmd[GRD_HORZ_GUARD_DISCARD_ADJ] = IEEE_ONE;
+
+   rmesa->hw.eye.cmd[EYE_X] = 0;
+   rmesa->hw.eye.cmd[EYE_Y] = 0;
+   rmesa->hw.eye.cmd[EYE_Z] = IEEE_ONE;
+   rmesa->hw.eye.cmd[EYE_RESCALE_FACTOR] = IEEE_ONE;
+}
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_swtcl.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_swtcl.c
new file mode 100644
index 000000000..2194add40
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_swtcl.c
@@ -0,0 +1,1189 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_swtcl.c,v 1.4 2003/02/15 22:18:48 dawes Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#include "glheader.h"
+#include "mtypes.h"
+#include "colormac.h"
+#include "enums.h"
+#include "mem.h"
+#include "mmath.h"
+#include "macros.h"
+
+#include "swrast_setup/swrast_setup.h"
+#include "math/m_translate.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+#include "radeon_context.h"
+#include "radeon_ioctl.h"
+#include "radeon_state.h"
+#include "radeon_swtcl.h"
+#include "radeon_tcl.h"
+
+/***********************************************************************
+ *              Build render functions from dd templates               *
+ ***********************************************************************/
+
+
+#define RADEON_XYZW_BIT		0x01
+#define RADEON_RGBA_BIT		0x02
+#define RADEON_SPEC_BIT		0x04	/* also selects fog, see DO_FOG */
+#define RADEON_TEX0_BIT		0x08
+#define RADEON_TEX1_BIT		0x10
+#define RADEON_PTEX_BIT		0x20
+#define RADEON_MAX_SETUP	0x40	/* one past every OR of the bits above */
+
+static void flush_last_swtcl_prim( radeonContextPtr rmesa  );
+static void flush_last_swtcl_prim_compat( radeonContextPtr rmesa );
+
+static struct {
+   void                (*emit)( GLcontext *, GLuint, GLuint, void *, GLuint );
+   interp_func		interp;
+   copy_pv_func	        copy_pv;
+   GLboolean           (*check_tex_sizes)( GLcontext *ctx );
+   GLuint               vertex_size;
+   GLuint               vertex_stride_shift;	/* byte stride is 1 << this */
+   GLuint               vertex_format;
+} setup_tab[RADEON_MAX_SETUP];	/* indexed by swtcl.SetupIndex (OR of the bits) */
+
+
+#define TINY_VERTEX_FORMAT	        (RADEON_CP_VC_FRMT_XY |		\
+					 RADEON_CP_VC_FRMT_Z |		\
+					 RADEON_CP_VC_FRMT_PKCOLOR)	/* no W0 */
+
+#define NOTEX_VERTEX_FORMAT	        (RADEON_CP_VC_FRMT_XY |		\
+					 RADEON_CP_VC_FRMT_Z |		\
+					 RADEON_CP_VC_FRMT_W0 |		\
+					 RADEON_CP_VC_FRMT_PKCOLOR |	\
+					 RADEON_CP_VC_FRMT_PKSPEC)
+
+#define TEX0_VERTEX_FORMAT	        (RADEON_CP_VC_FRMT_XY |		\
+					 RADEON_CP_VC_FRMT_Z |		\
+					 RADEON_CP_VC_FRMT_W0 |		\
+					 RADEON_CP_VC_FRMT_PKCOLOR |	\
+					 RADEON_CP_VC_FRMT_PKSPEC |	\
+					 RADEON_CP_VC_FRMT_ST0)
+
+#define TEX1_VERTEX_FORMAT	        (RADEON_CP_VC_FRMT_XY |		\
+					 RADEON_CP_VC_FRMT_Z |		\
+					 RADEON_CP_VC_FRMT_W0 |		\
+					 RADEON_CP_VC_FRMT_PKCOLOR |	\
+					 RADEON_CP_VC_FRMT_PKSPEC |	\
+					 RADEON_CP_VC_FRMT_ST0 |	\
+					 RADEON_CP_VC_FRMT_ST1)
+
+#define PROJ_TEX1_VERTEX_FORMAT	        (RADEON_CP_VC_FRMT_XY |		\
+					 RADEON_CP_VC_FRMT_Z |		\
+					 RADEON_CP_VC_FRMT_W0 |		\
+					 RADEON_CP_VC_FRMT_PKCOLOR |	\
+					 RADEON_CP_VC_FRMT_PKSPEC |	\
+					 RADEON_CP_VC_FRMT_ST0 |	\
+					 RADEON_CP_VC_FRMT_Q0 |         \
+					 RADEON_CP_VC_FRMT_ST1 |	\
+					 RADEON_CP_VC_FRMT_Q1)
+
+#define TEX2_VERTEX_FORMAT 0	/* HAVE_TEX2_VERTICES is 0 in this file */
+#define TEX3_VERTEX_FORMAT 0	/* HAVE_TEX3_VERTICES is 0 in this file */
+#define PROJ_TEX3_VERTEX_FORMAT 0
+
+#define DO_XYZW (IND & RADEON_XYZW_BIT)
+#define DO_RGBA (IND & RADEON_RGBA_BIT)
+#define DO_SPEC (IND & RADEON_SPEC_BIT)
+#define DO_FOG  (IND & RADEON_SPEC_BIT)	/* fog shares the SPEC bit */
+#define DO_TEX0 (IND & RADEON_TEX0_BIT)
+#define DO_TEX1 (IND & RADEON_TEX1_BIT)
+#define DO_TEX2 0
+#define DO_TEX3 0
+#define DO_PTEX (IND & RADEON_PTEX_BIT)
+
+#define VERTEX radeonVertex
+#define VERTEX_COLOR radeon_color_t
+#define GET_VIEWPORT_MAT() 0
+#define GET_TEXSOURCE(n)  n
+#define GET_VERTEX_FORMAT() RADEON_CONTEXT(ctx)->swtcl.vertex_format
+#define GET_VERTEX_STORE() RADEON_CONTEXT(ctx)->swtcl.verts
+#define GET_VERTEX_STRIDE_SHIFT() RADEON_CONTEXT(ctx)->swtcl.vertex_stride_shift
+#define GET_UBYTE_COLOR_STORE() &RADEON_CONTEXT(ctx)->UbyteColor
+#define GET_UBYTE_SPEC_COLOR_STORE() &RADEON_CONTEXT(ctx)->UbyteSecondaryColor
+
+#define HAVE_HW_VIEWPORT    1
+/* Tiny vertices don't seem to work atm - haven't looked into why.
+ * NOTE(review): HAVE_TINY_VERTICES is still 1 below - confirm this is current. */
+#define HAVE_HW_DIVIDE      (IND & ~(RADEON_XYZW_BIT|RADEON_RGBA_BIT))
+#define HAVE_TINY_VERTICES  1
+#define HAVE_RGBA_COLOR     1
+#define HAVE_NOTEX_VERTICES 1
+#define HAVE_TEX0_VERTICES  1
+#define HAVE_TEX1_VERTICES  1
+#define HAVE_TEX2_VERTICES  0
+#define HAVE_TEX3_VERTICES  0
+#define HAVE_PTEX_VERTICES  1
+
+#define CHECK_HW_DIVIDE    (!(ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE| \
+                                                    DD_TRI_UNFILLED)))
+
+#define IMPORT_QUALIFIER
+#define IMPORT_FLOAT_COLORS radeon_import_float_colors
+#define IMPORT_FLOAT_SPEC_COLORS radeon_import_float_spec_colors
+
+#define INTERP_VERTEX setup_tab[RADEON_CONTEXT(ctx)->swtcl.SetupIndex].interp
+#define COPY_PV_VERTEX setup_tab[RADEON_CONTEXT(ctx)->swtcl.SetupIndex].copy_pv
+
+
+/***********************************************************************
+ *         Generate  pv-copying and translation functions              *
+ ***********************************************************************/
+
+#define TAG(x) radeon_##x
+#define IND ~0	/* all bits set: every IND-based DO_* test is non-zero */
+#include "tnl_dd/t_dd_vb.c"
+#undef IND
+
+
+/***********************************************************************
+ *             Generate vertex emit and interp functions               *
+ ***********************************************************************/
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT)
+#define TAG(x) x##_wg	/* suffix letters: w = xyzw, g = rgba */
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_TEX0_BIT)
+#define TAG(x) x##_wgt0	/* t0 = tex0 */
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_TEX0_BIT|RADEON_PTEX_BIT)
+#define TAG(x) x##_wgpt0	/* p = projective texture bit */
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_TEX0_BIT|RADEON_TEX1_BIT)
+#define TAG(x) x##_wgt0t1	/* t1 = tex1 */
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_TEX0_BIT|RADEON_TEX1_BIT|\
+             RADEON_PTEX_BIT)
+#define TAG(x) x##_wgpt0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_SPEC_BIT)
+#define TAG(x) x##_wgfs	/* fs = the shared spec/fog bit */
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_SPEC_BIT|\
+	     RADEON_TEX0_BIT)
+#define TAG(x) x##_wgfst0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_SPEC_BIT|\
+	     RADEON_TEX0_BIT|RADEON_PTEX_BIT)
+#define TAG(x) x##_wgfspt0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_SPEC_BIT|\
+	     RADEON_TEX0_BIT|RADEON_TEX1_BIT)
+#define TAG(x) x##_wgfst0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_SPEC_BIT|\
+	     RADEON_TEX0_BIT|RADEON_TEX1_BIT|RADEON_PTEX_BIT)
+#define TAG(x) x##_wgfspt0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+
+/***********************************************************************
+ *                         Initialization 
+ ***********************************************************************/
+
+static void init_setup_tab( void )	/* calls one init_<tag>() per TAG above */
+{
+   init_wg();	/* presumably fills setup_tab[IND]; body is in t_dd_vbtmp.h */
+   init_wgt0();
+   init_wgpt0();
+   init_wgt0t1();
+   init_wgpt0t1();
+   init_wgfs();
+   init_wgfst0();
+   init_wgfspt0();
+   init_wgfst0t1();
+   init_wgfspt0t1();
+}
+
+
+
+/* Debug aid: print msg, the flag word in hex, then a label for every
+ * RADEON_*_BIT setup flag that is set. */
+void radeonPrintSetupFlags(char *msg, GLuint flags )
+{
+   fprintf(stderr, "%s(%x): %s%s%s%s%s%s\n", msg, (int)flags,
+	   (flags & RADEON_XYZW_BIT) == 0 ? "" : " xyzw,",
+	   (flags & RADEON_RGBA_BIT) == 0 ? "" : " rgba,",
+	   (flags & RADEON_SPEC_BIT) == 0 ? "" : " spec/fog,",
+	   (flags & RADEON_TEX0_BIT) == 0 ? "" : " tex-0,",
+	   (flags & RADEON_TEX1_BIT) == 0 ? "" : " tex-1,",
+	   (flags & RADEON_PTEX_BIT) == 0 ? "" : " proj-tex,");
+}
+
+
+/* Start of rendering (going by the name): switch to the projective-texture
+ * vertex format if check_tex_sizes fails, then run a non-swtcl dma.flush. */
+static void radeonRenderStart( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   if (!setup_tab[rmesa->swtcl.SetupIndex].check_tex_sizes(ctx)) {
+      GLuint ind;
+
+      /* Radeon handles projective textures; just move up to that format. */
+      rmesa->swtcl.SetupIndex |= (RADEON_PTEX_BIT|RADEON_RGBA_BIT);
+      ind = rmesa->swtcl.SetupIndex;
+      if (rmesa->swtcl.vertex_format != setup_tab[ind].vertex_format) {
+	 RADEON_NEWPRIM(rmesa);
+	 rmesa->swtcl.vertex_stride_shift = setup_tab[ind].vertex_stride_shift;
+	 rmesa->swtcl.vertex_size = setup_tab[ind].vertex_size;
+	 rmesa->swtcl.vertex_format = setup_tab[ind].vertex_format;
+      }
+      if ((ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED)) == 0) {
+	 tnl->Driver.Render.CopyPV = setup_tab[rmesa->swtcl.SetupIndex].copy_pv;
+	 tnl->Driver.Render.Interp = setup_tab[rmesa->swtcl.SetupIndex].interp;
+      }
+   }
+
+   if (rmesa->dma.flush && rmesa->dma.flush != flush_last_swtcl_prim &&
+       rmesa->dma.flush != flush_last_swtcl_prim_compat)
+      rmesa->dma.flush( rmesa );
+}
+
+
+/* Run the current setup's emit() on start/count unless no input changed. */
+void radeonBuildVertices( GLcontext *ctx, GLuint start, GLuint count,
+			   GLuint newinputs )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   const GLuint shift = rmesa->swtcl.vertex_stride_shift;
+   GLubyte *first = (GLubyte *)rmesa->swtcl.verts + (start << shift);
+
+   /* Fold in, and clear, the inputs recorded in swtcl.SetupNewInputs. */
+   newinputs |= rmesa->swtcl.SetupNewInputs;
+   rmesa->swtcl.SetupNewInputs = 0;
+
+   if (newinputs)
+      setup_tab[rmesa->swtcl.SetupIndex].emit( ctx, start, count, first,
+					       1 << shift );
+}
+/* Pick the setup_tab entry (vertex format, interp/copy_pv hooks) and the SE_COORDFMT value matching current fog/specular/texture state. */
+void radeonChooseVertexState( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLuint ind = (RADEON_XYZW_BIT | RADEON_RGBA_BIT);
+
+   if (!rmesa->TclFallback || rmesa->Fallback)
+      return;		/* only acts when TclFallback is set and Fallback is clear */
+
+   if (ctx->Fog.Enabled || (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR))
+      ind |= RADEON_SPEC_BIT;
+
+   if (ctx->Texture._ReallyEnabled & TEXTURE1_ANY)
+      ind |= RADEON_TEX0_BIT|RADEON_TEX1_BIT;
+   else if (ctx->Texture._ReallyEnabled & TEXTURE0_ANY)
+      ind |= RADEON_TEX0_BIT;
+
+   rmesa->swtcl.SetupIndex = ind;
+
+   if (ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED)) {
+      tnl->Driver.Render.Interp = radeon_interp_extras;
+      tnl->Driver.Render.CopyPV = radeon_copy_pv_extras;
+   }
+   else {
+      tnl->Driver.Render.Interp = setup_tab[ind].interp;
+      tnl->Driver.Render.CopyPV = setup_tab[ind].copy_pv;
+   }
+
+   if (setup_tab[ind].vertex_format != rmesa->swtcl.vertex_format) {
+      RADEON_NEWPRIM(rmesa);
+      rmesa->swtcl.vertex_format = setup_tab[ind].vertex_format;
+      rmesa->swtcl.vertex_size = setup_tab[ind].vertex_size;
+      rmesa->swtcl.vertex_stride_shift = setup_tab[ind].vertex_stride_shift;
+   }
+
+   {
+      GLuint se_coord_fmt, needproj;
+
+      /* HW perspective divide is a win, but tiny vertex formats are a
+       * bigger one.
+       */
+      if (setup_tab[ind].vertex_format == TINY_VERTEX_FORMAT ||
+	  (ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED))) {
+	 needproj = GL_TRUE;
+	 se_coord_fmt = (RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
+			 RADEON_VTX_Z_PRE_MULT_1_OVER_W0 |
+			 RADEON_TEX1_W_ROUTING_USE_Q1);
+      }
+      else {
+	 needproj = GL_FALSE;
+	 se_coord_fmt = (RADEON_VTX_W0_IS_NOT_1_OVER_W0 |
+			 RADEON_TEX1_W_ROUTING_USE_Q1);
+      }
+
+      if ( se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT] ) {
+	 RADEON_STATECHANGE( rmesa, set );
+	 rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
+      }
+      _tnl_need_projected_coords( ctx, needproj );
+   }
+}
+
+
+/* Flush vertices in the current dma region as one vertex-buffer primitive,
+ * then reset numverts, advance start to ptr and clear dma.flush. */
+static void flush_last_swtcl_prim( radeonContextPtr rmesa  )
+{
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (rmesa->dma.current.buf) {
+      struct radeon_dma_region *current = &rmesa->dma.current;
+      GLuint current_offset = (rmesa->radeonScreen->agp_buffer_offset +
+			       current->buf->buf->idx * RADEON_BUFFER_SIZE + 
+			       current->start);
+
+      assert (!(rmesa->swtcl.hw_primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
+
+      assert (current->start + 
+	      rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+	      current->ptr);
+
+      if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
+	 radeonEmitVertexAOS( rmesa,
+			      rmesa->swtcl.vertex_size,
+			      current_offset);
+
+	 radeonEmitVbufPrim( rmesa,
+			     rmesa->swtcl.vertex_format,
+			     rmesa->swtcl.hw_primitive,
+			     rmesa->swtcl.numverts);
+      }
+
+      rmesa->swtcl.numverts = 0;
+      current->start = current->ptr;
+
+      rmesa->dma.flush = 0;
+   }
+}
+
+/* Flush variant installed when dri.drmMinor == 1: emits through radeonCompatEmitPrimitive and then drops the region's buffer. */
+static void flush_last_swtcl_prim_compat( radeonContextPtr rmesa )
+{
+   struct radeon_dma_region *current = &rmesa->dma.current;
+
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s buf %p start %d ptr %d\n", 
+	      __FUNCTION__,
+	      current->buf,
+	      current->start,
+	      current->ptr);
+
+   assert (!(rmesa->swtcl.hw_primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
+   assert (current->start + 
+	   rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+	   current->ptr);
+   assert (current->start == 0);
+
+   if (current->ptr && current->buf) {
+      assert (current->buf->refcount == 1);
+
+      radeonCompatEmitPrimitive( rmesa,
+				 rmesa->swtcl.vertex_format,
+				 rmesa->swtcl.hw_primitive,
+				 rmesa->swtcl.numverts);
+      
+      /* The buffer has been released:
+       */
+      FREE(current->buf);
+      current->buf = 0;
+      current->start = 0;
+      current->ptr = current->end;
+
+   }
+
+   rmesa->swtcl.numverts = 0;
+   rmesa->dma.flush = 0;
+}
+
+
+/* Alloc space for nverts vertices of vsize bytes each in the current dma
+ * region (refilled if too small); installs a swtcl flush callback if none. */
+static __inline void *radeonAllocDmaLowVerts( radeonContextPtr rmesa,
+					      int nverts, int vsize )
+{
+   GLuint bytes = vsize * nverts;
+
+   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
+      radeonRefillCurrentDmaRegion( rmesa );
+
+   if (!rmesa->dma.flush) {
+      if (rmesa->dri.drmMinor == 1)
+	 rmesa->dma.flush = flush_last_swtcl_prim_compat;
+      else
+	 rmesa->dma.flush = flush_last_swtcl_prim;
+   }
+
+   assert( vsize == rmesa->swtcl.vertex_size * 4 );
+   assert( rmesa->dma.flush == flush_last_swtcl_prim ||
+	   rmesa->dma.flush == flush_last_swtcl_prim_compat);
+   assert (rmesa->dma.current.start + 
+	   rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+	   rmesa->dma.current.ptr);
+
+   /* Hand out the space at ptr, then advance ptr and the pending vertex count. */
+   {
+      GLubyte *head = rmesa->dma.current.address + rmesa->dma.current.ptr;
+      rmesa->dma.current.ptr += bytes;
+      rmesa->swtcl.numverts += nverts;
+      return head;
+   }
+
+}
+
+
+
+/* Emit vertices [start, count) with the current setup function directly into space allocated from the current dma region. */
+void radeon_emit_contiguous_verts( GLcontext *ctx, GLuint start, GLuint count )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint vertex_size = rmesa->swtcl.vertex_size * 4;
+   CARD32 *dest = radeonAllocDmaLowVerts( rmesa, count-start, vertex_size );
+   setup_tab[rmesa->swtcl.SetupIndex].emit( ctx, start, count, dest, 
+					    vertex_size );
+}
+
+
+/* Allocate the swtcl.indexed_verts dma region for count - start vertices and emit vertices [start, count) into it. */
+void radeon_emit_indexed_verts( GLcontext *ctx, GLuint start, GLuint count )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   radeonAllocDmaRegionVerts( rmesa, 
+			      &rmesa->swtcl.indexed_verts, 
+			      count - start,
+			      rmesa->swtcl.vertex_size * 4, 
+			      64);
+
+   setup_tab[rmesa->swtcl.SetupIndex].emit( 
+      ctx, start, count, 
+      rmesa->swtcl.indexed_verts.address + rmesa->swtcl.indexed_verts.start, 
+      rmesa->swtcl.vertex_size * 4 );
+}
+
+
+/*
+ * Render unclipped vertex buffers by emitting vertices directly to
+ * dma buffers.  Use strip/fan hardware primitives where possible.
+ * Try to simulate missing primitives with indexed vertices.
+ */
+#define HAVE_POINTS      1
+#define HAVE_LINES       1
+#define HAVE_LINE_STRIPS 1
+#define HAVE_TRIANGLES   1
+#define HAVE_TRI_STRIPS  1
+#define HAVE_TRI_STRIP_1 0
+#define HAVE_TRI_FANS    1
+#define HAVE_QUADS       0
+#define HAVE_QUAD_STRIPS 0
+#define HAVE_POLYGONS    0
+#define HAVE_ELTS        1
+/* Hardware primitive type for each GL primitive mode (indexed by the GLenum value); 0 where no type is assigned. */
+static const GLuint hw_prim[GL_POLYGON+1] = {
+   RADEON_CP_VC_CNTL_PRIM_TYPE_POINT,		/* GL_POINTS */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,		/* GL_LINES */
+   0,						/* GL_LINE_LOOP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP,	/* GL_LINE_STRIP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_TRIANGLES */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP,	/* GL_TRIANGLE_STRIP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN,		/* GL_TRIANGLE_FAN */
+   0,						/* GL_QUADS */
+   0,						/* GL_QUAD_STRIP */
+   0						/* GL_POLYGON */
+};
+/* Start a new non-indexed hw primitive for GL mode prim; asserts that the current dma region is empty (ptr == start) afterwards. */
+static __inline void radeonDmaPrimitive( radeonContextPtr rmesa, GLenum prim )
+{
+   RADEON_NEWPRIM( rmesa );
+   rmesa->swtcl.hw_primitive = hw_prim[prim];
+   assert(rmesa->dma.current.ptr == rmesa->dma.current.start);
+}
+/* Start a new hw primitive for GL mode prim with RADEON_CP_VC_CNTL_PRIM_WALK_IND set (indexed vertices). */
+static __inline void radeonEltPrimitive( radeonContextPtr rmesa, GLenum prim )
+{
+   RADEON_NEWPRIM( rmesa );
+   rmesa->swtcl.hw_primitive = hw_prim[prim] | RADEON_CP_VC_CNTL_PRIM_WALK_IND;
+}
+
+/* Fallback for non-indexed prims: build vertices [start, count) and render them through tnl's PrimTabVerts entry for the mode in flags. */
+static void VERT_FALLBACK( GLcontext *ctx,
+			   GLuint start,
+			   GLuint count,
+			   GLuint flags )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   tnl->Driver.Render.PrimitiveNotify( ctx, flags & PRIM_MODE_MASK );
+   tnl->Driver.Render.BuildVertices( ctx, start, count, ~0 );
+   tnl->Driver.Render.PrimTabVerts[flags&PRIM_MODE_MASK]( ctx, start, count, flags );
+   RADEON_CONTEXT(ctx)->swtcl.SetupNewInputs = VERT_CLIP;
+}
+/* Fallback for indexed prims: build vertices [start, count) and render them through tnl's PrimTabElts entry for the mode in flags. */
+static void ELT_FALLBACK( GLcontext *ctx,
+			  GLuint start,
+			  GLuint count,
+			  GLuint flags )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   tnl->Driver.Render.PrimitiveNotify( ctx, flags & PRIM_MODE_MASK );
+   tnl->Driver.Render.BuildVertices( ctx, start, count, ~0 );
+   tnl->Driver.Render.PrimTabElts[flags&PRIM_MODE_MASK]( ctx, start, count, flags );
+   RADEON_CONTEXT(ctx)->swtcl.SetupNewInputs = VERT_CLIP;
+}
+
+/* Hooks and buffer-capacity limits for the tnl_dd/t_dd_dmatmp.h template included further down; sizes are derived from swtcl.vertex_size * 4 bytes per vertex and 2 bytes per elt. */
+#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
+#define ELTS_VARS  GLushort *dest
+#define INIT( prim ) radeonDmaPrimitive( rmesa, prim )
+#define ELT_INIT(prim) radeonEltPrimitive( rmesa, prim )
+#define NEW_PRIMITIVE()  RADEON_NEWPRIM( rmesa )
+#define NEW_BUFFER()  radeonRefillCurrentDmaRegion( rmesa )
+#define GET_CURRENT_VB_MAX_VERTS() \
+  (((int)rmesa->dma.current.end - (int)rmesa->dma.current.ptr) / (rmesa->swtcl.vertex_size*4))
+#define GET_SUBSEQUENT_VB_MAX_VERTS() \
+  ((RADEON_BUFFER_SIZE) / (rmesa->swtcl.vertex_size*4))
+
+#if RADEON_OLD_PACKETS
+# define GET_CURRENT_VB_MAX_ELTS() \
+  ((RADEON_CMD_BUF_SZ - (rmesa->store.cmd_used + 24)) / 2)
+#else
+# define GET_CURRENT_VB_MAX_ELTS() \
+  ((RADEON_CMD_BUF_SZ - (rmesa->store.cmd_used + 16)) / 2)
+#endif
+#define GET_SUBSEQUENT_VB_MAX_ELTS() \
+  ((RADEON_CMD_BUF_SZ - 1024) / 2)
+
+
+/* Point `dest` at room for nr 16-bit elts: append to the command buffer while radeonFlushElts is pending and they fit, else flush and open a new elt primitive. nr is parenthesized so expression arguments are sized correctly. */
+/* How do you extend an existing primitive?
+ */
+#define ALLOC_ELTS(nr)							\
+do {									\
+   if (rmesa->dma.flush == radeonFlushElts &&				\
+       rmesa->store.cmd_used + (nr)*2 < RADEON_CMD_BUF_SZ) {		\
+									\
+      dest = (GLushort *)(rmesa->store.cmd_buf +			\
+			  rmesa->store.cmd_used);			\
+      rmesa->store.cmd_used += (nr)*2;					\
+   }									\
+   else {								\
+      if (rmesa->dma.flush) {						\
+	 rmesa->dma.flush( rmesa );					\
+      }									\
+									\
+      radeonEmitVertexAOS( rmesa,					\
+			   rmesa->swtcl.vertex_size,			\
+			   (rmesa->radeonScreen->agp_buffer_offset +		\
+			    rmesa->swtcl.indexed_verts.buf->buf->idx * 	\
+			    RADEON_BUFFER_SIZE +			\
+			    rmesa->swtcl.indexed_verts.start));		\
+									\
+      dest = radeonAllocEltsOpenEnded( rmesa,				\
+				       rmesa->swtcl.vertex_format,	\
+				       rmesa->swtcl.hw_primitive,	\
+				       (nr) );				\
+   }									\
+} while (0)
+
+#define ALLOC_ELTS_NEW_PRIMITIVE(nr) ALLOC_ELTS( nr )
+
+#ifdef MESA_BIG_ENDIAN
+/* Big-endian: swap the two 16-bit elts inside each aligned 32-bit word (avoidable if dest were always word aligned). Pointer math uses unsigned long because a GLuint cast truncates 64-bit addresses. */
+#define EMIT_ELT(offset, x) do {					\
+	int off = (offset) + (int)( ( (unsigned long)dest & 0x2 ) >> 1 );	\
+	GLushort *des = (GLushort *)( (unsigned long)dest & ~0x2UL );	\
+	(des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x); } while (0)
+#else
+#define EMIT_ELT(offset, x) (dest)[offset] = (GLushort) (x)
+#endif
+#define EMIT_TWO_ELTS(offset, x, y)  *(GLuint *)(dest+offset) = ((y)<<16)|(x);
+#define INCR_ELTS( nr ) dest += nr
+#define RELEASE_ELT_VERTS() \
+  radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, __FUNCTION__ )
+#define EMIT_VERTS( ctx, j, nr ) \
+  radeon_emit_contiguous_verts(ctx, j, (j)+(nr))
+#define EMIT_INDEXED_VERTS( ctx, start, count ) \
+  radeon_emit_indexed_verts( ctx, start, count )
+
+
+#define TAG(x) radeon_dma_##x
+#include "tnl_dd/t_dd_dmatmp.h"
+
+
+/**********************************************************************/
+/*                          Render pipeline stage                     */
+/**********************************************************************/
+
+/* Stage run function: render the whole VB through the radeon_dma_* tables when allowed; returns GL_FALSE once it has rendered everything, GL_TRUE when it declines. */
+static GLboolean radeon_run_render( GLcontext *ctx,
+				    struct gl_pipeline_stage *stage )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   GLuint i, length, flags = 0;
+   render_func *tab = TAG(render_tab_verts);
+
+   if (rmesa->swtcl.indexed_verts.buf && (!VB->Elts || stage->changed_inputs)) 
+      RELEASE_ELT_VERTS();
+   	
+   if (VB->ClipOrMask ||	     /* No clipping */
+       rmesa->swtcl.RenderIndex != 0 ||    /* No per-vertex manipulations */
+       ctx->Line.StippleFlag)        /* GH: THIS IS A HACK!!! */
+      return GL_TRUE;		
+
+   if (rmesa->dri.drmMinor < 3) {
+      /* drm 1.1 doesn't support vertex primitives starting in the
+       * middle of a buffer.  It doesn't support sane indexed vertices
+       * either.  drm 1.2 fixes both of these problems, but we don't have a
+       * compatibility layer to that version yet.  
+       */
+      return GL_TRUE;
+   }
+		
+   tnl->Driver.Render.Start( ctx );
+
+   if (VB->Elts) {
+      tab = TAG(render_tab_elts);
+      if (!rmesa->swtcl.indexed_verts.buf)
+	 if (!TAG(emit_elt_verts)(ctx, 0, VB->Count))
+	    return GL_TRUE;	/* too many vertices */
+   }
+
+   for (i = 0 ; !(flags & PRIM_LAST) ; i += length)
+   {
+      flags = VB->Primitive[i];
+      length = VB->PrimitiveLength[i];
+
+      if (RADEON_DEBUG & DEBUG_PRIMS)
+	 fprintf(stderr, "radeon_render.c: prim %s %d..%d\n", 
+		 _mesa_lookup_enum_by_nr(flags & PRIM_MODE_MASK), 
+		 i, i+length);
+
+      if (length)
+	 tab[flags & PRIM_MODE_MASK]( ctx, i, i + length, flags );
+   }
+
+   tnl->Driver.Render.Finish( ctx );
+
+   return GL_FALSE;		/* finished the pipe */
+}
+
+
+/* Stage check function: set stage->inputs to the VERT_* attributes needed; the optional ones are added only in GL_RENDER mode. */
+static void radeon_check_render( GLcontext *ctx,
+				 struct gl_pipeline_stage *stage )
+{
+   GLuint inputs = VERT_OBJ|VERT_CLIP|VERT_RGBA;
+
+   if (ctx->RenderMode == GL_RENDER) {
+      if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR)
+	 inputs |= VERT_SPEC_RGB;
+
+      if (ctx->Texture.Unit[0]._ReallyEnabled)
+	 inputs |= VERT_TEX(0);
+
+      if (ctx->Texture.Unit[1]._ReallyEnabled)
+	 inputs |= VERT_TEX(1);
+
+      if (ctx->Fog.Enabled)
+	 inputs |= VERT_FOG_COORD;
+   }
+
+   stage->inputs = inputs;
+}
+
+/* Stage destructor: does nothing (the stage argument is unused). */
+static void dtr( struct gl_pipeline_stage *stage )
+{
+   (void)stage;
+}
+
+/* Pipeline stage descriptor wiring up dtr, radeon_check_render and radeon_run_render. */
+const struct gl_pipeline_stage _radeon_render_stage =
+{
+   "radeon render",
+   (_DD_NEW_SEPARATE_SPECULAR |
+    _NEW_TEXTURE|
+    _NEW_FOG|
+    _NEW_RENDERMODE),		/* re-check (new inputs) */
+   0,				/* re-run (always runs) */
+   GL_TRUE,			/* active */
+   0, 0,			/* inputs (set in check_render), outputs */
+   0, 0,			/* changed_inputs, private */
+   dtr,				/* destructor */
+   radeon_check_render,		/* check - recomputes stage->inputs */
+   radeon_run_render		/* run */
+};
+
+
+
+/**************************************************************************/
+
+/* Per GL primitive mode, the point/line/triangle-list hw primitive it is rasterized as. */
+static const GLuint reduced_hw_prim[GL_POLYGON+1] = {
+   RADEON_CP_VC_CNTL_PRIM_TYPE_POINT,		/* GL_POINTS */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,		/* GL_LINES */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,		/* GL_LINE_LOOP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,		/* GL_LINE_STRIP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_TRIANGLES */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_TRIANGLE_STRIP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_TRIANGLE_FAN */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_QUADS */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_QUAD_STRIP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST		/* GL_POLYGON */
+};
+
+static void radeonRasterPrimitive( GLcontext *ctx, GLuint hwprim );
+static void radeonRenderPrimitive( GLcontext *ctx, GLenum prim );
+static void radeonResetLineStipple( GLcontext *ctx );
+
+
+/***********************************************************************
+ *                    Emit primitives as inline vertices               *
+ ***********************************************************************/
+
+#define CTX_ARG radeonContextPtr rmesa
+#define CTX_ARG2 rmesa
+#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
+#define ALLOC_VERTS( n, size ) radeonAllocDmaLowVerts( rmesa, n, size * 4 )
+#undef LOCAL_VARS
+#define LOCAL_VARS						\
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);		\
+   const GLuint shift = rmesa->swtcl.vertex_stride_shift;	\
+   const char *radeonverts = (char *)rmesa->swtcl.verts;
+#define VERT(x) (radeonVertex *)(radeonverts + (x << shift))
+#define VERTEX radeonVertex 
+#undef TAG
+#define TAG(x) radeon_##x
+#include "tnl_dd/t_dd_triemit.h"
+
+
+/***********************************************************************
+ *          Macros for t_dd_tritmp.h to draw basic primitives          *
+ ***********************************************************************/
+
+#define QUAD( a, b, c, d ) radeon_quad( rmesa, a, b, c, d )
+#define TRI( a, b, c )     radeon_triangle( rmesa, a, b, c )
+#define LINE( a, b )       radeon_line( rmesa, a, b )
+#define POINT( a )         radeon_point( rmesa, a )
+
+/***********************************************************************
+ *              Build render functions from dd templates               *
+ ***********************************************************************/
+
+#define RADEON_TWOSIDE_BIT	0x01
+#define RADEON_UNFILLED_BIT	0x02
+#define RADEON_OFFSET_BIT	0x04 /* drmMinor == 1 */
+#define RADEON_MAX_TRIFUNC	0x08
+
+/* Rasterization functions per RADEON_*_BIT combination (the index used by radeonChooseRenderState); presumably filled by the init functions generated from t_dd_tritmp.h -- confirm. */
+static struct {
+   points_func	        points;
+   line_func		line;
+   triangle_func	triangle;
+   quad_func		quad;
+} rast_tab[RADEON_MAX_TRIFUNC];
+
+
+#define DO_FALLBACK  0
+#define DO_OFFSET   (IND & RADEON_OFFSET_BIT)
+#define DO_UNFILLED (IND & RADEON_UNFILLED_BIT)
+#define DO_TWOSIDE  (IND & RADEON_TWOSIDE_BIT)
+#define DO_FLAT      0
+#define DO_TRI       1
+#define DO_QUAD      1
+#define DO_LINE      1
+#define DO_POINTS    1
+#define DO_FULL_QUAD 1
+
+#define HAVE_RGBA   1
+#define HAVE_SPEC   1
+#define HAVE_INDEX  0
+#define HAVE_BACK_COLORS  0
+#define HAVE_HW_FLATSHADE 1
+#define TAB rast_tab
+
+#define DEPTH_SCALE 1.0
+#define UNFILLED_TRI unfilled_tri
+#define UNFILLED_QUAD unfilled_quad
+#define VERT_X(_v) _v->v.x
+#define VERT_Y(_v) _v->v.y
+#define VERT_Z(_v) _v->v.z
+#define AREA_IS_CCW( a ) (a < 0)
+#define GET_VERTEX(e) (rmesa->swtcl.verts + (e<<rmesa->swtcl.vertex_stride_shift))
+
+#define VERT_SET_RGBA( v, c )    v->ui[coloroffset] = LE32_TO_CPU(*(GLuint *)c)
+#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
+#define VERT_SAVE_RGBA( idx )    color[idx] = CPU_TO_LE32(v[idx]->ui[coloroffset])
+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = LE32_TO_CPU(color[idx])
+
+#define VERT_SET_SPEC( v0, c )   if (havespec) {			\
+					v0->v.specular.red   = (c)[0];	\
+					v0->v.specular.green = (c)[1];	\
+					v0->v.specular.blue  = (c)[2]; }
+#define VERT_COPY_SPEC( v0, v1 ) if (havespec) {					\
+					v0->v.specular.red   = v1->v.specular.red;	\
+					v0->v.specular.green = v1->v.specular.green;	\
+					v0->v.specular.blue  = v1->v.specular.blue; }
+#define VERT_SAVE_SPEC( idx )    if (havespec) spec[idx] = CPU_TO_LE32(v[idx]->ui[5])
+#define VERT_RESTORE_SPEC( idx ) if (havespec) v[idx]->ui[5] = LE32_TO_CPU(spec[idx])
+
+#undef LOCAL_VARS
+#define LOCAL_VARS(n)							\
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);			\
+   GLuint color[n], spec[n];						\
+   GLuint coloroffset = (rmesa->swtcl.vertex_size == 4 ? 3 : 4);	\
+   GLboolean havespec = (rmesa->swtcl.vertex_size > 4);			\
+   (void) color; (void) spec; (void) coloroffset; (void) havespec;
+
+/***********************************************************************
+ *                Helpers for rendering unfilled primitives            *
+ ***********************************************************************/
+
+#define RASTERIZE(x) radeonRasterPrimitive( ctx, reduced_hw_prim[x] )
+#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
+#undef TAG
+#define TAG(x) x
+#include "tnl_dd/t_dd_unfilled.h"
+#undef IND
+
+
+/***********************************************************************
+ *                      Generate GL render functions                   *
+ ***********************************************************************/
+
+
+#define IND (0)
+#define TAG(x) x
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (RADEON_TWOSIDE_BIT)
+#define TAG(x) x##_twoside
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (RADEON_UNFILLED_BIT)
+#define TAG(x) x##_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (RADEON_TWOSIDE_BIT|RADEON_UNFILLED_BIT)
+#define TAG(x) x##_twoside_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (RADEON_OFFSET_BIT)
+#define TAG(x) x##_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (RADEON_TWOSIDE_BIT|RADEON_OFFSET_BIT)
+#define TAG(x) x##_twoside_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (RADEON_UNFILLED_BIT|RADEON_OFFSET_BIT)
+#define TAG(x) x##_unfilled_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (RADEON_TWOSIDE_BIT|RADEON_UNFILLED_BIT|RADEON_OFFSET_BIT)
+#define TAG(x) x##_twoside_unfilled_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+/* Call the init function of each of the eight t_dd_tritmp.h instantiations above (one per IND combination). */
+static void init_rast_tab( void )
+{
+   init();
+   init_twoside();
+   init_unfilled();
+   init_twoside_unfilled();
+   init_offset();
+   init_twoside_offset();
+   init_unfilled_offset();
+   init_twoside_unfilled_offset();
+}
+
+/**********************************************************************/
+/*               Render unclipped begin/end objects                   */
+/**********************************************************************/
+
+#define VERT(x) (radeonVertex *)(radeonverts + (x << shift))
+#define RENDER_POINTS( start, count )		\
+   for ( ; start < count ; start++)		\
+      radeon_point( rmesa, VERT(start) )
+#define RENDER_LINE( v0, v1 ) \
+   radeon_line( rmesa, VERT(v0), VERT(v1) )
+#define RENDER_TRI( v0, v1, v2 )  \
+   radeon_triangle( rmesa, VERT(v0), VERT(v1), VERT(v2) )
+#define RENDER_QUAD( v0, v1, v2, v3 ) \
+   radeon_quad( rmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) )
+#undef INIT
+#define INIT(x) do {					\
+   radeonRenderPrimitive( ctx, x );			\
+} while (0)
+#undef LOCAL_VARS
+#define LOCAL_VARS						\
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);		\
+   const GLuint shift = rmesa->swtcl.vertex_stride_shift;		\
+   const char *radeonverts = (char *)rmesa->swtcl.verts;		\
+   const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
+   const GLboolean stipple = ctx->Line.StippleFlag;		\
+   (void) elt; (void) stipple;
+#define RESET_STIPPLE	if ( stipple ) radeonResetLineStipple( ctx );
+#define RESET_OCCLUSION
+#define PRESERVE_VB_DEFS
+#define ELT(x) (x)
+#define TAG(x) radeon_##x##_verts
+#include "tnl/t_vb_rendertmp.h"
+#undef ELT
+#undef TAG
+#define TAG(x) radeon_##x##_elts
+#define ELT(x) elt[x]
+#include "tnl/t_vb_rendertmp.h"
+
+
+
+/**********************************************************************/
+/*                    Choose render functions                         */
+/**********************************************************************/
+/* Install the rast_tab entry and prim tables matching the twoside/unfilled/offset caps; does nothing unless TclFallback is set and Fallback is clear. */
+void radeonChooseRenderState( GLcontext *ctx )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint index = 0;
+   GLuint flags = ctx->_TriangleCaps;
+
+   if (!rmesa->TclFallback || rmesa->Fallback) 
+      return;
+
+   if (flags & DD_TRI_LIGHT_TWOSIDE) index |= RADEON_TWOSIDE_BIT;
+   if (flags & DD_TRI_UNFILLED)      index |= RADEON_UNFILLED_BIT;
+   if ((flags & DD_TRI_OFFSET) &&
+       rmesa->dri.drmMinor == 1)  index |= RADEON_OFFSET_BIT;
+
+   if (index != rmesa->swtcl.RenderIndex) {
+      tnl->Driver.Render.Points = rast_tab[index].points;
+      tnl->Driver.Render.Line = rast_tab[index].line;
+      tnl->Driver.Render.ClippedLine = rast_tab[index].line;
+      tnl->Driver.Render.Triangle = rast_tab[index].triangle;
+      tnl->Driver.Render.Quad = rast_tab[index].quad;
+
+      if (index == 0) {
+	 tnl->Driver.Render.PrimTabVerts = radeon_render_tab_verts;
+	 tnl->Driver.Render.PrimTabElts = radeon_render_tab_elts;
+	 tnl->Driver.Render.ClippedPolygon = radeon_fast_clipped_poly;
+      } else {
+	 tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
+	 tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
+	 tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
+      }
+
+      rmesa->swtcl.RenderIndex = index;
+   }
+}
+
+
+/**********************************************************************/
+/*                 High level hooks for t_vb_render.c                 */
+/**********************************************************************/
+
+/* Set swtcl.hw_primitive to hwprim, issuing RADEON_NEWPRIM only when the value actually changes. */
+static void radeonRasterPrimitive( GLcontext *ctx, GLuint hwprim )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (rmesa->swtcl.hw_primitive != hwprim) {
+      RADEON_NEWPRIM( rmesa );
+      rmesa->swtcl.hw_primitive = hwprim;
+   }
+}
+/* tnl PrimitiveNotify hook: record prim and select its reduced hw primitive, except for prims >= GL_TRIANGLES while DD_TRI_UNFILLED is set. */
+static void radeonRenderPrimitive( GLcontext *ctx, GLenum prim )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   rmesa->swtcl.render_primitive = prim;
+   if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED)) 
+      radeonRasterPrimitive( ctx, reduced_hw_prim[prim] );
+}
+/* tnl Render.Finish hook: intentionally empty. */
+static void radeonRenderFinish( GLcontext *ctx )
+{
+}
+/* tnl ResetLineStipple hook: issues RADEON_STATECHANGE on the `lin` state. */
+static void radeonResetLineStipple( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   RADEON_STATECHANGE( rmesa, lin );
+}
+
+
+/**********************************************************************/
+/*           Transition to/from hardware rasterization.               */
+/**********************************************************************/
+/* Descriptions of the rasterization fallback bits: entry i describes bit (1 << i), as looked up by getFallbackString(). Every entry needs its own comma, or adjacent literals are concatenated. */
+static char *fallbackStrings[] = {
+   "Texture mode",
+   "glDrawBuffer(GL_FRONT_AND_BACK)",
+   "glEnable(GL_STENCIL) without hw stencil buffer",
+   "glRenderMode(selection or feedback)",
+   "glBlendEquation",
+   "glBlendFunc(mode != ADD)",
+   "RADEON_NO_RAST"
+};
+
+/* Return the description of a fallback bit (index = position of its highest set bit), or a placeholder when that index is outside fallbackStrings[]. */
+static char *getFallbackString(GLuint bit)
+{
+   const GLuint nr = sizeof(fallbackStrings) / sizeof(fallbackStrings[0]);
+   GLuint i;
+
+   for (i = 0 ; bit > 1 ; bit >>= 1)
+      i++;
+   return (i < nr) ? fallbackStrings[i] : "(unknown fallback)";
+}
+
+/* Set (mode true) or clear a rasterization fallback bit; the tnl render hooks are switched only when the first bit is set or the last one is cleared. */
+void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLuint oldfallback = rmesa->Fallback;
+
+   if (mode) {
+      rmesa->Fallback |= bit;
+      if (oldfallback == 0) {
+	 RADEON_FIREVERTICES( rmesa );
+	 TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_TRUE );
+	 _swsetup_Wakeup( ctx );
+	 _tnl_need_projected_coords( ctx, GL_TRUE );
+	 rmesa->swtcl.RenderIndex = ~0;
+         if (RADEON_DEBUG & DEBUG_FALLBACKS) {
+            fprintf(stderr, "Radeon begin rasterization fallback: 0x%x %s\n",
+                    bit, getFallbackString(bit));
+         }
+      }
+   }
+   else {
+      rmesa->Fallback &= ~bit;
+      if (oldfallback == bit) {
+	 _swrast_flush( ctx );
+	 tnl->Driver.Render.Start = radeonRenderStart;
+	 tnl->Driver.Render.PrimitiveNotify = radeonRenderPrimitive;
+	 tnl->Driver.Render.Finish = radeonRenderFinish;
+	 tnl->Driver.Render.BuildVertices = radeonBuildVertices;
+	 tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple;
+	 TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_FALSE );
+	 if (rmesa->TclFallback) {
+	    /* These are already done if rmesa->TclFallback goes to
+	     * zero above. But not if it doesn't (RADEON_NO_TCL for
+	     * example?)
+	     */
+	    radeonChooseVertexState( ctx );
+	    radeonChooseRenderState( ctx );
+	 }
+         if (RADEON_DEBUG & DEBUG_FALLBACKS) {
+            fprintf(stderr, "Radeon end rasterization fallback: 0x%x %s\n",
+                    bit, getFallbackString(bit));
+         }
+      }
+   }
+}
+
+
+/**********************************************************************/
+/*                            Initialization.                         */
+/**********************************************************************/
+/* Initialise the shared rast/setup tables on the first call, then install the tnl render hooks and allocate this context's swtcl vertex store. */
+void radeonInitSwtcl( GLcontext *ctx )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint size = TNL_CONTEXT(ctx)->vb.Size;
+   static int firsttime = 1;
+
+   if (firsttime) {
+      init_rast_tab();
+      init_setup_tab();
+      firsttime = 0;
+   }
+
+   tnl->Driver.Render.Start = radeonRenderStart;
+   tnl->Driver.Render.Finish = radeonRenderFinish;
+   tnl->Driver.Render.PrimitiveNotify = radeonRenderPrimitive;
+   tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple;
+   tnl->Driver.Render.BuildVertices = radeonBuildVertices;
+
+   rmesa->swtcl.verts = (char *)ALIGN_MALLOC( size * 16 * 4, 32 );
+   rmesa->swtcl.RenderIndex = ~0;
+   rmesa->swtcl.render_primitive = GL_TRIANGLES;
+   rmesa->swtcl.hw_primitive = 0;
+}
+
+/* Release the indexed_verts dma region if held, then free the swtcl vertex store and both Ubyte color arrays, zeroing each freed pointer. */
+void radeonDestroySwtcl( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (rmesa->swtcl.indexed_verts.buf) 
+      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
+			      __FUNCTION__ );
+
+   if (rmesa->swtcl.verts) {
+      ALIGN_FREE(rmesa->swtcl.verts);
+      rmesa->swtcl.verts = 0;
+   }
+
+   if (rmesa->UbyteSecondaryColor.Ptr) {
+      ALIGN_FREE(rmesa->UbyteSecondaryColor.Ptr);
+      rmesa->UbyteSecondaryColor.Ptr = 0;
+   }
+
+   if (rmesa->UbyteColor.Ptr) {
+      ALIGN_FREE(rmesa->UbyteColor.Ptr);
+      rmesa->UbyteColor.Ptr = 0;
+   }
+}
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_swtcl.h b/xc/lib/GL/mesa/src/drv/radeon/radeon_swtcl.h
new file mode 100644
index 000000000..fe874cd7b
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_swtcl.h
@@ -0,0 +1,76 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_swtcl.h,v 1.1 2002/10/30 12:51:57 alanh Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#ifndef __RADEON_TRIS_H__
+#define __RADEON_TRIS_H__
+
+#include "mtypes.h"
+#include "swrast/swrast.h"
+#include "radeon_context.h"
+/* Per-context software-TCL setup and teardown. */
+extern void radeonInitSwtcl( GLcontext *ctx );
+extern void radeonDestroySwtcl( GLcontext *ctx );
+/* Re-select the rasterization functions / vertex setup from current GL state. */
+extern void radeonChooseRenderState( GLcontext *ctx );
+extern void radeonChooseVertexState( GLcontext *ctx );
+
+extern void radeonCheckTexSizes( GLcontext *ctx );
+
+extern void radeonBuildVertices( GLcontext *ctx, GLuint start, GLuint count,
+				 GLuint newinputs );
+
+extern void radeonPrintSetupFlags(char *msg, GLuint flags );
+
+/* Vertex emit helpers; start and count delimit the vertex range [start, count). */
+extern void radeon_emit_contiguous_verts( GLcontext *ctx,
+					  GLuint start,
+					  GLuint count );
+
+extern void radeon_emit_indexed_verts( GLcontext *ctx,
+				       GLuint start,
+				       GLuint count );
+
+extern void radeon_translate_vertex( GLcontext *ctx, 
+				     const radeonVertex *src, 
+				     SWvertex *dst );
+
+extern void radeon_print_vertex( GLcontext *ctx, const radeonVertex *v );
+
+extern void radeon_import_float_colors( GLcontext *ctx );
+extern void radeon_import_float_spec_colors( GLcontext *ctx );
+
+
+
+#endif
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_tcl.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_tcl.c
new file mode 100644
index 000000000..c8fc07474
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_tcl.c
@@ -0,0 +1,545 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_tcl.c,v 1.1 2002/10/30 12:51:57 alanh Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Austin, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#include "radeon_context.h"
+#include "radeon_state.h"
+#include "radeon_ioctl.h"
+#include "radeon_tex.h"
+#include "radeon_tcl.h"
+#include "radeon_swtcl.h"
+#include "radeon_maos.h"
+
+#include "mmath.h"
+#include "mtypes.h"
+#include "enums.h"
+#include "colormac.h"
+#include "light.h"
+
+#include "array_cache/acache.h"
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+
+
+
+/*
+ * Render unclipped vertex buffers by emitting vertices directly to
+ * dma buffers.  Use strip/fan hardware primitives where possible.
+ * Try to simulate missing primitives with indexed vertices.
+ */
+#define HAVE_POINTS      1
+#define HAVE_LINES       1
+#define HAVE_LINE_LOOP   0
+#define HAVE_LINE_STRIPS 1
+#define HAVE_TRIANGLES   1
+#define HAVE_TRI_STRIPS  1
+#define HAVE_TRI_STRIP_1 0
+#define HAVE_TRI_FANS    1
+#define HAVE_QUADS       0
+#define HAVE_QUAD_STRIPS 0
+#define HAVE_POLYGONS    1
+#define HAVE_ELTS        1
+
+
+#define HW_POINTS           RADEON_CP_VC_CNTL_PRIM_TYPE_POINT
+#define HW_LINES            RADEON_CP_VC_CNTL_PRIM_TYPE_LINE
+#define HW_LINE_LOOP        0
+#define HW_LINE_STRIP       RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP
+#define HW_TRIANGLES        RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST
+#define HW_TRIANGLE_STRIP_0 RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP
+#define HW_TRIANGLE_STRIP_1 0
+#define HW_TRIANGLE_FAN     RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN
+#define HW_QUADS            0
+#define HW_QUAD_STRIP       0
+#define HW_POLYGON          RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN
+
+
+static GLboolean discreet_prim[0x10] = { /* indexed by (hw_prim & 0xf) in radeonTclPrimitive() */
+   0,				/* none */
+   1,				/* points */
+   1,				/* lines */
+   0,				/* line_strip */
+   1,				/* tri_list */
+   0,				/* tri_fan */
+   0,				/* tri_type_2 */
+   1,				/* rect list (unused) */
+   1,				/* 3 vert point */
+   1,				/* 3 vert line */
+   0,				/* a 0 entry always forces RADEON_NEWPRIM; a 1 entry only when hw_primitive changes */
+   0,
+   0,				/* NOTE(review): labels above are not checked against the hw prim-type codes - confirm */
+   0,
+   0,				/* 16th slot has no initializer, so it is 0 as well */
+};
+   
+
+#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
+#define ELTS_VARS  GLushort *dest
+
+#define ELT_INIT(prim, hw_prim) \
+   radeonTclPrimitive( ctx, prim, hw_prim | RADEON_CP_VC_CNTL_PRIM_WALK_IND )
+
+#define GET_ELTS() rmesa->tcl.Elts
+
+
+#define NEW_PRIMITIVE()  RADEON_NEWPRIM( rmesa )
+#define NEW_BUFFER()  radeonRefillCurrentDmaRegion( rmesa )
+
+/* Don't really know how many elts will fit in what's left of cmdbuf,
+ * as there is state to emit, etc:
+ */
+
+#if 0
+#define GET_CURRENT_VB_MAX_ELTS() \
+   ((RADEON_CMD_BUF_SZ - (rmesa->store.cmd_used + 16)) / 2) 
+#define GET_SUBSEQUENT_VB_MAX_ELTS() ((RADEON_CMD_BUF_SZ - 16) / 2) 
+#else
+/* Testing on isosurf shows a maximum around here.  Don't know if it's
+ * the card or driver or kernel module that is causing the behaviour.
+ */
+#define GET_CURRENT_VB_MAX_ELTS() 300
+#define GET_SUBSEQUENT_VB_MAX_ELTS() 300
+#endif
+
+#define RESET_STIPPLE() do {			\
+   RADEON_STATECHANGE( rmesa, lin );		\
+   radeonEmitState( rmesa );			\
+} while (0)
+
+#define AUTO_STIPPLE( mode )  do {		\
+   RADEON_STATECHANGE( rmesa, lin );		\
+   if (mode)					\
+      rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] |=	\
+	 RADEON_LINE_PATTERN_AUTO_RESET;	\
+   else						\
+      rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &=	\
+	 ~RADEON_LINE_PATTERN_AUTO_RESET;	\
+   radeonEmitState( rmesa );			\
+} while (0)
+
+
+/* How do you extend an existing primitive?
+ */
+#define ALLOC_ELTS(nr)							\
+do {									\
+   if (rmesa->dma.flush == radeonFlushElts &&				\
+       rmesa->store.cmd_used + (nr)*2 < RADEON_CMD_BUF_SZ) {		\
+      /* Elts already pending and room left: append 2 bytes per elt. */	\
+      dest = (GLushort *)(rmesa->store.cmd_buf + 			\
+			  rmesa->store.cmd_used);			\
+      rmesa->store.cmd_used += (nr)*2;					\
+   }									\
+   else {								\
+      if (rmesa->dma.flush)						\
+	 rmesa->dma.flush( rmesa );					\
+      /* Otherwise flush what is pending and open a new allocation. */	\
+      radeonEmitAOS( rmesa,						\
+	  	     rmesa->tcl.aos_components,				\
+		     rmesa->tcl.nr_aos_components,			\
+		     0 );						\
+      /* (nr) is parenthesized everywhere so expression args work. */	\
+      dest = radeonAllocEltsOpenEnded( rmesa,				\
+				       rmesa->tcl.vertex_format,	\
+				       rmesa->tcl.hw_primitive,		\
+				       (nr) );				\
+   }									\
+} while (0) 
+
+
+
+/* TODO: Try to extend existing primitive if both are identical,
+ * discrete and there are no intervening state changes.  (Somewhat
+ * duplicates changes to DrawArrays code)
+ */
+static void EMIT_PRIM( GLcontext *ctx, 
+		       GLenum prim, 
+		       GLuint hwprim, 
+		       GLuint start, 
+		       GLuint count)	
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   radeonTclPrimitive( ctx, prim, hwprim );
+   /* radeonTclPrimitive() records the hw primitive in rmesa->tcl.hw_primitive. */
+   radeonEmitAOS( rmesa,
+		  rmesa->tcl.aos_components,
+		  rmesa->tcl.nr_aos_components,
+		  start );
+   /* 'count' is an end index here: count - start is what gets passed on below. */
+   /* Why couldn't this packet have taken an offset param?
+    */
+   radeonEmitVbufPrim( rmesa,
+		       rmesa->tcl.vertex_format,
+		       rmesa->tcl.hw_primitive,
+		       count - start );
+}
+
+
+
+/* Try & join small primitives
+ */
+#if 0
+#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM ) 0
+#else
+#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM )			\
+  ((NR) < 20 ||							\
+   ((NR) < 40 &&						\
+    rmesa->tcl.hw_primitive == (PRIM|				\
+			    RADEON_CP_VC_CNTL_PRIM_WALK_IND|	\
+			    RADEON_CP_VC_CNTL_TCL_ENABLE)))
+#endif
+
+#ifdef MESA_BIG_ENDIAN
+/* Elts are pair-swapped within each 32-bit word; needed because dest may be only 16-bit aligned. */
+#define EMIT_ELT(offset, x) do {				\
+	int off = (offset) + ( ( (unsigned long)dest & 0x2 ) >> 1 );	\
+	GLushort *des = (GLushort *)( (unsigned long)dest & ~0x2UL );	\
+	(des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x); } while (0)
+#else
+#define EMIT_ELT(offset, x) (dest)[offset] = (GLushort) (x)
+#endif
+#define EMIT_TWO_ELTS(offset, x, y)  *(GLuint *)(dest+(offset)) = ((y)<<16)|(x); /* trailing ';' kept as in the original */
+#define INCR_ELTS( nr ) dest += (nr)	/* advance the elt write pointer by nr elts */
+#define RELEASE_ELT_VERTS() \
+   radeonReleaseArrays( ctx, ~0 )
+
+
+
+#define TAG(x) tcl_##x
+#include "tnl_dd/t_dd_dmatmp2.h"
+
+/**********************************************************************/
+/*                          External entrypoints                     */
+/**********************************************************************/
+
+void radeonEmitPrimitive( GLcontext *ctx, 
+			  GLuint first,
+			  GLuint last,
+			  GLuint flags )
+{  /* Dispatch to the tcl_ "verts" render table entry selected by the mode bits of 'flags'. */
+   tcl_render_tab_verts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
+}
+
+void radeonEmitEltPrimitive( GLcontext *ctx, 
+			     GLuint first,
+			     GLuint last,
+			     GLuint flags )
+{  /* Dispatch to the tcl_ "elts" render table entry selected by the mode bits of 'flags'. */
+   tcl_render_tab_elts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
+}
+
+void radeonTclPrimitive( GLcontext *ctx, 
+			 GLenum prim,
+			 int hw_prim )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint se_cntl;
+   GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
+   /* Start a new primitive unless the same "discreet" hw primitive is simply being repeated. */
+   if (newprim != rmesa->tcl.hw_primitive ||
+       !discreet_prim[hw_prim&0xf]) {
+      RADEON_NEWPRIM( rmesa );
+      rmesa->tcl.hw_primitive = newprim;
+   }
+   /* Recompute the flat-shade vertex selection held in SET_SE_CNTL. */
+   se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
+   se_cntl &= ~RADEON_FLAT_SHADE_VTX_LAST;
+   /* Flat-shaded GL_POLYGON selects VTX_0; every other case selects VTX_LAST. */
+   if (prim == GL_POLYGON && (ctx->_TriangleCaps & DD_FLATSHADE)) 
+      se_cntl |= RADEON_FLAT_SHADE_VTX_0;
+   else
+      se_cntl |= RADEON_FLAT_SHADE_VTX_LAST;
+   /* Mark the 'set' state as changed only when the value actually differs. */
+   if (se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) {
+      RADEON_STATECHANGE( rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
+   }
+}
+
+
+/**********************************************************************/
+/*                          Render pipeline stage                     */
+/**********************************************************************/
+
+
+/* TCL render.
+ */
+static GLboolean radeon_run_tcl_render( GLcontext *ctx,
+					struct gl_pipeline_stage *stage )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   GLuint i,flags = 0,length;
+   /* Stage 'run' hook: GL_TRUE hands the VB on (software t&l), GL_FALSE ends the pipe here. */
+   /* TODO: separate this from the swtnl pipeline 
+    */
+   if (rmesa->TclFallback)
+      return GL_TRUE;	/* fallback to software t&l */
+
+   if (VB->Count == 0)
+      return GL_FALSE;
+
+   radeonReleaseArrays( ctx, stage->changed_inputs );
+   radeonEmitArrays( ctx, stage->inputs );
+
+   rmesa->tcl.Elts = VB->Elts;
+   /* Walk the primitive list until an entry carries PRIM_LAST. */
+   for (i = VB->FirstPrimitive ; !(flags & PRIM_LAST) ; i += length)
+   {
+      flags = VB->Primitive[i];
+      length = VB->PrimitiveLength[i];
+
+      if (RADEON_DEBUG & DEBUG_PRIMS)
+	 fprintf(stderr, "%s: prim %s %d..%d\n", 
+		 __FUNCTION__,
+		 _mesa_lookup_enum_by_nr(flags & PRIM_MODE_MASK), 
+		 i, i+length);
+      /* NOTE(review): a zero-length entry without PRIM_LAST would never advance i - presumably never produced; confirm. */
+      if (!length)
+	 continue;
+      /* Indexed path when the VB supplies Elts, contiguous vertices otherwise. */
+      if (rmesa->tcl.Elts)
+	 radeonEmitEltPrimitive( ctx, i, i+length, flags );
+      else
+	 radeonEmitPrimitive( ctx, i, i+length, flags );
+   }
+
+   return GL_FALSE;		/* finished the pipe */
+}
+
+
+
+static void radeon_check_tcl_render( GLcontext *ctx,
+				     struct gl_pipeline_stage *stage )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint inputs = VERT_OBJ;
+   /* Stage 'check' hook: VERT_OBJ is always requested, the rest depends on current GL state. */
+   if (ctx->RenderMode == GL_RENDER) {
+      /* Make all this event-driven:
+       */
+      if (ctx->Light.Enabled) {
+	 inputs |= VERT_NORM;
+
+	 if (ctx->Light.ColorMaterialEnabled) {
+	    inputs |= VERT_RGBA;
+	 }
+      }
+      else {
+	 inputs |= VERT_RGBA;
+	 
+	 if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) {
+	    inputs |= VERT_SPEC_RGB;
+	 }
+      }
+      /* Unit 0: with texgen on only normals (if needed) are requested, otherwise texcoords. */
+      if (ctx->Texture.Unit[0]._ReallyEnabled) {
+	 if (ctx->Texture.Unit[0].TexGenEnabled) {
+	    if (rmesa->TexGenNeedNormals[0]) {
+	       inputs |= VERT_NORM;
+	    }
+	 } else {
+	    inputs |= VERT_TEX(0);
+	 }
+      }
+      /* Unit 1: same rule as unit 0. */
+      if (ctx->Texture.Unit[1]._ReallyEnabled) {
+	 if (ctx->Texture.Unit[1].TexGenEnabled) {
+	    if (rmesa->TexGenNeedNormals[1]) {
+	       inputs |= VERT_NORM;
+	    }
+	 } else {
+	    inputs |= VERT_TEX(1);
+	 }
+      }
+      /* The stage is active only in GL_RENDER mode; see the else branch below. */
+      stage->inputs = inputs;
+      stage->active = 1;
+   }
+   else
+      stage->active = 0;
+}
+
+static void radeon_init_tcl_render( GLcontext *ctx,
+				    struct gl_pipeline_stage *stage )
+{  /* Initial 'check' hook: installs the real check function and runs it once. */
+   stage->check = radeon_check_tcl_render;
+   stage->check( ctx, stage );
+}
+
+static void dtr( struct gl_pipeline_stage *stage )
+{  /* Stage destructor: does nothing. */
+   (void)stage;	/* silence the unused-parameter warning */
+}
+
+
+/* Initial state for tcl stage.  
+ */
+const struct gl_pipeline_stage _radeon_tcl_stage =
+{
+   "radeon render",
+   (_DD_NEW_SEPARATE_SPECULAR |
+    _NEW_LIGHT|
+    _NEW_TEXTURE|
+    _NEW_FOG|
+    _NEW_RENDERMODE),		/* re-check (new inputs) */
+   0,				/* re-run (always runs) */
+   GL_TRUE,			/* active */
+   0, 0,			/* inputs (set in check_render), outputs */
+   0, 0,			/* changed_inputs, private */
+   dtr,				/* destructor (no-op) */
+   radeon_init_tcl_render,	/* check - first call installs radeon_check_tcl_render */
+   radeon_run_tcl_render	/* run */
+};
+
+
+
+/**********************************************************************/
+/*                 Validate state at pipeline start                   */
+/**********************************************************************/
+
+
+/*-----------------------------------------------------------------------
+ * Manage TCL fallbacks
+ */
+
+
+static void transition_to_swtnl( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLuint se_cntl;
+   /* Called by radeonTclFallback() when the first fallback bit gets set. */
+   RADEON_NEWPRIM( rmesa );
+   rmesa->swtcl.vertex_format = 0;
+
+   radeonChooseVertexState( ctx );
+   radeonChooseRenderState( ctx );
+
+   _mesa_validate_all_lighting_tables( ctx ); 
+   /* From now on material changes revalidate the lighting tables. */
+   tnl->Driver.NotifyMaterialChange = 
+      _mesa_validate_all_lighting_tables;
+
+   radeonReleaseArrays( ctx, ~0 );
+   /* Force RADEON_FLAT_SHADE_VTX_LAST on; touch the 'set' state only if that changes it. */
+   se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
+   se_cntl |= RADEON_FLAT_SHADE_VTX_LAST;
+	 
+   if (se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) {
+      RADEON_STATECHANGE( rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
+   }
+}
+
+
+static void transition_to_hwtnl( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLuint se_coord_fmt = (RADEON_VTX_W0_IS_NOT_1_OVER_W0 |
+			  RADEON_TEX1_W_ROUTING_USE_Q1);
+   /* Called by radeonTclFallback() when the last fallback bit gets cleared. */
+   if ( se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT] ) {
+      RADEON_STATECHANGE( rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
+      _tnl_need_projected_coords( ctx, GL_FALSE );
+   }
+
+   radeonUpdateMaterial( ctx );
+   /* Material changes are routed to radeonUpdateMaterial from here on. */
+   tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial;
+   /* Flush anything pending, then drop the flush callback. */
+   if ( rmesa->dma.flush )			
+      rmesa->dma.flush( rmesa );	
+
+   rmesa->dma.flush = 0;
+   rmesa->swtcl.vertex_format = 0;
+   /* Release the swtcl indexed-vertex DMA region if one is held. */
+   if (rmesa->swtcl.indexed_verts.buf) 
+      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
+			      __FUNCTION__ );
+
+   if (RADEON_DEBUG & DEBUG_FALLBACKS) 
+      fprintf(stderr, "Radeon end tcl fallback\n");
+}
+
+static char *fallbackStrings[] = {	/* entry n is looked up for fallback bit (1 << n) by getFallbackString() */
+   "Rasterization fallback",
+   "Unfilled triangles",
+   "Twosided lighting, differing materials",
+   "Materials in VB (maybe between begin/end)",
+   "Texgen unit 0",
+   "Texgen unit 1",
+   "Texgen unit 2",
+   "User disable"	/* eight entries in total: bits 0x1 through 0x80 */
+};
+
+
+static char *getFallbackString(GLuint bit)	/* debug name for a single fallback bit */
+{
+   GLuint i;			/* ends up as the position of the highest set bit in 'bit' */
+   for (i = 0; bit > 1; i++)
+      bit >>= 1;
+   if (i >= sizeof(fallbackStrings) / sizeof(fallbackStrings[0]))
+      return "Unknown fallback";	/* no table entry: do not read past the end */
+   return fallbackStrings[i];
+}
+
+
+
+void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+{  /* Set (mode true) or clear (mode false) 'bit' in rmesa->TclFallback. */
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint oldfallback = rmesa->TclFallback;
+   /* Paths are switched only when going from no bits to some, or when the only set bit is cleared. */
+   if (mode) {
+      rmesa->TclFallback |= bit;
+      if (oldfallback == 0) {
+	 if (RADEON_DEBUG & DEBUG_FALLBACKS) 
+	    fprintf(stderr, "Radeon begin tcl fallback %s\n",
+		    getFallbackString( bit ));
+	 transition_to_swtnl( ctx );
+      }
+   }
+   else {
+      rmesa->TclFallback &= ~bit;
+      if (oldfallback == bit) {
+	 if (RADEON_DEBUG & DEBUG_FALLBACKS) 
+	    fprintf(stderr, "Radeon end tcl fallback %s\n",
+		    getFallbackString( bit ));
+	 transition_to_hwtnl( ctx );
+      }
+   }
+}
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_tcl.h b/xc/lib/GL/mesa/src/drv/radeon/radeon_tcl.h
new file mode 100644
index 000000000..c35916c6a
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_tcl.h
@@ -0,0 +1,66 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_tcl.h,v 1.2 2003/02/08 21:26:45 dawes Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Austin, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#ifndef __RADEON_TCL_H__
+#define __RADEON_TCL_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+#include "radeon_context.h"
+
+extern void radeonTclPrimitive( GLcontext *ctx, GLenum prim, int hw_prim );
+extern void radeonEmitEltPrimitive( GLcontext *ctx, GLuint first, GLuint last,
+				    GLuint flags );
+extern void radeonEmitPrimitive( GLcontext *ctx, GLuint first, GLuint last,
+				 GLuint flags );
+/* 'bit' is one of the RADEON_TCL_FALLBACK_* values below; 'mode' sets or clears it. */
+extern void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode );
+/* Fallback reasons: one bit each. */
+#define RADEON_TCL_FALLBACK_RASTER            0x1 /* rasterization */
+#define RADEON_TCL_FALLBACK_UNFILLED          0x2 /* unfilled tris */
+#define RADEON_TCL_FALLBACK_LIGHT_TWOSIDE     0x4 /* twoside tris */
+#define RADEON_TCL_FALLBACK_MATERIAL          0x8 /* material in vb */
+#define RADEON_TCL_FALLBACK_TEXGEN_0          0x10 /* texgen, unit 0 */
+#define RADEON_TCL_FALLBACK_TEXGEN_1          0x20 /* texgen, unit 1 */
+#define RADEON_TCL_FALLBACK_TEXGEN_2          0x40 /* texgen, unit 2 */
+#define RADEON_TCL_FALLBACK_TCL_DISABLE       0x80 /* user disable */
+
+#define RADEON_MAX_TCL_VERTSIZE (15*4) 
+/* Shorthand that forwards its arguments unchanged to radeonTclFallback(). */
+#define TCL_FALLBACK( ctx, bit, mode )	radeonTclFallback( ctx, bit, mode )
+
+
+#endif /* GLX_DIRECT_RENDERING */
+#endif /* __RADEON_TCL_H__ */
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_tex.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_tex.c
index 701cd6f25..c804f48e2 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_tex.c
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_tex.c
@@ -1,4 +1,4 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_tex.c,v 1.6 2002/09/16 18:05:20 eich Exp $ */
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_tex.c,v 1.9 2002/12/16 16:18:59 dawes Exp $ */
 /*
  * Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
  *                      VA Linux Systems Inc., Fremont, California.
@@ -32,7 +32,7 @@
 #include "radeon_context.h"
 #include "radeon_state.h"
 #include "radeon_ioctl.h"
-#include "radeon_vb.h"
+#include "radeon_swtcl.h"
 #include "radeon_tex.h"
 
 #include "colormac.h"
@@ -40,11 +40,9 @@
 #include "enums.h"
 #include "image.h"
 #include "mem.h"
-#include "mmath.h"
 #include "simple_list.h"
 #include "texformat.h"
 #include "texstore.h"
-#include "texutil.h"
 
 
 /* =============================================================
@@ -65,6 +63,18 @@ static void radeonSetTexWrap( radeonTexObjPtr t, GLenum swrap, GLenum twrap )
    case GL_CLAMP_TO_EDGE:
       t->pp_txfilter |= RADEON_CLAMP_S_CLAMP_LAST;
       break;
+   case GL_CLAMP_TO_BORDER:
+      t->pp_txfilter |= RADEON_CLAMP_S_CLAMP_BORDER;
+      break;
+   case GL_MIRRORED_REPEAT:
+      t->pp_txfilter |= RADEON_CLAMP_S_MIRROR;
+      break;
+   case GL_MIRROR_CLAMP_ATI:
+      t->pp_txfilter |= RADEON_CLAMP_S_MIRROR_CLAMP_BORDER;
+      break;
+   case GL_MIRROR_CLAMP_TO_EDGE_ATI:
+      t->pp_txfilter |= RADEON_CLAMP_S_MIRROR_CLAMP_LAST;
+      break;
    }
 
    switch ( twrap ) {
@@ -77,6 +87,18 @@ static void radeonSetTexWrap( radeonTexObjPtr t, GLenum swrap, GLenum twrap )
    case GL_CLAMP_TO_EDGE:
       t->pp_txfilter |= RADEON_CLAMP_T_CLAMP_LAST;
       break;
+   case GL_CLAMP_TO_BORDER:
+      t->pp_txfilter |= RADEON_CLAMP_T_CLAMP_BORDER;
+      break;
+   case GL_MIRRORED_REPEAT:
+      t->pp_txfilter |= RADEON_CLAMP_T_MIRROR;
+      break;
+   case GL_MIRROR_CLAMP_ATI:
+      t->pp_txfilter |= RADEON_CLAMP_T_MIRROR_CLAMP_BORDER;
+      break;
+   case GL_MIRROR_CLAMP_TO_EDGE_ATI:
+      t->pp_txfilter |= RADEON_CLAMP_T_MIRROR_CLAMP_LAST;
+      break;
    }
 }
 
@@ -167,27 +189,19 @@ static radeonTexObjPtr radeonAllocTexObj( struct gl_texture_object *texObj )
    if (!t)
       return NULL;
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_API ) {
-      fprintf( stderr, "%s( %p, %p )\n",__FUNCTION__, (void*)texObj, (void*)t );
+   if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
+      fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, texObj, t );
    }
 
-   /* Initialize non-image-dependent parts of the state:
-    */
    t->tObj = texObj;
-#if 0
-   t->dirty_images = ~0;
-#endif
-   t->pp_txfilter = RADEON_BORDER_MODE_OGL;
-   t->pp_txformat = (RADEON_TXFORMAT_ENDIAN_NO_SWAP |
-		     RADEON_TXFORMAT_PERSPECTIVE_ENABLE);
-
    make_empty_list( t );
 
+   /* Initialize non-image-dependent parts of the state:
+    */
    radeonSetTexWrap( t, texObj->WrapS, texObj->WrapT );
    radeonSetTexMaxAnisotropy( t, texObj->MaxAnisotropy );
    radeonSetTexFilter( t, texObj->MinFilter, texObj->MagFilter );
    radeonSetTexBorderColor( t, texObj->BorderColor );
-
    return t;
 }
 
@@ -202,6 +216,7 @@ radeonChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
    switch ( internalFormat ) {
    case 4:
    case GL_RGBA:
+   case GL_COMPRESSED_RGBA:
       if ( format == GL_BGRA ) {
 	 if ( type == GL_UNSIGNED_INT_8_8_8_8_REV ) {
 	    return &_mesa_texformat_argb8888;
@@ -217,6 +232,7 @@ radeonChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
 
    case 3:
    case GL_RGB:
+   case GL_COMPRESSED_RGB:
       if ( format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5 ) {
 	 return &_mesa_texformat_rgb565;
       }
@@ -251,6 +267,7 @@ radeonChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
    case GL_ALPHA8:
    case GL_ALPHA12:
    case GL_ALPHA16:
+   case GL_COMPRESSED_ALPHA:
       return &_mesa_texformat_al88;
 
    case 1:
@@ -259,6 +276,7 @@ radeonChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
    case GL_LUMINANCE8:
    case GL_LUMINANCE12:
    case GL_LUMINANCE16:
+   case GL_COMPRESSED_LUMINANCE:
       return &_mesa_texformat_al88;
 
    case 2:
@@ -269,6 +287,7 @@ radeonChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
    case GL_LUMINANCE12_ALPHA4:
    case GL_LUMINANCE12_ALPHA12:
    case GL_LUMINANCE16_ALPHA16:
+   case GL_COMPRESSED_LUMINANCE_ALPHA:
       return &_mesa_texformat_al88;
 
    case GL_INTENSITY:
@@ -276,6 +295,7 @@ radeonChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
    case GL_INTENSITY8:
    case GL_INTENSITY12:
    case GL_INTENSITY16:
+   case GL_COMPRESSED_INTENSITY:
       return &_mesa_texformat_i8;
 
    default:
@@ -316,13 +336,6 @@ static void radeonTexImage1D( GLcontext *ctx, GLenum target, GLint level,
                           &ctx->Unpack, texObj, texImage);
 
    t->dirty_images |= (1 << level);
-
-   if ( t == rmesa->state.texture.unit[0].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX0 );
-   }
-   if ( t == rmesa->state.texture.unit[1].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX1 );
-   }
 }
 
 
@@ -357,13 +370,6 @@ static void radeonTexSubImage1D( GLcontext *ctx, GLenum target, GLint level,
 			     texImage);
 
    t->dirty_images |= (1 << level);
-
-   if ( t == rmesa->state.texture.unit[0].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX0 );
-   }
-   if ( t == rmesa->state.texture.unit[1].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX1 );
-   }
 }
 
 
@@ -378,6 +384,8 @@ static void radeonTexImage2D( GLcontext *ctx, GLenum target, GLint level,
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    radeonTexObjPtr t = (radeonTexObjPtr)texObj->DriverData;
 
+/*     fprintf(stderr, "%s\n", __FUNCTION__); */
+
    if ( t ) {
       radeonSwapOutTexObj( rmesa, t );
    }
@@ -396,13 +404,6 @@ static void radeonTexImage2D( GLcontext *ctx, GLenum target, GLint level,
                           &ctx->Unpack, texObj, texImage);
 
    t->dirty_images |= (1 << level);
-
-   if ( t == rmesa->state.texture.unit[0].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX0 );
-   }
-   if ( t == rmesa->state.texture.unit[1].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX1 );
-   }
 }
 
 
@@ -418,10 +419,11 @@ static void radeonTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    radeonTexObjPtr t = (radeonTexObjPtr) texObj->DriverData;
 
+/*     fprintf(stderr, "%s\n", __FUNCTION__); */
+
    assert( t ); /* this _should_ be true */
    if ( t ) {
       radeonSwapOutTexObj( rmesa, t );
-      t->dirty_images |= (1 << level);
    }
    else {
       t = radeonAllocTexObj(texObj);
@@ -437,88 +439,12 @@ static void radeonTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
 			     texImage);
 
    t->dirty_images |= (1 << level);
-
-   if ( t == rmesa->state.texture.unit[0].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX0 );
-   }
-   if ( t == rmesa->state.texture.unit[1].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX1 );
-   }
-}
-
-#if 0
-static void radeonTexImage3D( GLcontext *ctx, GLenum target, GLint level,
-                              GLint internalFormat,
-                              GLint width, GLint height, GLint depth,
-                              GLint border,
-                              GLenum format, GLenum type, const GLvoid *pixels,
-                              const struct gl_pixelstore_attrib *packing,
-                              struct gl_texture_object *texObj,
-                              struct gl_texture_image *texImage )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   radeonTexObjPtr t = (radeonTexObjPtr)texObj->DriverData;
-
-   if ( t ) {
-      radeonSwapOutTexObj( rmesa, t );
-   }
-   else {
-      t = radeonAllocTexObj( texObj );
-      if (!t) {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D");
-         return;
-      }
-      texObj->DriverData = t;
-   }
-
-   /* Note, this will call radeonChooseTextureFormat */
-   _mesa_store_teximage3d(ctx, target, level, internalFormat,
-                          width, height, depth, border, format, type, pixels,
-                          &ctx->Unpack, texObj, texImage);
-
-   t->dirty_images |= (1 << level);
-
-   if ( t == rmesa->state.texture.unit[0].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX0 );
-   }
-   if ( t == rmesa->state.texture.unit[1].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX1 );
-   }
 }
 
 
-static void radeonTexSubImage3D( GLcontext *ctx, GLenum target, GLint level,
-                                 GLint xoffset, GLint yoffset, GLint zoffset,
-                                 GLsizei width, GLsizei height, GLint depth,
-                                 GLenum format, GLenum type,
-                                 const GLvoid *pixels,
-                                 const struct gl_pixelstore_attrib *packing,
-                                 struct gl_texture_object *texObj,
-                                 struct gl_texture_image *texImage )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   radeonTexObjPtr t = (radeonTexObjPtr) texObj->DriverData;
-
-   assert( t ); /* this _should_ be true */
-
-   _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset,
-                             width, height, depth, format, type, pixels,
-                             packing, texObj, texImage);
-
-   t->dirty_images |= (1 << level);
-
-   if ( t == rmesa->state.texture.unit[0].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX0 );
-   }
-   if ( t == rmesa->state.texture.unit[1].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX1 );
-   }
-}
-#endif
-
 
 #define SCALED_FLOAT_TO_BYTE( x, scale ) \
-		((((GLint)((256.0F / scale) * (x))) - 1) / 2)
+		(((GLuint)((255.0F / scale) * (x))) / 2)
 
 static void radeonTexEnv( GLcontext *ctx, GLenum target,
 			  GLenum pname, const GLfloat *param )
@@ -527,7 +453,7 @@ static void radeonTexEnv( GLcontext *ctx, GLenum target,
    GLuint unit = ctx->Texture.CurrentUnit;
    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_API ) {
+   if ( RADEON_DEBUG & DEBUG_STATE ) {
       fprintf( stderr, "%s( %s )\n",
 	       __FUNCTION__, _mesa_lookup_enum_by_nr( pname ) );
    }
@@ -538,11 +464,9 @@ static void radeonTexEnv( GLcontext *ctx, GLenum target,
       GLuint envColor;
       UNCLAMPED_FLOAT_TO_RGBA_CHAN( c, texUnit->EnvColor );
       envColor = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
-      if ( rmesa->state.hw.texture[unit].pp_tfactor != envColor ) {
-	 if ( rmesa->state.texture.unit[unit].texobj ) {
-	    RADEON_STATECHANGE( rmesa, (RADEON_UPLOAD_TEX0 << unit) );
-         }
-	 rmesa->state.hw.texture[unit].pp_tfactor = envColor;
+      if ( rmesa->hw.tex[unit].cmd[TEX_PP_TFACTOR] != envColor ) {
+	 RADEON_STATECHANGE( rmesa, tex[unit] );
+	 rmesa->hw.tex[unit].cmd[TEX_PP_TFACTOR] = envColor;
       }
       break;
    }
@@ -560,14 +484,14 @@ static void radeonTexEnv( GLcontext *ctx, GLenum target,
       if ( bias == 0 ) {
 	 b = 0;
       } else if ( bias > 0 ) {
-	 b = ((GLuint)SCALED_FLOAT_TO_BYTE( bias, 4.0 )) << 8;
+	 b = ((GLuint)SCALED_FLOAT_TO_BYTE( bias, 4.0 )) << RADEON_LOD_BIAS_SHIFT;
       } else {
-	 b = ((GLuint)SCALED_FLOAT_TO_BYTE( bias, 1.0 )) << 8;
+	 b = ((GLuint)SCALED_FLOAT_TO_BYTE( bias, 1.0 )) << RADEON_LOD_BIAS_SHIFT;
       }
-      if ( rmesa->state.hw.texture[unit].pp_txfilter != b ) {
-	 if ( rmesa->state.texture.unit[unit].texobj )
-	    RADEON_STATECHANGE( rmesa, (RADEON_UPLOAD_TEX0 << unit) );
-	 rmesa->state.hw.texture[unit].pp_txfilter = b;
+      if ( (rmesa->hw.tex[unit].cmd[TEX_PP_TXFILTER] & RADEON_LOD_BIAS_MASK) != b ) {
+	 RADEON_STATECHANGE( rmesa, tex[unit] );
+	 rmesa->hw.tex[unit].cmd[TEX_PP_TXFILTER] &= ~RADEON_LOD_BIAS_MASK;
+	 rmesa->hw.tex[unit].cmd[TEX_PP_TXFILTER] |= (b & RADEON_LOD_BIAS_MASK);
       }
       break;
    }
@@ -584,8 +508,8 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target,
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    radeonTexObjPtr t = (radeonTexObjPtr) texObj->DriverData;
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_API ) {
-       fprintf( stderr, "%s( %s )\n",__FUNCTION__,
+   if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
+      fprintf( stderr, "%s( %s )\n", __FUNCTION__,
 	       _mesa_lookup_enum_by_nr( pname ) );
    }
 
@@ -596,6 +520,8 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target,
    switch ( pname ) {
    case GL_TEXTURE_MIN_FILTER:
    case GL_TEXTURE_MAG_FILTER:
+   case GL_TEXTURE_MAX_ANISOTROPY_EXT:
+      radeonSetTexMaxAnisotropy( t, texObj->MaxAnisotropy );
       radeonSetTexFilter( t, texObj->MinFilter, texObj->MagFilter );
       break;
 
@@ -608,10 +534,6 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target,
       radeonSetTexBorderColor( t, texObj->BorderColor );
       break;
 
-   case GL_TEXTURE_MAX_ANISOTROPY_EXT:
-      radeonSetTexMaxAnisotropy( t, texObj->MaxAnisotropy );
-      break;
-
    case GL_TEXTURE_BASE_LEVEL:
    case GL_TEXTURE_MAX_LEVEL:
    case GL_TEXTURE_MIN_LOD:
@@ -628,12 +550,9 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target,
       return;
    }
 
-   if ( t == rmesa->state.texture.unit[0].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX0 );
-   }
-   if ( t == rmesa->state.texture.unit[1].texobj ) {
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX1 );
-   }
+   /* Mark this texobj as dirty (one bit per tex unit)
+    */
+   t->dirty_state = TEX_ALL;
 }
 
 
@@ -644,8 +563,8 @@ static void radeonBindTexture( GLcontext *ctx, GLenum target,
    radeonTexObjPtr t = (radeonTexObjPtr) texObj->DriverData;
    GLuint unit = ctx->Texture.CurrentUnit;
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_API ) {
-      fprintf( stderr, "%s( %p ) unit=%d\n",__FUNCTION__, (void*)texObj, unit );
+   if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
+      fprintf( stderr, "%s( %p ) unit=%d\n", __FUNCTION__, texObj, unit );
    }
 
    if ( target == GL_TEXTURE_2D || target == GL_TEXTURE_1D ) {
@@ -662,8 +581,8 @@ static void radeonDeleteTexture( GLcontext *ctx,
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    radeonTexObjPtr t = (radeonTexObjPtr) texObj->DriverData;
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_API ) {
-      fprintf( stderr, "%s( %p )\n",__FUNCTION__, (void*)texObj );
+   if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
+      fprintf( stderr, "%s( %p )\n", __FUNCTION__, texObj );
    }
 
    if ( t ) {
@@ -717,6 +636,27 @@ static void radeonInitTextureObjects( GLcontext *ctx )
    ctx->Texture.CurrentUnit = tmp;
 }
 
+/* ctx->Driver.TexGen hook.  For hardware texgen we need:
+ *  - Same GEN_MODE for all active bits
+ *  - Same EyePlane/ObjPlane for all active bits when using Eye/Obj
+ *  - STRQ presumably all supported (matrix means incoming R values
+ *    can end up in STQ, this has implications for vertex support,
+ *    presumably ok if maos is used, though?)
+ *
+ * Checking that on the fly is impractical, so coord/pname/params are
+ * ignored: just flag the current unit for a recheck at validation time.
+ */
+static void radeonTexGen( GLcontext *ctx,
+			  GLenum coord,
+			  GLenum pname,
+			  const GLfloat *params )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint unit = ctx->Texture.CurrentUnit;
+   rmesa->recheck_texgen[unit] = GL_TRUE;
+}
+
+
 void radeonInitTextureFuncs( GLcontext *ctx )
 {
    ctx->Driver.ChooseTextureFormat	= radeonChooseTextureFormat;
@@ -730,7 +670,7 @@ void radeonInitTextureFuncs( GLcontext *ctx )
    ctx->Driver.CopyTexImage2D		= _swrast_copy_teximage2d;
    ctx->Driver.CopyTexSubImage1D	= _swrast_copy_texsubimage1d;
    ctx->Driver.CopyTexSubImage2D	= _swrast_copy_texsubimage2d;
-   ctx->Driver.CopyTexSubImage3D	= _swrast_copy_texsubimage3d;
+   ctx->Driver.CopyTexSubImage3D 	= _swrast_copy_texsubimage3d;
    ctx->Driver.TestProxyTexImage	= _mesa_test_proxy_teximage;
 
    ctx->Driver.BindTexture		= radeonBindTexture;
@@ -743,6 +683,7 @@ void radeonInitTextureFuncs( GLcontext *ctx )
 
    ctx->Driver.TexEnv			= radeonTexEnv;
    ctx->Driver.TexParameter		= radeonTexParameter;
+   ctx->Driver.TexGen                   = radeonTexGen;
 
    radeonInitTextureObjects( ctx );
 }
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_texmem.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_texmem.c
index efa1fa26a..f87853b35 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_texmem.c
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_texmem.c
@@ -1,4 +1,4 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_texmem.c,v 1.4 2002/09/16 18:05:20 eich Exp $ */
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_texmem.c,v 1.7 2002/12/16 16:18:59 dawes Exp $ */
 /**************************************************************************
 
 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
@@ -35,17 +35,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #include "radeon_context.h"
-#include "radeon_state.h"
-#include "radeon_ioctl.h"
-#include "radeon_vb.h"
 #include "radeon_tex.h"
 
 #include "context.h"
-#include "colormac.h"
-#include "mmath.h"
-#include "macros.h"
 #include "simple_list.h"
-#include "enums.h"
 #include "mem.h"
 
 
@@ -57,8 +50,8 @@ void radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t )
    if ( !t )
       return;
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_TEXTURE ) {
-      fprintf( stderr, "%s( %p, %p )\n",__FUNCTION__, (void*)t, (void*)t->tObj );
+   if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
+      fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, t, t->tObj );
    }
 
    if ( t->memBlock ) {
@@ -75,12 +68,14 @@ void radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t )
 
       if ( t == rmesa->state.texture.unit[0].texobj ) {
          rmesa->state.texture.unit[0].texobj = NULL;
-         rmesa->state.hw.dirty &= ~RADEON_UPLOAD_TEX0;
+	 remove_from_list( &rmesa->hw.tex[0] );
+	 make_empty_list( &rmesa->hw.tex[0] );
       }
 
       if ( t == rmesa->state.texture.unit[1].texobj ) {
          rmesa->state.texture.unit[1].texobj = NULL;
-         rmesa->state.hw.dirty &= ~RADEON_UPLOAD_TEX1;
+	 remove_from_list( &rmesa->hw.tex[1] );
+	 make_empty_list( &rmesa->hw.tex[1] );
       }
    }
 
@@ -88,12 +83,13 @@ void radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t )
    FREE( t );
 }
 
+
 /* Keep track of swapped out texture objects.
  */
 void radeonSwapOutTexObj( radeonContextPtr rmesa, radeonTexObjPtr t )
 {
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_TEXTURE ) {
-      fprintf( stderr, "%s( %p, %p )\n",__FUNCTION__, (void*)t, (void*)t->tObj );
+   if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
+      fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, t, t->tObj );
    }
 
    /* Bump the performace counter */
@@ -124,8 +120,7 @@ void radeonPrintLocalLRU( radeonContextPtr rmesa, int heap )
 		  t->memBlock->ofs,
 		  t->memBlock->size );
       } else {
-	 fprintf( stderr, "Texture (bound %d) at 0x%x sz 0x%x\n",
-		  t->bound,
+	 fprintf( stderr, "Texture at 0x%x sz 0x%x\n",
 		  t->memBlock->ofs,
 		  t->memBlock->size );
       }
@@ -139,7 +134,7 @@ void radeonPrintGlobalLRU( radeonContextPtr rmesa, int heap )
    radeon_tex_region_t *list = rmesa->sarea->texList[heap];
    int i, j;
 
-   fprintf( stderr, "\nGlobal LRU, heap %d list %p:\n", heap, (void*)list );
+   fprintf( stderr, "\nGlobal LRU, heap %d list %p:\n", heap, list );
 
    for ( i = 0, j = RADEON_NR_TEX_REGIONS ; i < RADEON_NR_TEX_REGIONS ; i++ ) {
       fprintf( stderr, "list[%d] age %d next %d prev %d\n",
@@ -329,9 +324,11 @@ static void radeonUploadSubImage( radeonContextPtr rmesa,
    GLuint format, pitch, offset;
    GLint imageWidth, imageHeight;
    GLint ret;
+   drmRadeonTexture tex;
+   drmRadeonTexImage tmp;
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_TEXTURE ) {
-      fprintf( stderr, "%s( %p, %p )\n",__FUNCTION__, (void*)t, (void*)t->tObj );
+   if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
+      fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, t, t->tObj );
    }
 
    /* Ensure we have a valid texture to upload */
@@ -343,13 +340,13 @@ static void radeonUploadSubImage( radeonContextPtr rmesa,
 
    texImage = t->tObj->Image[level];
    if ( !texImage ) {
-      if ( RADEON_DEBUG & DEBUG_VERBOSE_TEXTURE )
-	 fprintf( stderr,  "%s: texImage %d is NULL!\n",__FUNCTION__, level );
+      if ( RADEON_DEBUG & DEBUG_TEXTURE )
+	 fprintf( stderr, "%s: texImage %d is NULL!\n", __FUNCTION__, level );
       return;
    }
    if ( !texImage->Data ) {
-      if ( RADEON_DEBUG & DEBUG_VERBOSE_TEXTURE )
-	 fprintf( stderr,  "%s: image data is NULL!\n",__FUNCTION__ );
+      if ( RADEON_DEBUG & DEBUG_TEXTURE )
+	 fprintf( stderr, "%s: image data is NULL!\n", __FUNCTION__ );
       return;
    }
 
@@ -380,7 +377,7 @@ static void radeonUploadSubImage( radeonContextPtr rmesa,
    rmesa->c_textureBytes += (dwords << 2);
 #endif
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_MSG ) {
+   if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
       GLint imageX = 0;
       GLint imageY = 0;
       GLint blitX = t->image[level].x;
@@ -397,12 +394,24 @@ static void radeonUploadSubImage( radeonContextPtr rmesa,
    }
 
    t->image[level].data = texImage->Data;
-   ret = drmRadeonLoadTexture( rmesa->dri.fd, offset, pitch, format,
-			       imageWidth, imageHeight, &t->image[level] );
+
+   tex.offset = offset;
+   tex.pitch = pitch;
+   tex.format = format;
+   tex.width = imageWidth;
+   tex.height = imageHeight;
+   tex.image = &tmp;
+
+   memcpy( &tmp, &t->image[level], sizeof(drmRadeonTexImage) );
+
+   do {
+      ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE,
+                                 &tex, sizeof(drmRadeonTexture) );
+   } while ( ret && errno == EAGAIN );
 
    if ( ret ) {
       UNLOCK_HARDWARE( rmesa );
-      fprintf( stderr, "drmRadeonTextureBlit: return = %d\n", ret );
+      fprintf( stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret );
       fprintf( stderr, "   offset=0x%08x pitch=0x%x format=%d\n",
 	       offset, pitch, format );
       fprintf( stderr, "   image width=%d height=%d\n",
@@ -412,8 +421,6 @@ static void radeonUploadSubImage( radeonContextPtr rmesa,
 	       t->image[level].data );
       exit( 1 );
    }
-
-   RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT | RADEON_UPLOAD_MASKS );
 }
 
 /* Upload the texture images associated with texture `t'.  This might
@@ -425,10 +432,12 @@ int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t )
    const int numLevels = t->lastLevel - t->firstLevel + 1;
    int i;
    int heap;
+   radeonTexObjPtr t0 = rmesa->state.texture.unit[0].texobj;
+   radeonTexObjPtr t1 = rmesa->state.texture.unit[1].texobj;
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_TEXTURE ) {
-       fprintf( stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n",__FUNCTION__,
-	       (void*)rmesa->glCtx, (void*)t->tObj, t->totalSize,
+   if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
+      fprintf( stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__,
+	       rmesa->glCtx, t->tObj, t->totalSize,
 	       t->firstLevel, t->lastLevel );
    }
 
@@ -465,7 +474,8 @@ int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t )
 
       /* Kick out textures until the requested texture fits */
       while ( !t->memBlock ) {
-	 if ( rmesa->texture.objects[heap].prev->bound ) {
+	 if ( rmesa->texture.objects[heap].prev == t0 ||
+	      rmesa->texture.objects[heap].prev == t1 ) {
 	    fprintf( stderr,
 		     "radeonUploadTexImages: ran into bound texture\n" );
 	    UNLOCK_HARDWARE( rmesa );
@@ -512,11 +522,9 @@ int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t )
       }
 #endif
 
-      if ( t == rmesa->state.texture.unit[0].texobj )
-	 RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX0 );
-
-      if ( t == rmesa->state.texture.unit[1].texobj )
-	 RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_TEX1 );
+      /* Mark this texobj as dirty on all units:
+       */
+      t->dirty_state = TEX_ALL;
    }
 
    /* Let the world know we've used this memory recently */
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_texstate.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_texstate.c
index f66f589cd..943074514 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_texstate.c
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_texstate.c
@@ -1,4 +1,4 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_texstate.c,v 1.3 2002/09/16 18:05:21 eich Exp $ */
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_texstate.c,v 1.6 2002/12/16 16:18:59 dawes Exp $ */
 /**************************************************************************
 
 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
@@ -37,16 +37,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_context.h"
 #include "radeon_state.h"
 #include "radeon_ioctl.h"
-#include "radeon_vb.h"
+#include "radeon_swtcl.h"
 #include "radeon_tex.h"
+#include "radeon_tcl.h"
 
-#include "colormac.h"
 #include "context.h"
 #include "enums.h"
-#include "macros.h"
 #include "mem.h"
-#include "mmath.h"
-#include "simple_list.h"
 #include "texformat.h"
 
 
@@ -228,6 +225,8 @@ static void radeonSetTexImages( radeonContextPtr rmesa,
    t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) |
 		      (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT));
 
+   t->dirty_state = TEX_ALL;
+
    radeonUploadTexImages( rmesa, t );
 }
 
@@ -723,7 +722,7 @@ do {									\
  * Texture unit state management
  */
 
-static void radeonUpdateTextureEnv( GLcontext *ctx, int unit )
+static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
@@ -733,9 +732,9 @@ static void radeonUpdateTextureEnv( GLcontext *ctx, int unit )
    GLuint color_arg[3], alpha_arg[3];
    GLuint i, numColorArgs = 0, numAlphaArgs = 0;
 
-   if ( RADEON_DEBUG & DEBUG_VERBOSE_API ) {
-       fprintf( stderr, "%s( %p, %d ) format=%s\n",__FUNCTION__,
-	       (void*)ctx, unit, _mesa_lookup_enum_by_nr( format ) );
+   if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
+      fprintf( stderr, "%s( %p, %d ) format=%s\n", __FUNCTION__,
+	       ctx, unit, _mesa_lookup_enum_by_nr( format ) );
    }
 
    /* Set the texture environment state.  Isn't this nice and clean?
@@ -764,7 +763,7 @@ static void radeonUpdateTextureEnv( GLcontext *ctx, int unit )
          break;
       case GL_COLOR_INDEX:
       default:
-	 return;
+	 return GL_FALSE;
       }
       break;
 
@@ -787,7 +786,7 @@ static void radeonUpdateTextureEnv( GLcontext *ctx, int unit )
 	 break;
       case GL_COLOR_INDEX:
       default:
-	 return;
+	 return GL_FALSE;
       }
       break;
 
@@ -807,7 +806,7 @@ static void radeonUpdateTextureEnv( GLcontext *ctx, int unit )
 	 break;
       case GL_COLOR_INDEX:
       default:
-	 return;
+	 return GL_FALSE;
       }
       break;
 
@@ -830,7 +829,7 @@ static void radeonUpdateTextureEnv( GLcontext *ctx, int unit )
 	 break;
       case GL_COLOR_INDEX:
       default:
-	 return;
+	 return GL_FALSE;
       }
       break;
 
@@ -853,7 +852,7 @@ static void radeonUpdateTextureEnv( GLcontext *ctx, int unit )
 	 break;
       case GL_COLOR_INDEX:
       default:
-	 return;
+	 return GL_FALSE;
       }
       break;
 
@@ -872,16 +871,19 @@ static void radeonUpdateTextureEnv( GLcontext *ctx, int unit )
 	 break;
       case GL_MODULATE:
       case GL_ADD:
-      case GL_ADD_SIGNED_EXT:
+      case GL_ADD_SIGNED:
+      case GL_SUBTRACT:
+      case GL_DOT3_RGB:
+      case GL_DOT3_RGBA:
       case GL_DOT3_RGB_EXT:
       case GL_DOT3_RGBA_EXT:
 	 numColorArgs = 2;
 	 break;
-      case GL_INTERPOLATE_EXT:
+      case GL_INTERPOLATE:
 	 numColorArgs = 3;
 	 break;
       default:
-	 return;
+	 return GL_FALSE;
       }
 
       switch ( texUnit->CombineModeA ) {
@@ -890,14 +892,15 @@ static void radeonUpdateTextureEnv( GLcontext *ctx, int unit )
 	 break;
       case GL_MODULATE:
       case GL_ADD:
-      case GL_ADD_SIGNED_EXT:
+      case GL_ADD_SIGNED:
+      case GL_SUBTRACT:
 	 numAlphaArgs = 2;
 	 break;
-      case GL_INTERPOLATE_EXT:
+      case GL_INTERPOLATE:
 	 numAlphaArgs = 3;
 	 break;
       default:
-	 return;
+	 return GL_FALSE;
       }
 
       /* Step 1:
@@ -911,17 +914,17 @@ static void radeonUpdateTextureEnv( GLcontext *ctx, int unit )
 	 case GL_TEXTURE:
 	    color_arg[i] = radeon_texture_color[op][unit];
 	    break;
-	 case GL_CONSTANT_EXT:
+	 case GL_CONSTANT:
 	    color_arg[i] = radeon_tfactor_color[op];
 	    break;
-	 case GL_PRIMARY_COLOR_EXT:
+	 case GL_PRIMARY_COLOR:
 	    color_arg[i] = radeon_primary_color[op];
 	    break;
-	 case GL_PREVIOUS_EXT:
+	 case GL_PREVIOUS:
 	    color_arg[i] = radeon_previous_color[op];
 	    break;
 	 default:
-	    return;
+	    return GL_FALSE;
 	 }
       }
 
@@ -933,17 +936,17 @@ static void radeonUpdateTextureEnv( GLcontext *ctx, int unit )
 	 case GL_TEXTURE:
 	    alpha_arg[i] = radeon_texture_alpha[op][unit];
 	    break;
-	 case GL_CONSTANT_EXT:
+	 case GL_CONSTANT:
 	    alpha_arg[i] = radeon_tfactor_alpha[op];
 	    break;
-	 case GL_PRIMARY_COLOR_EXT:
+	 case GL_PRIMARY_COLOR:
 	    alpha_arg[i] = radeon_primary_alpha[op];
 	    break;
-	 case GL_PREVIOUS_EXT:
+	 case GL_PREVIOUS:
 	    alpha_arg[i] = radeon_previous_alpha[op];
 	    break;
 	 default:
-	    return;
+	    return GL_FALSE;
 	 }
       }
 
@@ -973,7 +976,7 @@ static void radeonUpdateTextureEnv( GLcontext *ctx, int unit )
 	 RADEON_COLOR_ARG( 0, A );
 	 RADEON_COLOR_ARG( 1, C );
 	 break;
-      case GL_ADD_SIGNED_EXT:
+      case GL_ADD_SIGNED:
 	 color_combine = (RADEON_COLOR_ARG_B_ZERO |
 			  RADEON_COMP_ARG_B |
 			  RADEON_BLEND_CTL_ADDSIGNED |
@@ -981,13 +984,31 @@ static void radeonUpdateTextureEnv( GLcontext *ctx, int unit )
 	 RADEON_COLOR_ARG( 0, A );
 	 RADEON_COLOR_ARG( 1, C );
 	 break;
-      case GL_INTERPOLATE_EXT:
+      case GL_SUBTRACT:
+	 color_combine = (RADEON_COLOR_ARG_B_ZERO |
+			  RADEON_COMP_ARG_B |
+			  RADEON_BLEND_CTL_SUBTRACT |
+			  RADEON_CLAMP_TX);
+	 RADEON_COLOR_ARG( 0, A );
+	 RADEON_COLOR_ARG( 1, C );
+	 break;
+      case GL_INTERPOLATE:
 	 color_combine = (RADEON_BLEND_CTL_BLEND |
 			  RADEON_CLAMP_TX);
 	 RADEON_COLOR_ARG( 0, B );
 	 RADEON_COLOR_ARG( 1, A );
 	 RADEON_COLOR_ARG( 2, C );
 	 break;
+
+      case GL_DOT3_RGB:
+      case GL_DOT3_RGBA:
+	 if ( texUnit->CombineScaleShiftRGB 
+	      != (RADEON_SCALE_1X >> RADEON_SCALE_SHIFT) )
+	 {
+	     return GL_FALSE;
+	 }
+	 /* FALLTHROUGH */
+
       case GL_DOT3_RGB_EXT:
       case GL_DOT3_RGBA_EXT:
 	 color_combine = (RADEON_COLOR_ARG_C_ZERO |
@@ -997,7 +1018,7 @@ static void radeonUpdateTextureEnv( GLcontext *ctx, int unit )
 	 RADEON_COLOR_ARG( 1, B );
 	 break;
       default:
-	 return;
+	 return GL_FALSE;
       }
 
       switch ( texUnit->CombineModeA ) {
@@ -1023,7 +1044,7 @@ static void radeonUpdateTextureEnv( GLcontext *ctx, int unit )
 	 RADEON_ALPHA_ARG( 0, A );
 	 RADEON_ALPHA_ARG( 1, C );
 	 break;
-      case GL_ADD_SIGNED_EXT:
+      case GL_ADD_SIGNED:
 	 alpha_combine = (RADEON_ALPHA_ARG_B_ZERO |
 			  RADEON_COMP_ARG_B |
 			  RADEON_BLEND_CTL_ADDSIGNED |
@@ -1031,7 +1052,15 @@ static void radeonUpdateTextureEnv( GLcontext *ctx, int unit )
 	 RADEON_ALPHA_ARG( 0, A );
 	 RADEON_ALPHA_ARG( 1, C );
 	 break;
-      case GL_INTERPOLATE_EXT:
+      case GL_SUBTRACT:	/* alpha combiner: must use the ALPHA_ARG_B macro */
+	 alpha_combine = (RADEON_ALPHA_ARG_B_ZERO |
+			  RADEON_COMP_ARG_B |
+			  RADEON_BLEND_CTL_SUBTRACT |
+			  RADEON_CLAMP_TX);
+	 RADEON_ALPHA_ARG( 0, A );
+	 RADEON_ALPHA_ARG( 1, C );
+	 break;
+      case GL_INTERPOLATE:
 	 alpha_combine = (RADEON_BLEND_CTL_BLEND |
 			  RADEON_CLAMP_TX);
 	 RADEON_ALPHA_ARG( 0, B );
@@ -1039,23 +1068,26 @@ static void radeonUpdateTextureEnv( GLcontext *ctx, int unit )
 	 RADEON_ALPHA_ARG( 2, C );
 	 break;
       default:
-	 return;
+	 return GL_FALSE;
       }
 
-      if ( texUnit->CombineModeRGB == GL_DOT3_RGB_EXT ) {
+      if ( (texUnit->CombineModeRGB == GL_DOT3_RGB_EXT)
+	   || (texUnit->CombineModeRGB == GL_DOT3_RGB_ARB) ) {
 	 alpha_combine |= RADEON_DOT_ALPHA_DONT_REPLICATE;
       }
 
       /* Step 3:
-       * Apply the scale factor.  The EXT extension has a somewhat
-       * unnecessary restriction that the scale must be 4x.  The ARB
-       * extension will likely drop this and we can just apply the
-       * scale factors regardless.
+       * Apply the scale factor.  The EXT version of the DOT3 extension does
+       * not support the scale factor, but the ARB version (and the version in
+       * OpenGL 1.3) does.  The catch is that the Radeon only supports a 1X
+       * multiplier in hardware w/the ARB version.
        */
       if ( texUnit->CombineModeRGB != GL_DOT3_RGB_EXT &&
-	   texUnit->CombineModeRGB != GL_DOT3_RGBA_EXT ) {
-	 color_combine |= (texUnit->CombineScaleShiftRGB << 21);
-	 alpha_combine |= (texUnit->CombineScaleShiftA << 21);
+	   texUnit->CombineModeRGB != GL_DOT3_RGBA_EXT &&
+	   texUnit->CombineModeRGB != GL_DOT3_RGB &&
+	   texUnit->CombineModeRGB != GL_DOT3_RGBA ) {
+	 color_combine |= (texUnit->CombineScaleShiftRGB << RADEON_SCALE_SHIFT);
+	 alpha_combine |= (texUnit->CombineScaleShiftA << RADEON_SCALE_SHIFT);
       }
       else
       {
@@ -1068,57 +1100,227 @@ static void radeonUpdateTextureEnv( GLcontext *ctx, int unit )
       break;
 
    default:
-      return;
+      return GL_FALSE;
+   }
+
+   if ( rmesa->hw.tex[unit].cmd[TEX_PP_TXCBLEND] != color_combine ||
+	rmesa->hw.tex[unit].cmd[TEX_PP_TXABLEND] != alpha_combine ) {
+      RADEON_STATECHANGE( rmesa, tex[unit] );
+      rmesa->hw.tex[unit].cmd[TEX_PP_TXCBLEND] = color_combine;
+      rmesa->hw.tex[unit].cmd[TEX_PP_TXABLEND] = alpha_combine;
+   }
+    
+   return GL_TRUE;
+}
+
+#define TEXOBJ_TXFILTER_MASK (RADEON_MAX_MIP_LEVEL_MASK |	\
+			      RADEON_MIN_FILTER_MASK | 		\
+			      RADEON_MAG_FILTER_MASK |		\
+			      RADEON_MAX_ANISO_MASK |		\
+			      RADEON_CLAMP_S_MASK | 		\
+			      RADEON_CLAMP_T_MASK)
+
+#define TEXOBJ_TXFORMAT_MASK (RADEON_TXFORMAT_WIDTH_MASK |	\
+			      RADEON_TXFORMAT_HEIGHT_MASK |	\
+			      RADEON_TXFORMAT_FORMAT_MASK |	\
+			      RADEON_TXFORMAT_ALPHA_IN_MAP)
+
+/* Merge texobj's hardware state into tex[unit] and clear its dirty bit. */
+static void import_tex_obj_state( radeonContextPtr rmesa,
+				  int unit,
+				  radeonTexObjPtr texobj )
+{
+   GLuint *cmd = RADEON_DB_STATE( tex[unit] );
+
+   cmd[TEX_PP_TXFILTER] &= ~TEXOBJ_TXFILTER_MASK;
+   cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
+   cmd[TEX_PP_TXFILTER] |= texobj->pp_txfilter & TEXOBJ_TXFILTER_MASK;
+   cmd[TEX_PP_TXFORMAT] |= texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK;
+   cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset;
+   cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
+   texobj->dirty_state &= ~(1<<unit);	/* only this unit's bit */
+
+   RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.tex[unit] );
+}
+
+/* Load a unit's S and T texgen planes into elements 0/4/8/12 and
+ * 1/5/9/13 of its TexGenMatrix, enable RADEON_TEXMAT for that unit and
+ * flag _NEW_TEXTURE_MATRIX in NewGLState.
+ */
+static void set_texgen_matrix( radeonContextPtr rmesa, 
+			       GLuint unit,
+			       GLfloat *s_plane,
+			       GLfloat *t_plane )
+{
+   /* Always load the planes.  Skipping the load when both planes equal
+    * (1,1,1,1) is wrong: that is not an identity transform, and it left
+    * the unit's TexGenMatrix without the requested plane equations.
+    */
+   rmesa->TexGenEnabled |= RADEON_TEXMAT_0_ENABLE<<unit;
+   rmesa->TexGenMatrix[unit].m[0]  = s_plane[0];
+   rmesa->TexGenMatrix[unit].m[4]  = s_plane[1];
+   rmesa->TexGenMatrix[unit].m[8]  = s_plane[2];
+   rmesa->TexGenMatrix[unit].m[12] = s_plane[3];
+
+   rmesa->TexGenMatrix[unit].m[1]  = t_plane[0];
+   rmesa->TexGenMatrix[unit].m[5]  = t_plane[1];
+   rmesa->TexGenMatrix[unit].m[9]  = t_plane[2];
+   rmesa->TexGenMatrix[unit].m[13] = t_plane[3];
+   rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+}
+
+/* Ignoring the Q texcoord for now.
+ *
+ * Returns GL_FALSE if fallback required.  
+ */
+static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit )
+{  
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
+   GLuint tmp = rmesa->TexGenEnabled;
+
+   rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit);
+   rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit);
+   rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift);
+   rmesa->TexGenNeedNormals[unit] = 0;
+
+   if (0)
+   fprintf(stderr, "%s unit %d cleared texgenEnabled %x\n", __FUNCTION__,
+	   unit, rmesa->TexGenEnabled);
+
+   if ((texUnit->TexGenEnabled & (S_BIT|T_BIT)) == 0) {
+      /* Disabled, no fallback:
+       */
+      rmesa->TexGenEnabled |= 
+	 (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
+      return GL_TRUE;
+   }
+   else if (texUnit->TexGenEnabled & Q_BIT) {
+      /* Very easy to do this, in fact would remove a fallback case
+       * elsewhere, but it isn't done yet.  Fallback (debug-only message): */
+      if ( RADEON_DEBUG & DEBUG_TEXTURE )
+	 fprintf(stderr, "fallback Q_BIT\n");
+      return GL_FALSE;
+   }
+   else if ((texUnit->TexGenEnabled & (S_BIT|T_BIT)) != (S_BIT|T_BIT) ||
+	    texUnit->GenModeS != texUnit->GenModeT) {
+      /* Mixed modes, fallback:
+       */
+/*        fprintf(stderr, "fallback mixed texgen\n"); */
+      return GL_FALSE;
+   }
+   else
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_TEXMAT_0_ENABLE << unit;
+
+   switch (texUnit->GenModeS) {
+   case GL_OBJECT_LINEAR:
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_OBJ << inputshift;
+      set_texgen_matrix( rmesa, unit, 
+			 texUnit->ObjectPlaneS,
+			 texUnit->ObjectPlaneT);
+      break;
+
+   case GL_EYE_LINEAR:
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE << inputshift;
+      set_texgen_matrix( rmesa, unit, 
+			 texUnit->EyePlaneS,
+			 texUnit->EyePlaneT);
+      break;
+
+   case GL_REFLECTION_MAP_NV:
+      rmesa->TexGenNeedNormals[unit] = GL_TRUE;
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE_REFLECT<<inputshift;
+      break;
+
+   case GL_NORMAL_MAP_NV:
+      rmesa->TexGenNeedNormals[unit] = GL_TRUE;
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE_NORMAL<<inputshift;
+      break;
+
+   case GL_SPHERE_MAP:
+   default:
+      /* Unsupported mode, fallback:
+       */
+      /*  fprintf(stderr, "fallback unsupported texgen\n"); */
+      return GL_FALSE;
    }
 
-   if ( rmesa->state.hw.texture[unit].pp_txcblend != color_combine ||
-	rmesa->state.hw.texture[unit].pp_txablend != alpha_combine ) {
-      RADEON_STATECHANGE( rmesa, (RADEON_UPLOAD_TEX0 << unit) );
-      rmesa->state.hw.texture[unit].pp_txcblend = color_combine;
-      rmesa->state.hw.texture[unit].pp_txablend = alpha_combine;
+   if (tmp != rmesa->TexGenEnabled) {
+      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
    }
+
+/*     fprintf(stderr, "%s unit %d texgenEnabled %x\n", __FUNCTION__, */
+/*  	   unit, rmesa->TexGenEnabled); */
+   return GL_TRUE;
 }
 
-static void radeonUpdateTextureUnit( GLcontext *ctx, int unit )
+
+
+
+static GLboolean radeonUpdateTextureUnit( GLcontext *ctx, int unit )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
 
+   /* Returns GL_FALSE when this unit cannot be handled and needs a fallback. */
+
    if ( texUnit->_ReallyEnabled & (TEXTURE0_1D|TEXTURE0_2D) ) {
       struct gl_texture_object *tObj = texUnit->_Current;
       radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
-      GLuint flag = RADEON_UPLOAD_TEX0 << unit;
       GLenum format;
 
       /* Fallback if there's a texture border */
-      if ( tObj->Image[tObj->BaseLevel]->Border > 0 ) {
-         FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, GL_TRUE );
-         return;
-      }
+      if ( tObj->Image[tObj->BaseLevel]->Border > 0 )
+         return GL_FALSE;
 
       /* Upload teximages (not pipelined)
        */
       if ( t->dirty_images ) {
 	 RADEON_FIREVERTICES( rmesa );
 	 radeonSetTexImages( rmesa, tObj );
-	 if ( !t->memBlock ) {
-	    FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, GL_TRUE );
-	    return;
-	 }
+	 /* Fallback if we can't upload:
+	  */
+	 if ( !t->memBlock ) 
+	    return GL_FALSE;
       }
 
       /* Update state if this is a different texture object to last
        * time.
        */
       if ( rmesa->state.texture.unit[unit].texobj != t ) {
-	 if ( rmesa->state.texture.unit[unit].texobj == NULL ) {
-	    RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
-	    rmesa->state.hw.context.pp_cntl |= (RADEON_TEX_0_ENABLE |
-					     RADEON_TEX_BLEND_0_ENABLE)<<unit;
-	 }
-	 RADEON_STATECHANGE( rmesa, flag );
 	 rmesa->state.texture.unit[unit].texobj = t;
-	 radeonUpdateTexLRU( rmesa, t ); /* done too often */
+	 t->dirty_state |= 1<<unit;
+	 radeonUpdateTexLRU( rmesa, t ); /* XXX: should be locked! */
+      }
+
+
+      /* Newly enabled?
+       */
+      if ( !(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit))) {
+	 RADEON_STATECHANGE( rmesa, ctx );
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= 
+	    (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit;
+
+	 RADEON_STATECHANGE( rmesa, tcl );
+
+	 if (unit == 0) 
+	    rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_ST0;
+	 else 
+	    rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_ST1;
+
+	 rmesa->recheck_texgen[unit] = GL_TRUE;
+      }
+
+      if (t->dirty_state & (1<<unit)) {
+	 import_tex_obj_state( rmesa, unit, t );
+      }
+      
+      if (rmesa->recheck_texgen[unit]) {
+	 GLboolean fallback = !radeon_validate_texgen( ctx, unit );
+	 TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), fallback);
+	 rmesa->recheck_texgen[unit] = 0;
+	 rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
       }
 
       format = tObj->Image[tObj->BaseLevel]->Format;
@@ -1126,27 +1328,76 @@ static void radeonUpdateTextureUnit( GLcontext *ctx, int unit )
 	   rmesa->state.texture.unit[unit].envMode != texUnit->EnvMode ) {
 	 rmesa->state.texture.unit[unit].format = format;
 	 rmesa->state.texture.unit[unit].envMode = texUnit->EnvMode;
-	 radeonUpdateTextureEnv( ctx, unit );
+	 if ( ! radeonUpdateTextureEnv( ctx, unit ) ) {
+	    return GL_FALSE;
+	 }
       }
    }
    else if ( texUnit->_ReallyEnabled ) {
-      FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, GL_TRUE );
-      return;
+      /* 3d textures, etc:
+       */
+      return GL_FALSE;
    }
-   else {
+   else if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit)) {
       /* Texture unit disabled */
       rmesa->state.texture.unit[unit].texobj = 0;
-      rmesa->state.hw.dirty &= ~(RADEON_UPLOAD_TEX0 << unit);
-      RADEON_STATECHANGE( rmesa, RADEON_UPLOAD_CONTEXT );
-      rmesa->state.hw.context.pp_cntl &= ~((RADEON_TEX_0_ENABLE |
-					    RADEON_TEX_BLEND_0_ENABLE) << unit);
+      RADEON_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= 
+	 ~((RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit);
+
+      RADEON_STATECHANGE( rmesa, tcl );
+      switch (unit) {
+      case 0:
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_TCL_VTX_ST0 |
+						   RADEON_TCL_VTX_Q0);
+	    break;
+      case 1:
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_TCL_VTX_ST1 |
+						   RADEON_TCL_VTX_Q1);
+	 break;
+      default:
+	 break;
+      }
+
+
+      if (rmesa->TclFallback & (RADEON_TCL_FALLBACK_TEXGEN_0<<unit)) {
+	 TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
+	 rmesa->recheck_texgen[unit] = GL_TRUE;
+      }
+
+
+
+      {
+	 GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
+	 GLuint tmp = rmesa->TexGenEnabled;
+
+	 rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit);
+	 rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit);
+	 rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift);
+	 rmesa->TexGenNeedNormals[unit] = 0;
+	 rmesa->TexGenEnabled |= 
+	    (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
+
+	 if (tmp != rmesa->TexGenEnabled) {
+	    rmesa->recheck_texgen[unit] = GL_TRUE;
+	    rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+	 }
+      }
    }
+
+   return GL_TRUE;
 }
 
 void radeonUpdateTextureState( GLcontext *ctx )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, GL_FALSE );
-   radeonUpdateTextureUnit( ctx, 0 );
-   radeonUpdateTextureUnit( ctx, 1 );
+   GLboolean ok;
+   /* && short-circuits: unit 1 is not validated once unit 0 has failed. */
+   ok = (radeonUpdateTextureUnit( ctx, 0 ) &&
+	 radeonUpdateTextureUnit( ctx, 1 ));
+   /* Texture fallback is requested exactly when a unit failed validation. */
+   FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, !ok );
+
+   if (rmesa->TclFallback)
+      radeonChooseVertexState( ctx );
 }
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.c
index 6b06a33c2..3da333549 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.c
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.c
@@ -1,607 +1,979 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.c,v 1.2 2002/09/10 00:39:39 dawes Exp $ */
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.c,v 1.5 2002/12/16 16:18:59 dawes Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
 /*
- * Copyright 2000, 2001 VA Linux Systems Inc., Fremont, California.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
  * Authors:
- *    Gareth Hughes <gareth@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
  */
-
-
 #include "glheader.h"
+#include "radeon_context.h"
+#include "radeon_state.h"
+#include "radeon_ioctl.h"
+#include "radeon_tex.h"
+#include "radeon_tcl.h"
+#include "radeon_vtxfmt.h"
+
 #include "api_noop.h"
-#include "colormac.h"
+#include "api_arrayelt.h"
 #include "context.h"
-#include "light.h"
-#include "macros.h"
 #include "mem.h"
 #include "mmath.h"
 #include "mtypes.h"
-#include "simple_list.h"
+#include "enums.h"
+#include "glapi.h"
+#include "colormac.h"
+#include "light.h"
+#include "state.h"
 #include "vtxfmt.h"
 
-#include "math/m_xform.h"
 #include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_array_api.h"
 
-#include "radeon_context.h"
-#include "radeon_ioctl.h"
-#include "radeon_vb.h"
-#include "radeon_vtxfmt.h"
-
+struct radeon_vb vb;
 
-#define VERTEX				radeonVertex
-#define TNL_VERTEX			radeonTnlVertex
+static void radeonFlushVertices( GLcontext *, GLuint );
 
+static void count_func( const char *name,  struct dynfn *l )
+{  /* Debug helper: count the entries iterated from list head l and print "name: count" to stderr. */
+   int i = 0;
+   struct dynfn *f;
+   foreach (f, l) i++;
+   if (i) fprintf(stderr, "%s: %d\n", name, i );   /* empty lists are not reported */
+}
 
-#define LINTERP( T, A, B )		((A) + (T) * ((B) - (A)))
-
-#define INTERP_RGBA( t, out, a, b )					\
-do {									\
-   GLint i;								\
-   for ( i = 0 ; i < 4 ; i++ ) {					\
-      GLfloat fa = UBYTE_TO_FLOAT( a[i] );				\
-      GLfloat fb = UBYTE_TO_FLOAT( b[i] );				\
-      GLfloat fo = LINTERP( t, fa, fb );				\
-      UNCLAMPED_FLOAT_TO_UBYTE( out[i], fo );				\
-   }									\
-} while (0)
-
+static void count_funcs( radeonContextPtr rmesa )
+{  /* Debug helper: run count_func() over every list in rmesa->vb.dfn_cache, one per entry point. */
+   count_func( "Vertex2f", &rmesa->vb.dfn_cache.Vertex2f );
+   count_func( "Vertex2fv", &rmesa->vb.dfn_cache.Vertex2fv );
+   count_func( "Vertex3f", &rmesa->vb.dfn_cache.Vertex3f );
+   count_func( "Vertex3fv", &rmesa->vb.dfn_cache.Vertex3fv );
+   count_func( "Color4ub", &rmesa->vb.dfn_cache.Color4ub );
+   count_func( "Color4ubv", &rmesa->vb.dfn_cache.Color4ubv );
+   count_func( "Color3ub", &rmesa->vb.dfn_cache.Color3ub );
+   count_func( "Color3ubv", &rmesa->vb.dfn_cache.Color3ubv );
+   count_func( "Color4f", &rmesa->vb.dfn_cache.Color4f );
+   count_func( "Color4fv", &rmesa->vb.dfn_cache.Color4fv );
+   count_func( "Color3f", &rmesa->vb.dfn_cache.Color3f );
+   count_func( "Color3fv", &rmesa->vb.dfn_cache.Color3fv );
+   count_func( "SecondaryColor3f", &rmesa->vb.dfn_cache.SecondaryColor3fEXT );   /* printed label omits the EXT suffix of the field */
+   count_func( "SecondaryColor3fv", &rmesa->vb.dfn_cache.SecondaryColor3fvEXT );
+   count_func( "SecondaryColor3ub", &rmesa->vb.dfn_cache.SecondaryColor3ubEXT );
+   count_func( "SecondaryColor3ubv", &rmesa->vb.dfn_cache.SecondaryColor3ubvEXT );
+   count_func( "Normal3f", &rmesa->vb.dfn_cache.Normal3f );
+   count_func( "Normal3fv", &rmesa->vb.dfn_cache.Normal3fv );
+   count_func( "TexCoord2f", &rmesa->vb.dfn_cache.TexCoord2f );
+   count_func( "TexCoord2fv", &rmesa->vb.dfn_cache.TexCoord2fv );
+   count_func( "TexCoord1f", &rmesa->vb.dfn_cache.TexCoord1f );
+   count_func( "TexCoord1fv", &rmesa->vb.dfn_cache.TexCoord1fv );
+   count_func( "MultiTexCoord2fARB", &rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+   count_func( "MultiTexCoord2fvARB", &rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+   count_func( "MultiTexCoord1fARB", &rmesa->vb.dfn_cache.MultiTexCoord1fARB );
+   count_func( "MultiTexCoord1fvARB", &rmesa->vb.dfn_cache.MultiTexCoord1fvARB );
+}
 
 
+void radeon_copy_to_current( GLcontext *ctx ) 
+{  /* Write the attributes held in the global vb back into ctx->Current, for each component present in the vertex format. */
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-/* ================================================================
- * Color functions:  Always update ctx->Current.*
- */
+   assert(ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT);
+   assert(vb.context == ctx);   /* the global vb must currently be bound to this context */
 
-/* ================================================================
- * Material functions:
- */
+   if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_N0) {
+      ctx->Current.Normal[0] = vb.normalptr[0];
+      ctx->Current.Normal[1] = vb.normalptr[1];
+      ctx->Current.Normal[2] = vb.normalptr[2];
+   }
 
-static __inline void radeon_recalc_base_color( GLcontext *ctx )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   struct gl_light *light;
+   if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_PKCOLOR) {   /* packed ubyte colour: all four channels converted to float */
+      ctx->Current.Color[0] = UBYTE_TO_FLOAT( vb.colorptr->red );
+      ctx->Current.Color[1] = UBYTE_TO_FLOAT( vb.colorptr->green );
+      ctx->Current.Color[2] = UBYTE_TO_FLOAT( vb.colorptr->blue );
+      ctx->Current.Color[3] = UBYTE_TO_FLOAT( vb.colorptr->alpha );
+   } 
+   /* Float colour: RGB here; alpha is copied separately below, only when FPALPHA is in the format. */
+   if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_FPCOLOR) {
+      ctx->Current.Color[0] = vb.floatcolorptr[0];
+      ctx->Current.Color[1] = vb.floatcolorptr[1];
+      ctx->Current.Color[2] = vb.floatcolorptr[2];
+   }
 
-   COPY_3V( rmesa->state.light.base_color, ctx->Light._BaseColor[0] );
-   foreach ( light, &ctx->Light.EnabledList ) {
-      ACC_3V( rmesa->state.light.base_color, light->_MatAmbient[0] );
+   if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_FPALPHA)
+      ctx->Current.Color[3] = vb.floatcolorptr[3];
+      
+   if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_PKSPEC) {   /* only RGB is written; SecondaryColor[3] is left untouched */
+      ctx->Current.SecondaryColor[0] = UBYTE_TO_FLOAT( vb.specptr->red );
+      ctx->Current.SecondaryColor[1] = UBYTE_TO_FLOAT( vb.specptr->green );
+      ctx->Current.SecondaryColor[2] = UBYTE_TO_FLOAT( vb.specptr->blue );
+   } 
+   /* Texcoords are tracked as two components; the third and fourth are reset to 0 and 1. */
+   if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_ST0) {
+      ctx->Current.Texcoord[0][0] = vb.texcoordptr[0][0];
+      ctx->Current.Texcoord[0][1] = vb.texcoordptr[0][1];
+      ctx->Current.Texcoord[0][2] = 0.0F;
+      ctx->Current.Texcoord[0][3] = 1.0F;
    }
-    
-   UNCLAMPED_FLOAT_TO_UBYTE( rmesa->state.light.base_alpha, 
-			     ctx->Light.Material[0].Diffuse[3] );
-}
 
+   if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_ST1) {
+      ctx->Current.Texcoord[1][0] = vb.texcoordptr[1][0];
+      ctx->Current.Texcoord[1][1] = vb.texcoordptr[1][1];
+      ctx->Current.Texcoord[1][2] = 0.0F;
+      ctx->Current.Texcoord[1][3] = 1.0F;
+   }
 
-/* ================================================================
- * Normal functions:
- */
+   ctx->Driver.NeedFlush &= ~FLUSH_UPDATE_CURRENT;   /* the update asserted on entry has now been performed */
+}
 
-struct radeon_norm_tab {
-   void (*normal3f_multi)( GLfloat x, GLfloat y, GLfloat z );
-   void (*normal3fv_multi)( const GLfloat *v );
-   void (*normal3f_single)( GLfloat x, GLfloat y, GLfloat z );
-   void (*normal3fv_single)( const GLfloat *v );
+static GLboolean discreet_gl_prim[GL_POLYGON+1] = {   /* indexed by GL primitive mode; 1 = back-to-back runs of this mode may be merged (see flush_prims) */
+   1,				/* 0 points */
+   1,				/* 1 lines */
+   0,				/* 2 line_strip */
+   0,				/* 3 line_loop */
+   1,				/* 4 tris */
+   0,				/* 5 tri_fan */
+   0,				/* 6 tri_strip */
+   1,				/* 7 quads */
+   0,				/* 8 quadstrip */
+   0,				/* 9 poly */
 };
 
-static struct radeon_norm_tab norm_tab[0x4];
-
+static void flush_prims( radeonContextPtr rmesa )
+{  /* Emit every primitive recorded in rmesa->vb.primlist against the vertices written so far, then reset the list. */
+   int i,j;
+   struct radeon_dma_region tmp = rmesa->dma.current;
+   /* tmp is a snapshot of the current region; the extra reference taken here is dropped by radeonReleaseDmaRegion() at the end. */
+   tmp.buf->refcount++;
+   tmp.aos_size = vb.vertex_size;
+   tmp.aos_stride = vb.vertex_size;
+   tmp.aos_start = GET_START(&tmp);
+   /* Advance the live region past the vertices already written (vertex_size * 4 bytes each). */
+   rmesa->dma.current.ptr = rmesa->dma.current.start += 
+      (vb.initial_counter - vb.counter) * vb.vertex_size * 4; 
+
+   rmesa->tcl.vertex_format = rmesa->vb.vertex_format;
+   rmesa->tcl.aos_components[0] = &tmp;
+   rmesa->tcl.nr_aos_components = 1;
+   rmesa->dma.flush = 0;
+
+   /* Optimize the primitive list:
+    */
+   if (rmesa->vb.nrprims > 1) {
+      for (j = 0, i = 1 ; i < rmesa->vb.nrprims; i++) {
+	 int pj = rmesa->vb.primlist[j].prim & 0xf;
+	 int pi = rmesa->vb.primlist[i].prim & 0xf;
+	 /* Fold i into j when both have the same mergeable mode and i starts exactly where j ends; otherwise compact i down to slot j+1. */
+	 if (pj == pi && discreet_gl_prim[pj] &&
+	     rmesa->vb.primlist[i].start == rmesa->vb.primlist[j].end) {
+	    rmesa->vb.primlist[j].end = rmesa->vb.primlist[i].end;
+	 }
+	 else {
+	    j++;
+	    if (j != i) rmesa->vb.primlist[j] = rmesa->vb.primlist[i];
+	 }
+      }
+      rmesa->vb.nrprims = j+1;
+   }
 
-#define HAVE_HW_LIGHTING 0
+   for (i = 0 ; i < rmesa->vb.nrprims; i++) {
+      if (RADEON_DEBUG & DEBUG_PRIMS)
+	 fprintf(stderr, "vtxfmt prim %d: %s %d..%d\n", i,
+		 _mesa_lookup_enum_by_nr( rmesa->vb.primlist[i].prim & 
+					  PRIM_MODE_MASK ),
+		 rmesa->vb.primlist[i].start,
+		 rmesa->vb.primlist[i].end);
+
+      radeonEmitPrimitive( vb.context,
+			   rmesa->vb.primlist[i].start,
+			   rmesa->vb.primlist[i].end,
+			   rmesa->vb.primlist[i].prim );
+   }
 
-#define GET_CURRENT_VERTEX						\
-   GET_CURRENT_CONTEXT(ctx);						\
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);			\
-   radeonTnlVertexPtr v = rmesa->imm.v0
+   rmesa->vb.nrprims = 0;
+   radeonReleaseDmaRegion( rmesa, &tmp, __FUNCTION__ );
+}
 
-#define CURRENT_NORMAL			rmesa->state.current.normal
-#define BASE_COLOR			rmesa->state.light.base_color
-#define BASE_ALPHA			rmesa->state.light.base_alpha
 
-#define VERT_COLOR( COMP )		v->color[COMP]
+static void start_prim( radeonContextPtr rmesa, GLuint mode )
+{  /* Open primlist[nrprims]: record its start (vertices emitted so far) and its mode. nrprims itself is not advanced here. */
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s %d\n", __FUNCTION__, vb.initial_counter - vb.counter);
 
+   rmesa->vb.primlist[rmesa->vb.nrprims].start = vb.initial_counter - vb.counter;
+   rmesa->vb.primlist[rmesa->vb.nrprims].prim = mode;
+}
 
-#define IND (0)
-#define TAG(x) radeon_##x
-#define PRESERVE_NORMAL_DEFS
-#include "tnl_dd/t_dd_imm_napi.h"
+static void note_last_prim( radeonContextPtr rmesa, GLuint flags )
+{  /* Close the open primlist entry: OR flags into its prim word, record its end, and flush once the list is full. */
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s %d\n", __FUNCTION__, vb.initial_counter - vb.counter);
 
-#define IND (NORM_RESCALE)
-#define TAG(x) radeon_##x##_rescale
-#define PRESERVE_NORMAL_DEFS
-#include "tnl_dd/t_dd_imm_napi.h"
+   if (rmesa->vb.prim[0] != GL_POLYGON+1) {   /* GL_POLYGON+1 is the value treated as "outside Begin/End" by VFMT_FALLBACK */
+      rmesa->vb.primlist[rmesa->vb.nrprims].prim |= flags;
+      rmesa->vb.primlist[rmesa->vb.nrprims].end = vb.initial_counter - vb.counter;
 
-#define IND (NORM_NORMALIZE)
-#define TAG(x) radeon_##x##_normalize
-#include "tnl_dd/t_dd_imm_napi.h"
+      if (++(rmesa->vb.nrprims) == RADEON_MAX_PRIMS)   /* list full: emit everything now */
+	 flush_prims( rmesa );
+   }
+}
 
 
-static void radeon_init_norm_funcs( void )
+static void copy_vertex( radeonContextPtr rmesa, GLuint n, GLfloat *dst )   /* Copy vertex n of the open primitive (vb.vertex_size floats) from the current DMA region into dst. */
 {
-   radeon_init_norm();
-   radeon_init_norm_rescale();
-   radeon_init_norm_normalize();
-}
+   GLuint i;
+   GLfloat *src = (GLfloat *)(rmesa->dma.current.address + 
+			      rmesa->dma.current.ptr + 
+			      (rmesa->vb.primlist[rmesa->vb.nrprims].start + n) * 
+			      vb.vertex_size * 4);   /* byte offset: n is relative to the open primitive's start vertex */
 
-static void radeon_choose_Normal3f( GLfloat x, GLfloat y, GLfloat z )
-{
-   GET_CURRENT_CONTEXT(ctx);
-   GLuint index;
+   if (RADEON_DEBUG & DEBUG_VFMT) 
+      fprintf(stderr, "copy_vertex %d\n", rmesa->vb.primlist[rmesa->vb.nrprims].start + n);
 
-   if ( ctx->Light.Enabled ) {
-      if ( ctx->Transform.Normalize ) {
-	 index = NORM_NORMALIZE;
-      }
-      else if ( !ctx->Transform.RescaleNormals &&
-		ctx->_ModelViewInvScale != 1.0 ) {
-	 index = NORM_RESCALE;
-      }
-      else {
-	 index = 0;
-      }
+   for (i = 0 ; i < vb.vertex_size; i++) {
+      dst[i] = src[i];
+   }
+}
 
-      if ( ctx->Light.EnabledList.next == ctx->Light.EnabledList.prev ) {
-	 ctx->Exec->Normal3f  = norm_tab[index].normal3f_single;
+/* Save the trailing vertices of the open primitive into tmp[] so that it
+ * can be continued after a flush; returns how many were saved (0..3).
+ * NOTE: this reads the vertices back from uncached memory; the
+ * counter/notify mechanism could populate tmp on the fly instead. */
+static GLuint copy_dma_verts( radeonContextPtr rmesa, GLfloat (*tmp)[15] )
+{
+   GLuint ovf, i;   /* ovf: number of vertices that must be carried over */
+   GLuint nr = (vb.initial_counter - vb.counter) - rmesa->vb.primlist[rmesa->vb.nrprims].start;   /* vertices emitted so far in the open primitive */
+
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s %d verts\n", __FUNCTION__, nr);
+
+   switch( rmesa->vb.prim[0] )
+   {
+   case GL_POINTS:
+      return 0;   /* every point is complete on its own */
+   case GL_LINES:
+      ovf = nr&1;   /* a dangling first endpoint, if any */
+      for (i = 0 ; i < ovf ; i++)
+	 copy_vertex( rmesa, nr-ovf+i, tmp[i] );
+      return i;
+   case GL_TRIANGLES:
+      ovf = nr%3;   /* vertices of an incomplete triangle */
+      for (i = 0 ; i < ovf ; i++)
+	 copy_vertex( rmesa, nr-ovf+i, tmp[i] );
+      return i;
+   case GL_QUADS:
+      ovf = nr&3;   /* vertices of an incomplete quad */
+      for (i = 0 ; i < ovf ; i++)
+	 copy_vertex( rmesa, nr-ovf+i, tmp[i] );
+      return i;
+   case GL_LINE_STRIP:
+      if (nr == 0) 
+	 return 0;
+      copy_vertex( rmesa, nr-1, tmp[0] );   /* the next segment starts at the last vertex */
+      return 1;
+   case GL_LINE_LOOP:
+   case GL_TRIANGLE_FAN:
+   case GL_POLYGON:
+      if (nr == 0) 
+	 return 0;
+      else if (nr == 1) {
+	 copy_vertex( rmesa, 0, tmp[0] );
+	 return 1;
       } else {
-	 ctx->Exec->Normal3f  = norm_tab[index].normal3f_multi;
+	 copy_vertex( rmesa, 0, tmp[0] );   /* first vertex is shared by the whole loop/fan/polygon */
+	 copy_vertex( rmesa, nr-1, tmp[1] );   /* plus the most recent vertex */
+	 return 2;
       }
-   } else {
-      ctx->Exec->Normal3f  = _mesa_noop_Normal3f;
+   case GL_TRIANGLE_STRIP:
+      ovf = MIN2( nr, 2 );   /* last two vertices; was nr-1, which wrapped (unsigned) for nr == 0 and dropped a vertex for nr < 3 */
+      for (i = 0 ; i < ovf ; i++)
+	 copy_vertex( rmesa, nr-ovf+i, tmp[i] );
+      return i;
+   case GL_QUAD_STRIP:
+      ovf = MIN2( nr, 2 );   /* last pair; was nr-1, with the same wrap/drop defect as the triangle strip case */
+      if (nr > 2) ovf += nr&1;   /* plus the odd vertex of an unfinished pair */
+      for (i = 0 ; i < ovf ; i++)
+	 copy_vertex( rmesa, nr-ovf+i, tmp[i] );
+      return i;
+   default:
+      assert(0);   /* unknown primitive mode */
+      return 0;
    }
-
-   glNormal3f( x, y, z );
 }
 
-static void radeon_choose_Normal3fv( const GLfloat *v )
+static void VFMT_FALLBACK_OUTSIDE_BEGIN_END( const char *caller )   /* Leave the vtxfmt path when no primitive is open: flush, wake the tnl exec path, mark fell_back. caller is only used for the debug message. */
 {
-   GET_CURRENT_CONTEXT(ctx);
-   GLuint index;
+   GLcontext *ctx = vb.context;   /* operates on whichever context the global vb is bound to */
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-   if ( ctx->Light.Enabled ) {
-      if ( ctx->Transform.Normalize ) {
-	 index = NORM_NORMALIZE;
-      }
-      else if ( !ctx->Transform.RescaleNormals &&
-		ctx->_ModelViewInvScale != 1.0 ) {
-	 index = NORM_RESCALE;
-      }
-      else {
-	 index = 0;
-      }
+   if (RADEON_DEBUG & (DEBUG_VFMT|DEBUG_FALLBACKS))
+      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
 
-      if ( ctx->Light.EnabledList.next == ctx->Light.EnabledList.prev ) {
-	 ctx->Exec->Normal3fv = norm_tab[index].normal3fv_single;
-      } else {
-	 ctx->Exec->Normal3fv = norm_tab[index].normal3fv_multi;
-      }
-   } else {
-      ctx->Exec->Normal3fv = _mesa_noop_Normal3fv;
-   }
+   if (ctx->Driver.NeedFlush) 
+      radeonFlushVertices( ctx, ctx->Driver.NeedFlush );
+
+   if (ctx->NewState)
+      _mesa_update_state( ctx ); /* clear state so fell_back sticks */
+
+   _tnl_wakeup_exec( ctx );
 
-   glNormal3fv( v );
+   assert( rmesa->dma.flush == 0 );
+   rmesa->vb.fell_back = GL_TRUE;   /* check_vtx_fmt() returns GL_FALSE while this is set */
+   rmesa->vb.installed = GL_FALSE;
+   vb.context = 0;   /* the global vb no longer belongs to any context */
 }
 
 
+static void VFMT_FALLBACK( const char *caller )
+{  /* Leave the vtxfmt path, possibly inside Begin/End: flush what was emitted, re-open the primitive through the GL API and replay the carried-over vertices and current attributes. */
+   GLcontext *ctx = vb.context;
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLfloat tmp[3][15];   /* storage for the vertices saved by copy_dma_verts() */
+   GLuint i, prim;
+   GLuint ind = rmesa->vb.vertex_format;
+   GLuint nrverts;
+   GLfloat alpha = 1.0;
 
+   if (RADEON_DEBUG & (DEBUG_FALLBACKS|DEBUG_VFMT))
+      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
 
-/* ================================================================
- * Texture functions:
- */
+   if (rmesa->vb.prim[0] == GL_POLYGON+1) {   /* no primitive open: the simpler fallback is enough */
+      VFMT_FALLBACK_OUTSIDE_BEGIN_END( __FUNCTION__ );
+      return;
+   }
 
-#define GET_CURRENT							\
-   GET_CURRENT_CONTEXT(ctx);						\
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
+   /* Copy vertices out of dma:
+    */
+   nrverts = copy_dma_verts( rmesa, tmp );
 
-#define NUM_TEXTURE_UNITS		RADEON_MAX_TEXTURE_UNITS
-#define DO_PROJ_TEX
+   /* Finish the prim at this point:
+    */
+   note_last_prim( rmesa, 0 );
+   flush_prims( rmesa );
 
-#define CURRENT_TEXTURE( unit )		rmesa->state.current.texture[unit]
+   /* Update ctx->Driver.CurrentExecPrimitive and swap in swtnl. 
+    */
+   prim = rmesa->vb.prim[0];   /* remember the open mode before it is reset on the next line */
+   ctx->Driver.CurrentExecPrimitive = GL_POLYGON+1;
+   _tnl_wakeup_exec( ctx );
 
-#define TAG(x) radeon_##x
-#include "tnl_dd/t_dd_imm_tapi.h"
+   assert(rmesa->dma.flush == 0);
+   rmesa->vb.fell_back = GL_TRUE;
+   rmesa->vb.installed = GL_FALSE;
+   vb.context = 0;
+   glBegin( prim );   /* re-open the same primitive through the GL API */
+   /* alpha keeps ctx->Current.Color[3] for the 3-float colour replay at the end of this function. */
+   if (rmesa->vb.installed_color_3f_sz == 4)
+      alpha = ctx->Current.Color[3];
 
+   /* Replay saved vertices
+    */
+   for (i = 0 ; i < nrverts; i++) {
+      GLuint offset = 3;   /* skip x,y,z; the position is issued last, via glVertex3fv */
+      if (ind & RADEON_CP_VC_FRMT_N0) {
+	 glNormal3fv( &tmp[i][offset] ); 
+	 offset += 3;
+      }
 
+      if (ind & RADEON_CP_VC_FRMT_PKCOLOR) {
+	 radeon_color_t *col = (radeon_color_t *)&tmp[i][offset];   /* packed colour occupies one slot */
+	 glColor4ub( col->red, col->green, col->blue, col->alpha );
+	 offset++;
+      }
+      else if (ind & RADEON_CP_VC_FRMT_FPALPHA) {
+	 glColor4fv( &tmp[i][offset] ); 
+	 offset+=4;
+      } 
+      else if (ind & RADEON_CP_VC_FRMT_FPCOLOR) {
+	 glColor3fv( &tmp[i][offset] ); 
+	 offset+=3;
+      }
 
-/* ================================================================
- * Vertex functions:
- */
+      if (ind & RADEON_CP_VC_FRMT_PKSPEC) {
+	 radeon_color_t *spec = (radeon_color_t *)&tmp[i][offset];
+	 _glapi_Dispatch->SecondaryColor3ubEXT( spec->red, spec->green, spec->blue );
+	 offset++;
+      }
 
-#define GET_CURRENT_VERTEX						\
-   GET_CURRENT_CONTEXT(ctx);						\
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);			\
-   radeonTnlVertexPtr v = rmesa->imm.v0
+      if (ind & RADEON_CP_VC_FRMT_ST0) {
+	 glTexCoord2fv( &tmp[i][offset] ); 
+	 offset += 2;
+      }
 
-#define CURRENT_VERTEX			v->obj
-#define SAVE_VERTEX			rmesa->imm.save_vertex( ctx, v )
+      if (ind & RADEON_CP_VC_FRMT_ST1) {
+	 glMultiTexCoord2fvARB( GL_TEXTURE1_ARB, &tmp[i][offset] );
+	 offset += 2;
+      }
+      glVertex3fv( &tmp[i][0] );
+   }
 
-#define TAG(x) radeon_##x
-#include "tnl_dd/t_dd_imm_vapi.h"
+   /* Replay current vertex
+    */
+   if (ind & RADEON_CP_VC_FRMT_N0) 
+      glNormal3fv( vb.normalptr );
+
+   if (ind & RADEON_CP_VC_FRMT_PKCOLOR)
+      glColor4ub( vb.colorptr->red, vb.colorptr->green, vb.colorptr->blue, vb.colorptr->alpha );
+   else if (ind & RADEON_CP_VC_FRMT_FPALPHA)
+      glColor4fv( vb.floatcolorptr );
+   else if (ind & RADEON_CP_VC_FRMT_FPCOLOR) {
+      if (rmesa->vb.installed_color_3f_sz == 4 && alpha != 1.0)   /* re-issue with the alpha saved before the replay */
+	 glColor4f( vb.floatcolorptr[0],
+		    vb.floatcolorptr[1],
+		    vb.floatcolorptr[2],
+		    alpha );
+      else
+	 glColor3fv( vb.floatcolorptr );
+   }
 
+   if (ind & RADEON_CP_VC_FRMT_PKSPEC) 
+      _glapi_Dispatch->SecondaryColor3ubEXT( vb.specptr->red, vb.specptr->green, vb.specptr->blue ); 
 
+   if (ind & RADEON_CP_VC_FRMT_ST0) 
+      glTexCoord2fv( vb.texcoordptr[0] );
 
+   if (ind & RADEON_CP_VC_FRMT_ST1) 
+      glMultiTexCoord2fvARB( GL_TEXTURE1_ARB, vb.texcoordptr[1] );
+}
 
-struct radeon_vert_tab {
-   void (*save_vertex)( GLcontext *ctx, radeonTnlVertexPtr v );
-   void (*interpolate_vertex)( GLfloat t,
-			       radeonTnlVertex *O,
-			       const radeonTnlVertex *I,
-			       const radeonTnlVertex *J );
-};
 
-static struct radeon_vert_tab vert_tab[0xf];
 
-#define VTX_NORMAL	0x0
-#define VTX_RGBA	0x1
-#define VTX_SPEC	0x2
-#define VTX_TEX0	0x4
-#define VTX_TEX1	0x8
+static void wrap_buffer( void )
+{  /* Installed below as vb.notify: flush the buffered primitives, refill the DMA region and restart the open primitive with its carried-over vertices. */
+   GLcontext *ctx = vb.context;
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLfloat tmp[3][15];   /* storage for the vertices saved by copy_dma_verts() */
+   GLuint i, nrverts;
 
-#define LOCAL_VARS							\
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
+   if (RADEON_DEBUG & (DEBUG_VFMT|DEBUG_PRIMS))
+      fprintf(stderr, "%s %d\n", __FUNCTION__, vb.initial_counter - vb.counter);
 
-#define CURRENT_COLOR			rmesa->state.current.color
-#define CURRENT_SPECULAR		rmesa->state.current.specular
+   /* Don't deal with parity: on an odd vertex count, hand back one slot and return without wrapping (presumably the wrap then happens on the next vertex).
+    */
+   if ((((vb.initial_counter - vb.counter) -  
+	 rmesa->vb.primlist[rmesa->vb.nrprims].start) & 1)) {
+      vb.counter++;
+      vb.initial_counter++;
+      return;
+   }
 
-#define CURRENT_NORMAL( COMP )		rmesa->state.current.normal[COMP]
-#define CURRENT_TEXTURE( U, COMP )	rmesa->state.current.texture[U][COMP]
+   /* Copy vertices out of dma:
+    */
+   if (rmesa->vb.prim[0] == GL_POLYGON+1) 
+      nrverts = 0;   /* no primitive open: nothing to carry over */
+   else {
+      nrverts = copy_dma_verts( rmesa, tmp );
+
+      if (RADEON_DEBUG & DEBUG_VFMT)
+	 fprintf(stderr, "%d vertices to copy\n", nrverts);
+   
+      /* Finish the prim at this point:
+       */
+      note_last_prim( rmesa, 0 );
+   }
 
-#define FLUSH_VERTEX			rmesa->imm.flush_vertex( ctx, v );
+   /* Fire any buffered primitives
+    */
+   flush_prims( rmesa );
 
+   /* Get new buffer
+    */
+   radeonRefillCurrentDmaRegion( rmesa );
 
-#define IND (VTX_NORMAL)
-#define TAG(x) radeon_##x##_NORMAL
-#define PRESERVE_VERTEX_DEFS
-#include "tnl_dd/t_dd_imm_vertex.h"
+   /* Reset counter, dmaptr
+    */
+   vb.dmaptr = (int *)(rmesa->dma.current.ptr + rmesa->dma.current.address);
+   vb.counter = (rmesa->dma.current.end - rmesa->dma.current.ptr) / 
+      (vb.vertex_size * 4);
+   vb.counter--;   /* hold one vertex slot back; the parity branch above can hand it out */
+   vb.initial_counter = vb.counter;
+   vb.notify = wrap_buffer;
 
-#define IND (VTX_NORMAL|VTX_TEX0)
-#define TAG(x) radeon_##x##_NORMAL_TEX0
-#define PRESERVE_VERTEX_DEFS
-#include "tnl_dd/t_dd_imm_vertex.h"
+   rmesa->dma.flush = flush_prims;
 
-#define IND (VTX_NORMAL|VTX_TEX0|VTX_TEX1)
-#define TAG(x) radeon_##x##_NORMAL_TEX0_TEX1
-#define PRESERVE_VERTEX_DEFS
-#include "tnl_dd/t_dd_imm_vertex.h"
+   /* Restart wrapped primitive:
+    */
+   if (rmesa->vb.prim[0] != GL_POLYGON+1)
+      start_prim( rmesa, rmesa->vb.prim[0] );
 
-#define IND (VTX_RGBA)
-#define TAG(x) radeon_##x##_RGBA
-#define PRESERVE_VERTEX_DEFS
-#include "tnl_dd/t_dd_imm_vertex.h"
+   /* Reemit saved vertices
+    */
+   for (i = 0 ; i < nrverts; i++) {
+      if (RADEON_DEBUG & DEBUG_VERTS) {
+	 int j;
+	 fprintf(stderr, "re-emit vertex %d to %p\n", i, vb.dmaptr);
+	 if (RADEON_DEBUG & DEBUG_VERBOSE)
+	    for (j = 0 ; j < vb.vertex_size; j++) 
+	       fprintf(stderr, "\t%08x/%f\n", *(int*)&tmp[i][j], tmp[i][j]);
+      }
 
-#define IND (VTX_RGBA|VTX_TEX0)
-#define TAG(x) radeon_##x##_RGBA_TEX0
-#define PRESERVE_VERTEX_DEFS
-#include "tnl_dd/t_dd_imm_vertex.h"
+      memcpy( vb.dmaptr, tmp[i], vb.vertex_size * 4 );
+      vb.dmaptr += vb.vertex_size;   /* dmaptr is an int pointer, so this advances one whole vertex */
+      vb.counter--;
+   }
+}
 
-#define IND (VTX_RGBA|VTX_TEX1)
-#define TAG(x) radeon_##x##_RGBA_TEX0_TEX1
-#include "tnl_dd/t_dd_imm_vertex.h"
 
 
-static void radeon_init_vert_funcs( void )
+static GLboolean check_vtx_fmt( GLcontext *ctx )   /* Pick the vertex format for the current GL state and prime the global vb from ctx->Current; returns GL_FALSE when this path cannot be used. */
 {
-   radeon_init_vert_NORMAL();
-   radeon_init_vert_NORMAL_TEX0();
-   radeon_init_vert_NORMAL_TEX0_TEX1();
-   radeon_init_vert_RGBA();
-   radeon_init_vert_RGBA_TEX0();
-   radeon_init_vert_RGBA_TEX0_TEX1();
-}
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint ind = RADEON_CP_VC_FRMT_Z;   /* format bits accumulated below */
 
+   if (rmesa->TclFallback || rmesa->vb.fell_back || ctx->CompileFlag)
+      return GL_FALSE;
 
+   if (ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) 
+      ctx->Driver.FlushVertices( ctx, FLUSH_UPDATE_CURRENT );
+   
+   /* Make all this event-driven:
+    */
+   if (ctx->Light.Enabled) {
+      ind |= RADEON_CP_VC_FRMT_N0;
+
+      /* TODO: make this data driven: If we receive only ubytes, send
+       * color as ubytes.  Also check if converting (with free
+       * checking for overflow) is cheaper than sending floats
+       * directly.
+       */
+      if (ctx->Light.ColorMaterialEnabled) {
+	 ind |= RADEON_CP_VC_FRMT_FPCOLOR;
+         if (ctx->Color.AlphaEnabled) {
+	    ind |= RADEON_CP_VC_FRMT_FPALPHA;
+         }
+      }
+   }
+   else {
+      /* TODO: make this data driven?
+       */
+      ind |= RADEON_CP_VC_FRMT_PKCOLOR;
+	 
+      if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) {
+	 ind |= RADEON_CP_VC_FRMT_PKSPEC;
+      }
+   }
 
+   if (ctx->Texture.Unit[0]._ReallyEnabled) {
+      if (ctx->Texture.Unit[0].TexGenEnabled) {
+	 if (rmesa->TexGenNeedNormals[0]) {
+	    ind |= RADEON_CP_VC_FRMT_N0;
+	 }
+      } else {
+	 if (ctx->Current.Texcoord[0][2] != 0.0F ||
+	     ctx->Current.Texcoord[0][3] != 1.0) {   /* only two texcoord components are emitted: refuse if the other two are not 0 and 1 */
+	    if (RADEON_DEBUG & (DEBUG_VFMT|DEBUG_FALLBACKS))
+	       fprintf(stderr, "%s: rq0\n", __FUNCTION__);
+	    return GL_FALSE;
+	 }
+	 ind |= RADEON_CP_VC_FRMT_ST0;
+      }
+   }
 
+   if (ctx->Texture.Unit[1]._ReallyEnabled) {
+      if (ctx->Texture.Unit[1].TexGenEnabled) {
+	 if (rmesa->TexGenNeedNormals[1]) {
+	    ind |= RADEON_CP_VC_FRMT_N0;
+	 }
+      } else {
+	 if (ctx->Current.Texcoord[1][2] != 0.0F ||
+	     ctx->Current.Texcoord[1][3] != 1.0) {
+	    if (RADEON_DEBUG & (DEBUG_VFMT|DEBUG_FALLBACKS))
+	       fprintf(stderr, "%s: rq1\n", __FUNCTION__);
+	    return GL_FALSE;
+	 }
+	 ind |= RADEON_CP_VC_FRMT_ST1;
+      }
+   }
 
+   if (RADEON_DEBUG & (DEBUG_VFMT|DEBUG_STATE))
+      fprintf(stderr, "%s: format: 0x%x\n", __FUNCTION__, ind );
 
+   RADEON_NEWPRIM(rmesa);
+   rmesa->vb.vertex_format = ind;
+   vb.vertex_size = 3;   /* x,y,z are always present; each enabled component below grows the size */
+   rmesa->vb.prim = &ctx->Driver.CurrentExecPrimitive;
 
-#define LOCAL_VARS							\
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
+   vb.normalptr = ctx->Current.Normal;   /* defaults: float attributes alias ctx->Current unless the format carries them */
+   vb.colorptr = NULL;
+   vb.floatcolorptr = ctx->Current.Color;
+   vb.specptr = NULL;
+   vb.floatspecptr = ctx->Current.SecondaryColor;
+   vb.texcoordptr[0] = ctx->Current.Texcoord[0];
+   vb.texcoordptr[1] = ctx->Current.Texcoord[1];
 
-#define FLUSH_VERTEX			rmesa->imm.flush_vertex
+   /* Run through and initialize the vertex components in the order
+    * the hardware understands:
+    */
+   if (ind & RADEON_CP_VC_FRMT_N0) {
+      vb.normalptr = &vb.vertex[vb.vertex_size].f;
+      vb.vertex_size += 3;
+      vb.normalptr[0] = ctx->Current.Normal[0];
+      vb.normalptr[1] = ctx->Current.Normal[1];
+      vb.normalptr[2] = ctx->Current.Normal[2];
+   }
 
-#define IMM_VERTEX( V )			rmesa->imm.V
-#define IMM_VERTICES( n )		rmesa->imm.vertices[n]
+   if (ind & RADEON_CP_VC_FRMT_PKCOLOR) {
+      vb.colorptr = &vb.vertex[vb.vertex_size].color;
+      vb.vertex_size += 1;   /* packed colour takes a single slot */
+      UNCLAMPED_FLOAT_TO_CHAN( vb.colorptr->red,   ctx->Current.Color[0] );
+      UNCLAMPED_FLOAT_TO_CHAN( vb.colorptr->green, ctx->Current.Color[1] );
+      UNCLAMPED_FLOAT_TO_CHAN( vb.colorptr->blue,  ctx->Current.Color[2] );
+      UNCLAMPED_FLOAT_TO_CHAN( vb.colorptr->alpha, ctx->Current.Color[3] );
+   }
 
+   if (ind & RADEON_CP_VC_FRMT_FPCOLOR) {
+      assert(!(ind & RADEON_CP_VC_FRMT_PKCOLOR));   /* packed and float colour are set in opposite branches above */
+      vb.floatcolorptr = &vb.vertex[vb.vertex_size].f;
+      vb.vertex_size += 3;
+      vb.floatcolorptr[0] = ctx->Current.Color[0];
+      vb.floatcolorptr[1] = ctx->Current.Color[1];
+      vb.floatcolorptr[2] = ctx->Current.Color[2];
+
+      if (ind & RADEON_CP_VC_FRMT_FPALPHA) {
+	 vb.vertex_size += 1;   /* alpha is stored directly after the three colour floats */
+	 vb.floatcolorptr[3] = ctx->Current.Color[3];
+      }
+   }
+   
+   if (ind & RADEON_CP_VC_FRMT_PKSPEC) {
+      vb.specptr = &vb.vertex[vb.vertex_size].color;
+      vb.vertex_size += 1;
+      UNCLAMPED_FLOAT_TO_CHAN( vb.specptr->red,   ctx->Current.SecondaryColor[0] );
+      UNCLAMPED_FLOAT_TO_CHAN( vb.specptr->green, ctx->Current.SecondaryColor[1] );
+      UNCLAMPED_FLOAT_TO_CHAN( vb.specptr->blue,  ctx->Current.SecondaryColor[2] );
+   }
 
-/* TINY_VERTEX_FORMAT:
- */
-#define GET_VERTEX_SPACE( n ) radeonAllocDmaLow( rmesa, n * 16, __FUNCTION__ )
-
-#define EMIT_VERTEX( vb, v )						\
-do {									\
-   vb[0] = *(GLuint *)&(v->clip[0]);					\
-   vb[1] = *(GLuint *)&(v->clip[1]);					\
-   vb[2] = *(GLuint *)&(v->clip[2]);					\
-   vb[3] = *(GLuint *)&(v->color);					\
-   vb += 4;								\
-} while (0)
 
-#define TAG(x) radeon_##x##_tiny
-#define PRESERVE_PRIM_DEFS
-#include "tnl_dd/t_dd_imm_primtmp.h"
+   if (ind & RADEON_CP_VC_FRMT_ST0) {
+      vb.texcoordptr[0] = &vb.vertex[vb.vertex_size].f;
+      vb.vertex_size += 2;
+      vb.texcoordptr[0][0] = ctx->Current.Texcoord[0][0];
+      vb.texcoordptr[0][1] = ctx->Current.Texcoord[0][1];   
+   } 
+
+   if (ind & RADEON_CP_VC_FRMT_ST1) {
+      vb.texcoordptr[1] = &vb.vertex[vb.vertex_size].f;
+      vb.vertex_size += 2;
+      vb.texcoordptr[1][0] = ctx->Current.Texcoord[1][0];
+      vb.texcoordptr[1][1] = ctx->Current.Texcoord[1][1];
+   } 
+   /* Reinstall the exec vtxfmt only when the chosen format differs from the one last installed. */
+   if (rmesa->vb.installed_vertex_format != rmesa->vb.vertex_format) {
+      if (RADEON_DEBUG & DEBUG_VFMT)
+	 fprintf(stderr, "reinstall on vertex_format change\n");
+      _mesa_install_exec_vtxfmt( ctx, &rmesa->vb.vtxfmt );
+      rmesa->vb.installed_vertex_format = rmesa->vb.vertex_format;
+   }
 
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s -- success\n", __FUNCTION__);
+   
+   return GL_TRUE;
+}
 
-/* NOTEX_VERTEX_FORMAT:
- */
-#define GET_VERTEX_SPACE( n ) radeonAllocDmaLow( rmesa, n * 24, __FUNCTION__ )
-
-#define EMIT_VERTEX( vb, v )						\
-do {									\
-   vb[0] = *(GLuint *)&(v->clip[0]);					\
-   vb[1] = *(GLuint *)&(v->clip[1]);					\
-   vb[2] = *(GLuint *)&(v->clip[2]);					\
-   vb[3] = *(GLuint *)&(v->clip[3]);					\
-   vb[4] = *(GLuint *)&(v->color);					\
-   vb[5] = *(GLuint *)&(v->specular);					\
-   vb += 6;								\
-} while (0)
 
-#define TAG(x) radeon_##x##_notex
-#define PRESERVE_PRIM_DEFS
-#include "tnl_dd/t_dd_imm_primtmp.h"
+void radeonVtxfmtInvalidate( GLcontext *ctx )
+{  /* Request a re-check of the vertex format and clear the fell_back flag. */
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
 
+   rmesa->vb.recheck = GL_TRUE;
+   rmesa->vb.fell_back = GL_FALSE;   /* check_vtx_fmt() returns GL_FALSE while this is set */
+}
 
-/* TEX0_VERTEX_FORMAT:
- */
-#define GET_VERTEX_SPACE( n ) radeonAllocDmaLow( rmesa, n * 32, __FUNCTION__ )
-
-#define EMIT_VERTEX( vb, v )						\
-do {									\
-   vb[0] = *(GLuint *)&(v->clip[0]);					\
-   vb[1] = *(GLuint *)&(v->clip[1]);					\
-   vb[2] = *(GLuint *)&(v->clip[2]);					\
-   vb[3] = *(GLuint *)&(v->clip[3]);					\
-   vb[4] = *(GLuint *)&(v->color);					\
-   vb[5] = *(GLuint *)&(v->specular);					\
-   vb[6] = *(GLuint *)&(v->texture[0][0]);				\
-   vb[7] = *(GLuint *)&(v->texture[0][1]);				\
-   vb += 8;								\
-} while (0)
 
-#define TAG(x) radeon_##x##_tex0
-#define PRESERVE_PRIM_DEFS
-#include "tnl_dd/t_dd_imm_primtmp.h"
+static void radeonNewList( GLcontext *ctx, GLuint list, GLenum mode )
+{  /* Hooked as ctx->Driver.NewList by radeonVtxfmtValidate; list and mode are not named in this body. */
+   VFMT_FALLBACK_OUTSIDE_BEGIN_END( __FUNCTION__ );	/* NOTE(review): macro defined outside this chunk */
+}
 
 
-/* TEX1_VERTEX_FORMAT:
- */
-#define GET_VERTEX_SPACE( n ) radeonAllocDmaLow( rmesa, n * 40, __FUNCTION__ )
-
-#define EMIT_VERTEX( vb, v )						\
-do {									\
-   vb[0] = *(GLuint *)&(v->clip[0]);					\
-   vb[1] = *(GLuint *)&(v->clip[1]);					\
-   vb[2] = *(GLuint *)&(v->clip[2]);					\
-   vb[3] = *(GLuint *)&(v->clip[3]);					\
-   vb[4] = *(GLuint *)&(v->color);					\
-   vb[5] = *(GLuint *)&(v->specular);					\
-   vb[6] = *(GLuint *)&(v->texture[0][0]);				\
-   vb[7] = *(GLuint *)&(v->texture[0][1]);				\
-   vb[8] = *(GLuint *)&(v->texture[1][0]);				\
-   vb[9] = *(GLuint *)&(v->texture[1][1]);				\
-   vb += 10;								\
-} while (0)
+static void radeonVtxfmtValidate( GLcontext *ctx )
+{  /* Re-run check_vtx_fmt() and install or remove our exec vtxfmt to match. */
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
 
-#define TAG(x) radeon_##x##_tex1
-#define PRESERVE_PRIM_DEFS
-#include "tnl_dd/t_dd_imm_primtmp.h"
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
 
+   if (ctx->Driver.NeedFlush)
+      ctx->Driver.FlushVertices( ctx, ctx->Driver.NeedFlush );
 
+   rmesa->vb.recheck = GL_FALSE;	/* set again by radeonVtxfmtInvalidate */
 
+   if (check_vtx_fmt( ctx )) {
+      if (!rmesa->vb.installed) {
+	 if (RADEON_DEBUG & DEBUG_VFMT)
+	    fprintf(stderr, "reinstall (new install)\n");
 
+	 _mesa_install_exec_vtxfmt( ctx, &rmesa->vb.vtxfmt );
+	 ctx->Driver.FlushVertices = radeonFlushVertices;
+	 ctx->Driver.NewList = radeonNewList;
+	 rmesa->vb.installed = GL_TRUE;
+	 vb.context = ctx;	/* the global vb is now tied to this context */
+      }
+      else if (RADEON_DEBUG & DEBUG_VFMT)
+	 fprintf(stderr, "%s: already installed\n", __FUNCTION__);
+   } 
+   else {
+      if (RADEON_DEBUG & DEBUG_VFMT)
+	 fprintf(stderr, "%s: failed\n", __FUNCTION__);
+      /* check_vtx_fmt() refused: flush pending dma and hand exec back to tnl. */
+      if (rmesa->vb.installed) {
+	 if (rmesa->dma.flush)
+	    rmesa->dma.flush( rmesa );
+	 _tnl_wakeup_exec( ctx );
+	 rmesa->vb.installed = GL_FALSE;
+	 vb.context = 0;
+      }
+   }      
+}
 
 
 
-/* Bzzt: Material changes are lost on fallback.
+/* Materials: handled here only outside begin/end; inside one we fall back.
  */
-static void radeon_Materialfv( GLenum face, GLenum pname,
+static void radeon_Materialfv( GLenum face, GLenum pname, 
 			       const GLfloat *params )
 {
-   GET_CURRENT_CONTEXT(ctx);
+   GLcontext *ctx = vb.context;
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
 
+   if (rmesa->vb.prim[0] != GL_POLYGON+1) {	/* GL_POLYGON+1 is the value radeon_End stores */
+      VFMT_FALLBACK( __FUNCTION__ );
+      glMaterialfv( face, pname, params );	/* re-issue the call after the fallback */
+      return;
+   }
    _mesa_noop_Materialfv( face, pname, params );
-   radeon_recalc_base_color( ctx );
+   radeonUpdateMaterial( vb.context );
 }
 
 
-
-
-
-/* ================================================================
- * Fallback functions:
+/* Begin/End
  */
-
-static void radeon_do_fallback( GLcontext *ctx )
+static void radeon_Begin( GLenum mode )
 {
+   GLcontext *ctx = vb.context;
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   struct radeon_current_state *current = &rmesa->state.current;
+   /* vtxfmt glBegin: check args, validate state, set up dma space, start the prim. */
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
 
-   /* Tell tnl to restore its exec vtxfmt, rehook its driver callbacks
-    * and revive internal state that depended on those callbacks:
-    */
-   _tnl_wakeup_exec( ctx );
+   if (mode > GL_POLYGON) {
+      _mesa_error( ctx, GL_INVALID_ENUM, "glBegin" );
+      return;
+   }
 
-   /* Replay enough vertices that the current primitive is continued
-    * correctly:
-    */
-   if ( rmesa->imm.prim != PRIM_OUTSIDE_BEGIN_END ) {
-      glBegin( rmesa->imm.prim );
-      /*rmesa->fire_on_fallback( ctx );*/
+   if (rmesa->vb.prim[0] != GL_POLYGON+1) {	/* already inside begin/end (radeon_End resets to GL_POLYGON+1) */
+      _mesa_error( ctx, GL_INVALID_OPERATION, "glBegin" );
+      return;
    }
+   /* Bring Mesa state, then driver state, up to date before revalidating: */
+   if (ctx->NewState) 
+      _mesa_update_state( ctx );
 
-   /* Replay the current, partially complete vertex:
-    */
-   if ( current->texture[0][3] == 1.0 ) {
-      glMultiTexCoord3fvARB( GL_TEXTURE0_ARB, current->texture[0] );
-   } else {
-      glMultiTexCoord4fvARB( GL_TEXTURE0_ARB, current->texture[0] );
+   if (rmesa->NewGLState)
+      radeonValidateState( ctx );
+
+   if (rmesa->vb.recheck) 
+      radeonVtxfmtValidate( ctx );
+   /* Validation may have removed our vtxfmt: pass the call on via glBegin(). */
+   if (!rmesa->vb.installed) {
+      glBegin( mode );
+      return;
    }
 
-   if ( current->texture[1][3] == 1.0 ) {
-      glMultiTexCoord3fvARB( GL_TEXTURE1_ARB, current->texture[1] );
-   } else {
-      glMultiTexCoord4fvARB( GL_TEXTURE1_ARB, current->texture[1] );
+   /* A flush is pending but fewer than 12 vertex slots remain: flush first. */
+   if (rmesa->dma.flush && vb.counter < 12) {
+      if (RADEON_DEBUG & DEBUG_VFMT)
+	 fprintf(stderr, "%s: flush almost-empty buffers\n", __FUNCTION__);
+      flush_prims( rmesa );
    }
 
-   /* FIXME: Secondary color, fog coord...
+   /* Need to arrange to save vertices here?  Or always copy from dma (yuk)?
     */
+   if (!rmesa->dma.flush) {	/* no flush hook set: (re)initialise the vb write state */
+      if (rmesa->dma.current.ptr + 12*vb.vertex_size*4 > 
+	  rmesa->dma.current.end) {
+	 RADEON_NEWPRIM( rmesa );
+	 radeonRefillCurrentDmaRegion( rmesa );
+      }
 
-   if ( ctx->Light.Enabled ) {
-      glColor4fv( ctx->Current.Color );	/* Catch ColorMaterial */
-      glNormal3fv( current->normal );
-   } else {
-      glColor4ubv( current->color );
+      vb.dmaptr = (int *)(rmesa->dma.current.address + rmesa->dma.current.ptr);
+      vb.counter = (rmesa->dma.current.end - rmesa->dma.current.ptr) / 
+	 (vb.vertex_size * 4);
+      vb.counter--;	/* NOTE(review): one slot is held back; the reason is not visible here */
+      vb.initial_counter = vb.counter;
+      vb.notify = wrap_buffer;
+      rmesa->dma.flush = flush_prims;
+      vb.context->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
    }
+   
+   
+   rmesa->vb.prim[0] = mode;
+   start_prim( rmesa, mode | PRIM_BEGIN );
 }
 
-#define PRE_LOOPBACK( FUNC ) do {					\
-   GET_CURRENT_CONTEXT(ctx);						\
-   radeon_do_fallback( ctx );						\
-} while (0)
 
-#define TAG(x) radeon_fallback_##x
-#include "vtxfmt_tmp.h"
 
+static void radeon_End( void )
+{  /* vtxfmt glEnd: closes the primitive opened by radeon_Begin. */
+   GLcontext *ctx = vb.context;
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (rmesa->vb.prim[0] == GL_POLYGON+1) {	/* no glBegin in effect */
+      _mesa_error( ctx, GL_INVALID_OPERATION, "glEnd" );
+      return;
+   }
+   /* Tag the last prim with PRIM_END and return to the "outside begin/end" value: */
+   note_last_prim( rmesa, PRIM_END );
+   rmesa->vb.prim[0] = GL_POLYGON+1;
+}
 
 
+/* Fallback on difficult entrypoints: with TAG() set as below, vtxfmt_tmp.h
+ * presumably emits radeon_fallback_* wrappers that run PRE_LOOPBACK first. */
+#define PRE_LOOPBACK( FUNC )			\
+do {						\
+   if (RADEON_DEBUG & DEBUG_VFMT) 		\
+      fprintf(stderr, "%s\n", __FUNCTION__);	\
+   VFMT_FALLBACK( __FUNCTION__ );		\
+} while (0)
+#define TAG(x) radeon_fallback_##x
+#include "vtxfmt_tmp.h"
 
 
 
-static void radeon_Begin( GLenum prim )
+static GLboolean radeonNotifyBegin( GLcontext *ctx, GLenum p )
 {
-   GET_CURRENT_CONTEXT(ctx);
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   /* tnl Driver.NotifyBegin hook: GL_TRUE if radeon_Begin took the begin over, else GL_FALSE. */
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
 
-   if ( prim > GL_POLYGON ) {
-      _mesa_error( ctx, GL_INVALID_ENUM, "glBegin" );
-      return;
-   }
-   if ( rmesa->imm.prim != PRIM_OUTSIDE_BEGIN_END ) {
-      _mesa_error( ctx, GL_INVALID_OPERATION, "glBegin" );
-      return;
-   }
+   assert(!rmesa->vb.installed);	/* only expected while our vtxfmt is not installed */
 
-   ctx->Driver.NeedFlush |= (FLUSH_STORED_VERTICES |
-			     FLUSH_UPDATE_CURRENT);
+   if (ctx->NewState) 
+      _mesa_update_state( ctx );
 
+   if (rmesa->NewGLState)
+      radeonValidateState( ctx );
 
-   radeonChooseVertexState( ctx );
+   if (ctx->Driver.NeedFlush)
+      ctx->Driver.FlushVertices( ctx, ctx->Driver.NeedFlush );
 
+   if (rmesa->vb.recheck) 
+      radeonVtxfmtValidate( ctx );	/* may install our vtxfmt */
 
-   rmesa->imm.prim = prim;
-   rmesa->imm.v0 = &rmesa->imm.vertices[0];
+   if (!rmesa->vb.installed) {
+      if (RADEON_DEBUG & DEBUG_VFMT)
+	 fprintf(stderr, "%s -- failed\n", __FUNCTION__);
+      return GL_FALSE;
+   }
 
-   rmesa->imm.save_vertex = radeon_save_vertex_RGBA;
-   rmesa->imm.flush_vertex = rmesa->imm.flush_tab[prim];
+   radeon_Begin( p );	/* note: radeon_Begin works on vb.context, set during install */
+   return GL_TRUE;
 }
 
-static void radeon_End( void )
+static void radeonFlushVertices( GLcontext *ctx, GLuint flags )	/* set as ctx->Driver.FlushVertices on install */
 {
-   GET_CURRENT_CONTEXT(ctx);
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
 
-   if ( rmesa->imm.prim == PRIM_OUTSIDE_BEGIN_END ) {
-      _mesa_error( ctx, GL_INVALID_OPERATION, "glEnd" );
-      return;
-   }
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
 
-   rmesa->imm.prim = PRIM_OUTSIDE_BEGIN_END;
+   assert(rmesa->vb.installed);
+   assert(vb.context == ctx);
 
-   ctx->Driver.NeedFlush &= ~(FLUSH_STORED_VERTICES |
-			      FLUSH_UPDATE_CURRENT);
-}
+   if (flags & FLUSH_UPDATE_CURRENT) {
+      radeon_copy_to_current( ctx );
+      if (RADEON_DEBUG & DEBUG_VFMT)
+	 fprintf(stderr, "reinstall on update_current\n");
+      _mesa_install_exec_vtxfmt( ctx, &rmesa->vb.vtxfmt );
+      ctx->Driver.NeedFlush &= ~FLUSH_UPDATE_CURRENT;
+   }
 
+   if (flags & FLUSH_STORED_VERTICES) {
+      /* Only our own flush_prims (or no hook at all) is expected while installed. */
+      assert (rmesa->dma.flush == 0 ||
+	      rmesa->dma.flush == flush_prims);
+      if (rmesa->dma.flush == flush_prims)
+	 flush_prims( rmesa );
+      ctx->Driver.NeedFlush &= ~FLUSH_STORED_VERTICES;
+   }
+}
 
 
 
+/* At this point, don't expect very many versions of each function to
+ * be generated, so they are kept until radeonVtxfmtDestroy() frees them.
+ */
 
 
-void radeonInitTnlModule( GLcontext *ctx )
+void radeonVtxfmtInit( GLcontext *ctx )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLvertexformat *vfmt = &(rmesa->imm.vtxfmt);
-
-   return;
-
-   radeon_init_norm_funcs();
-   radeon_init_vert_funcs();
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   GLvertexformat *vfmt = &(rmesa->vb.vtxfmt);
 
    MEMSET( vfmt, 0, sizeof(GLvertexformat) );
 
-   /* Handled fully in supported states:
+   /* Hook in chooser functions for codegen, etc:
     */
-   vfmt->ArrayElement = NULL;				/* FIXME: ... */
-   vfmt->Color3f = radeon_choose_Color3f;
-   vfmt->Color3fv = radeon_choose_Color3fv;
-   vfmt->Color3ub = radeon_choose_Color3ub;
-   vfmt->Color3ubv = radeon_choose_Color3ubv;
-   vfmt->Color4f = radeon_choose_Color4f;
-   vfmt->Color4fv = radeon_choose_Color4fv;
-   vfmt->Color4ub = radeon_choose_Color4ub;
-   vfmt->Color4ubv = radeon_choose_Color4ubv;
-   vfmt->FogCoordfvEXT = radeon_FogCoordfvEXT;
-   vfmt->FogCoordfEXT = radeon_FogCoordfEXT;
-   vfmt->Materialfv = radeon_Materialfv;
-   vfmt->MultiTexCoord1fARB = radeon_MultiTexCoord1fARB;
-   vfmt->MultiTexCoord1fvARB = radeon_MultiTexCoord1fvARB;
-   vfmt->MultiTexCoord2fARB = radeon_MultiTexCoord2fARB;
-   vfmt->MultiTexCoord2fvARB = radeon_MultiTexCoord2fvARB;
-   vfmt->MultiTexCoord3fARB = radeon_MultiTexCoord3fARB;
-   vfmt->MultiTexCoord3fvARB = radeon_MultiTexCoord3fvARB;
-   vfmt->MultiTexCoord4fARB = radeon_MultiTexCoord4fARB;
-   vfmt->MultiTexCoord4fvARB = radeon_MultiTexCoord4fvARB;
-   vfmt->Normal3f = radeon_choose_Normal3f;
-   vfmt->Normal3fv = radeon_choose_Normal3fv;
-   vfmt->SecondaryColor3ubEXT = radeon_SecondaryColor3ubEXT;
-   vfmt->SecondaryColor3ubvEXT = radeon_SecondaryColor3ubvEXT;
-   vfmt->SecondaryColor3fEXT = radeon_SecondaryColor3fEXT;
-   vfmt->SecondaryColor3fvEXT = radeon_SecondaryColor3fvEXT;
-   vfmt->TexCoord1f = radeon_TexCoord1f;
-   vfmt->TexCoord1fv = radeon_TexCoord1fv;
-   vfmt->TexCoord2f = radeon_TexCoord2f;
-   vfmt->TexCoord2fv = radeon_TexCoord2fv;
-   vfmt->TexCoord3f = radeon_TexCoord3f;
-   vfmt->TexCoord3fv = radeon_TexCoord3fv;
-   vfmt->TexCoord4f = radeon_TexCoord4f;
-   vfmt->TexCoord4fv = radeon_TexCoord4fv;
-   vfmt->Vertex2f = radeon_Vertex2f;
-   vfmt->Vertex2fv = radeon_Vertex2fv;
-   vfmt->Vertex3f = radeon_Vertex3f;
-   vfmt->Vertex3fv = radeon_Vertex3fv;
-   vfmt->Vertex4f = radeon_Vertex4f;
-   vfmt->Vertex4fv = radeon_Vertex4fv;
+   radeonVtxfmtInitChoosers( vfmt );
 
+   /* Handled fully in supported states, but no codegen:
+    */
+   vfmt->Materialfv = radeon_Materialfv;
+   vfmt->ArrayElement = _ae_loopback_array_elt;	        /* generic helper */
+   vfmt->Rectf = _mesa_noop_Rectf;			/* generic helper */
    vfmt->Begin = radeon_Begin;
    vfmt->End = radeon_End;
 
-   vfmt->Rectf = _mesa_noop_Rectf;			/* generic helper */
-
-   vfmt->DrawArrays = NULL;
-   vfmt->DrawElements = NULL;
-   vfmt->DrawRangeElements = _mesa_noop_DrawRangeElements; /* discard range */
+   /* Fallback for performance reasons:  (Fix with cva/elt path here and
+    * dmatmp2.h style primitive-merging)
+    *
+    * These should call NotifyBegin(), as should _tnl_EvalMesh, to allow
+    * a driver-hook.
+    */
+   vfmt->DrawArrays = radeon_fallback_DrawArrays;
+   vfmt->DrawElements = radeon_fallback_DrawElements;
+   vfmt->DrawRangeElements = radeon_fallback_DrawRangeElements; 
 
 
    /* Not active in supported states; just keep ctx->Current uptodate:
     */
+   vfmt->FogCoordfvEXT = _mesa_noop_FogCoordfvEXT;
+   vfmt->FogCoordfEXT = _mesa_noop_FogCoordfEXT;
    vfmt->EdgeFlag = _mesa_noop_EdgeFlag;
    vfmt->EdgeFlagv = _mesa_noop_EdgeFlagv;
    vfmt->Indexi = _mesa_noop_Indexi;
@@ -609,10 +981,6 @@ void radeonInitTnlModule( GLcontext *ctx )
 
 
    /* Active but unsupported -- fallback if we receive these:
-    *
-    * All of these fallbacks can be fixed with additional code, except
-    * CallList, unless we build a play_immediate_noop() command which
-    * turns an immediate back into glBegin/glEnd commands...
     */
    vfmt->CallList = radeon_fallback_CallList;
    vfmt->EvalCoord1f = radeon_fallback_EvalCoord1f;
@@ -623,132 +991,137 @@ void radeonInitTnlModule( GLcontext *ctx )
    vfmt->EvalMesh2 = radeon_fallback_EvalMesh2;
    vfmt->EvalPoint1 = radeon_fallback_EvalPoint1;
    vfmt->EvalPoint2 = radeon_fallback_EvalPoint2;
-
-
-   rmesa->imm.prim = PRIM_OUTSIDE_BEGIN_END;
-
-   /* THIS IS A HACK!
-    */
-   _mesa_install_exec_vtxfmt( ctx, vfmt );
+   vfmt->TexCoord3f = radeon_fallback_TexCoord3f;
+   vfmt->TexCoord3fv = radeon_fallback_TexCoord3fv;
+   vfmt->TexCoord4f = radeon_fallback_TexCoord4f;
+   vfmt->TexCoord4fv = radeon_fallback_TexCoord4fv;
+   vfmt->MultiTexCoord3fARB = radeon_fallback_MultiTexCoord3fARB;
+   vfmt->MultiTexCoord3fvARB = radeon_fallback_MultiTexCoord3fvARB;
+   vfmt->MultiTexCoord4fARB = radeon_fallback_MultiTexCoord4fARB;
+   vfmt->MultiTexCoord4fvARB = radeon_fallback_MultiTexCoord4fvARB;
+   vfmt->Vertex4f = radeon_fallback_Vertex4f;
+   vfmt->Vertex4fv = radeon_fallback_Vertex4fv;
+
+   (void)radeon_fallback_vtxfmt;
+
+   TNL_CONTEXT(ctx)->Driver.NotifyBegin = radeonNotifyBegin;
+
+   vb.context = ctx;
+   rmesa->vb.enabled = 1;
+   rmesa->vb.prim = &ctx->Driver.CurrentExecPrimitive;
+   rmesa->vb.primflags = 0;
+
+   make_empty_list( &rmesa->vb.dfn_cache.Vertex2f );
+   make_empty_list( &rmesa->vb.dfn_cache.Vertex2fv );
+   make_empty_list( &rmesa->vb.dfn_cache.Vertex3f );
+   make_empty_list( &rmesa->vb.dfn_cache.Vertex3fv );
+   make_empty_list( &rmesa->vb.dfn_cache.Color4ub );
+   make_empty_list( &rmesa->vb.dfn_cache.Color4ubv );
+   make_empty_list( &rmesa->vb.dfn_cache.Color3ub );
+   make_empty_list( &rmesa->vb.dfn_cache.Color3ubv );
+   make_empty_list( &rmesa->vb.dfn_cache.Color4f );
+   make_empty_list( &rmesa->vb.dfn_cache.Color4fv );
+   make_empty_list( &rmesa->vb.dfn_cache.Color3f );
+   make_empty_list( &rmesa->vb.dfn_cache.Color3fv );
+   make_empty_list( &rmesa->vb.dfn_cache.SecondaryColor3fEXT );
+   make_empty_list( &rmesa->vb.dfn_cache.SecondaryColor3fvEXT );
+   make_empty_list( &rmesa->vb.dfn_cache.SecondaryColor3ubEXT );
+   make_empty_list( &rmesa->vb.dfn_cache.SecondaryColor3ubvEXT );
+   make_empty_list( &rmesa->vb.dfn_cache.Normal3f );
+   make_empty_list( &rmesa->vb.dfn_cache.Normal3fv );
+   make_empty_list( &rmesa->vb.dfn_cache.TexCoord2f );
+   make_empty_list( &rmesa->vb.dfn_cache.TexCoord2fv );
+   make_empty_list( &rmesa->vb.dfn_cache.TexCoord1f );
+   make_empty_list( &rmesa->vb.dfn_cache.TexCoord1fv );
+   make_empty_list( &rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+   make_empty_list( &rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+   make_empty_list( &rmesa->vb.dfn_cache.MultiTexCoord1fARB );
+   make_empty_list( &rmesa->vb.dfn_cache.MultiTexCoord1fvARB );
+
+   radeonInitCodegen( &rmesa->vb.codegen );
 }
 
-
-
-
-
-
-#if 0
-
-
-
-static void radeon_Begin( GLenum prim )
+static void free_funcs( struct dynfn *l )	/* unlink and free every dynfn on list l */
 {
-   GET_CURRENT_CONTEXT(ctx);
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   radeon_prim *tab = &radeon_prim_tab[(int)prim];
-
-   if ( prim > GL_POLYGON ) {
-      gl_error( ctx, GL_INVALID_ENUM, "glBegin" );
-      return;
-   }
-
-   if ( rmesa->prim != PRIM_OUTSIDE_BEGIN_END ) {
-      gl_error( ctx, GL_INVALID_OPERATION, "glBegin" );
-      return;
-   }
-
-   if ( tab->fire_on_vertex ) {
-      rmesa->fire_on_vertex = tab->fire_on_vertex;
-      rmesa->fire_on_end = tab->fire_on_end;
-      rmesa->fire_on_fallback = tab->fire_on_fallback;
-      rmesa->vert = &(rmesa->cache[0]);
-      rmesa->prim = prim;
-      ctx->Driver.NeedFlush |= (FLUSH_INSIDE_BEGIN_END |
-				FLUSH_STORED_VERTICES);
-   } else {
-      radeon_fallback_vtxfmt( ctx );
+   struct dynfn *f, *tmp;	/* tmp is presumably foreach_s's saved next pointer -- confirm in simple_list.h */
+   foreach_s (f, tmp, l) {
+      remove_from_list( f );
+      ALIGN_FREE( f->code );	/* code buffers come from ALIGN_MALLOC in the DFN macro */
+      FREE( f );
    }
 }
 
-static void radeon_End( void )
+void radeonVtxfmtUnbindContext( GLcontext *ctx )	/* fall back if installed, then unhook NotifyBegin */
 {
-   GET_CURRENT_CONTEXT(ctx);
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
-   if ( rmesa->prim == PRIM_OUTSIDE_BEGIN_END ) {
-      gl_error( ctx, GL_INVALID_OPERATION, "glEnd" );
-      return;
+   if (RADEON_CONTEXT(ctx)->vb.installed) {
+      assert(vb.context == ctx);
+      VFMT_FALLBACK_OUTSIDE_BEGIN_END( __FUNCTION__ );	/* NOTE(review): macro defined outside this chunk */
    }
 
-   rmesa->fire_on_end( ctx );
-   rmesa->prim = PRIM_OUTSIDE_BEGIN_END;
-
-   ctx->Exec->Vertex3fv = radeon_noop_Vertex3fv;
-   ctx->Exec->Vertex3f = radeon_noop_Vertex3f;
-   ctx->Exec->Vertex2f = radeon_noop_Vertex2f;
-
-   ctx->Driver.NeedFlush &= ~(FLUSH_INSIDE_BEGIN_END |
-			      FLUSH_STORED_VERTICES);
+   TNL_CONTEXT(ctx)->Driver.NotifyBegin = 0;	/* radeonVtxfmtMakeCurrent hooks it again */
 }
 
 
-
-
-static GLboolean radeon_flush_vtxfmt( GLcontext *ctx, GLuint flags )
+void radeonVtxfmtMakeCurrent( GLcontext *ctx )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
-   if ( fxMesa->prim != PRIM_OUTSIDE_BEGIN_END )
-      return GL_FALSE;
-
-   /* Outside begin/end.  All vertices will already be flushed, just
-    * update ctx->Current.
-    */
-   if ( flags & FLUSH_UPDATE_CURRENT ) {
-      radeonClipVertexPtr v = &(RADEON_CONTEXT(ctx)->Current);
-      COPY_2FV( ctx->Current.Texcoord[0], v->texcoord[0] );
-      COPY_2FV( ctx->Current.Texcoord[1], v->texcoord[1] );
-      if ( rmesa->accel_light == ACCEL_LIGHT ) {
-	 COPY_3FV( ctx->Current.Normal, v->normal );
-      } else {
-	 ctx->Current.Color[RCOMP] = UBYTE_TO_CHAN( v->v.color.red );
-	 ctx->Current.Color[GCOMP] = UBYTE_TO_CHAN( v->v.color.green );
-	 ctx->Current.Color[BCOMP] = UBYTE_TO_CHAN( v->v.color.blue );
-	 ctx->Current.Color[ACOMP] = UBYTE_TO_CHAN( v->v.color.alpha );
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   /* With THREADS: once a call arrives from a second thread id, stop hooking NotifyBegin. */
+#if defined(THREADS)
+   static GLboolean ThreadSafe = GL_FALSE;  /* In thread-safe mode? */
+   if (!ThreadSafe) {
+      static unsigned long knownID;	/* thread id seen on the first call */
+      static GLboolean firstCall = GL_TRUE;
+      if (firstCall) {
+         knownID = _glthread_GetID();
+         firstCall = GL_FALSE;
+      }
+      else if (knownID != _glthread_GetID()) {
+         ThreadSafe = GL_TRUE;	/* never reset: the statics persist across calls */
 
-	 if ( ctx->Light.ColorMaterialEnabled )
-	    _mesa_update_color_material( ctx, ctx->Current.Color );
+	 if (RADEON_DEBUG & (DEBUG_DRI|DEBUG_VFMT))
+	    fprintf(stderr, "**** Multithread situation!\n");
       }
    }
+   if (ThreadSafe) 
+      return;	/* returns before the hook below; an existing hook is not cleared here */
+#endif
 
-   /* Could clear this flag and set it from each 'choose' function,
-    * maybe, but there isn't much of a penalty for leaving it set:
-    */
-   ctx->Driver.NeedFlush = FLUSH_UPDATE_CURRENT;
-   return GL_TRUE;
+   if (rmesa->vb.enabled) {	/* vb.enabled is set to 1 in radeonVtxfmtInit */
+      TNL_CONTEXT(ctx)->Driver.NotifyBegin = radeonNotifyBegin;
+   }
 }
 
-void radeon_update_lighting( GLcontext *ctx )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-   if ( !ctx->Light.Enabled ) {
-      rmesa->accel_light = ACCEL_NO_LIGHT;
-   }
-   else if ( !ctx->Light._NeedVertices && !ctx->Light.Model.TwoSide ) {
-      rmesa->accel_light = ACCEL_LIGHT;
-      radeon_recalc_basecolor( ctx );
-   }
-   else {
-      radeon->accel_light = 0;
-   }
+void radeonVtxfmtDestroy( GLcontext *ctx )
+{  /* Frees every cached dynfn; one free_funcs call per list that radeonVtxfmtInit emptied. */
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+
+   count_funcs( rmesa );	/* NOTE(review): defined outside this chunk; presumably statistics only */
+   free_funcs( &rmesa->vb.dfn_cache.Vertex2f );
+   free_funcs( &rmesa->vb.dfn_cache.Vertex2fv );
+   free_funcs( &rmesa->vb.dfn_cache.Vertex3f );
+   free_funcs( &rmesa->vb.dfn_cache.Vertex3fv );
+   free_funcs( &rmesa->vb.dfn_cache.Color4ub );
+   free_funcs( &rmesa->vb.dfn_cache.Color4ubv );
+   free_funcs( &rmesa->vb.dfn_cache.Color3ub );
+   free_funcs( &rmesa->vb.dfn_cache.Color3ubv );
+   free_funcs( &rmesa->vb.dfn_cache.Color4f );
+   free_funcs( &rmesa->vb.dfn_cache.Color4fv );
+   free_funcs( &rmesa->vb.dfn_cache.Color3f );
+   free_funcs( &rmesa->vb.dfn_cache.Color3fv );
+   free_funcs( &rmesa->vb.dfn_cache.SecondaryColor3ubEXT );
+   free_funcs( &rmesa->vb.dfn_cache.SecondaryColor3ubvEXT );
+   free_funcs( &rmesa->vb.dfn_cache.SecondaryColor3fEXT );
+   free_funcs( &rmesa->vb.dfn_cache.SecondaryColor3fvEXT );
+   free_funcs( &rmesa->vb.dfn_cache.Normal3f );
+   free_funcs( &rmesa->vb.dfn_cache.Normal3fv );
+   free_funcs( &rmesa->vb.dfn_cache.TexCoord2f );
+   free_funcs( &rmesa->vb.dfn_cache.TexCoord2fv );
+   free_funcs( &rmesa->vb.dfn_cache.TexCoord1f );
+   free_funcs( &rmesa->vb.dfn_cache.TexCoord1fv );
+   free_funcs( &rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+   free_funcs( &rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+   free_funcs( &rmesa->vb.dfn_cache.MultiTexCoord1fARB );
+   free_funcs( &rmesa->vb.dfn_cache.MultiTexCoord1fvARB );
 }
 
-
-/* How to fallback:
- *   - install default vertex format
- *   - call glBegin
- *   - revive stalled vertices (may be reordered).
- *   - re-issue call that caused fallback.
- */
-#endif
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.h b/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.h
index b388d7364..9b82756be 100644
--- a/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.h
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.h
@@ -1,30 +1,36 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.h,v 1.1 2002/02/22 21:45:01 dawes Exp $ */
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.h,v 1.3 2002/12/21 17:02:16 dawes Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
 /*
- * Copyright 2000, 2001 VA Linux Systems Inc., Fremont, California.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
  * Authors:
- *    Gareth Hughes <gareth@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
  */
 
 #ifndef __RADEON_VTXFMT_H__
@@ -34,13 +40,89 @@
 
 #include "radeon_context.h"
 
-extern void radeonInitTnlModule( GLcontext *ctx );
 
 
-extern radeon_flush_func radeon_flush_tab_tiny[GL_POLYGON+1];
-extern radeon_flush_func radeon_flush_tab_notex[GL_POLYGON+1];
-extern radeon_flush_func radeon_flush_tab_tex0[GL_POLYGON+1];
-extern radeon_flush_func radeon_flush_tab_tex1[GL_POLYGON+1];
+extern struct radeon_vb vb;	/* single global; vb.context records the GLcontext it is set up for */
+
+
+extern void radeonVtxfmtUpdate( GLcontext *ctx );
+extern void radeonVtxfmtInit( GLcontext *ctx );
+extern void radeonVtxfmtInvalidate( GLcontext *ctx );
+extern void radeonVtxfmtDestroy( GLcontext *ctx );
+extern void radeonVtxfmtInitChoosers( GLvertexformat *vfmt );
+
+extern void radeonVtxfmtMakeCurrent( GLcontext *ctx );
+extern void radeonVtxfmtUnbindContext( GLcontext *ctx );
+
+extern void radeon_copy_to_current( GLcontext *ctx );
+
+#define DFN( FUNC, CACHE)				\
+do {	/* relies on 'dfn' and 'key' being in scope at the expansion site */	\
+   char *start = (char *)&FUNC;				\
+   char *end = (char *)&FUNC##_end;			\
+   insert_at_head( &CACHE, dfn );			\
+   dfn->key = key;					\
+   dfn->code = ALIGN_MALLOC( end - start, 16 );	/* NOTE(review): result is not checked */	\
+   memcpy (dfn->code, start, end - start);	/* copy the bytes between FUNC and FUNC_end */	\
+}							\
+while ( 0 )
+
+#define FIXUP( CODE, OFFSET, CHECKVAL, NEWVAL )	\
+do {	/* overwrite the int at CODE+OFFSET after asserting it holds CHECKVAL */	\
+   int *icode = (int *)(CODE+OFFSET);		\
+   assert (*icode == CHECKVAL);			\
+   *icode = (int)NEWVAL;	/* NOTE(review): cast binds to the first token of NEWVAL; FIXUP2 uses (int)(NEWVAL) */	\
+} while (0)
+
+
+/* Useful for figuring out the offsets: advances OFFSET until the int at
+ * CODE+OFFSET equals CHECKVAL (no upper bound), stores NEWVAL, then OFFSET += 4. */
+#define FIXUP2( CODE, OFFSET, CHECKVAL, NEWVAL )				\
+do {										\
+   while (*(int *)(CODE+OFFSET) != CHECKVAL) OFFSET++;				\
+   /* fprintf(stderr, "%s/%d CVAL %x OFFSET %d VAL %x\n", __FUNCTION__, */	\
+/* 	   __LINE__, CHECKVAL, OFFSET, (int)(NEWVAL)); */			\
+   *(int *)(CODE+OFFSET) = (int)(NEWVAL);					\
+   OFFSET += 4;									\
+} while (0)
+
+/* Codegen set-up.  NOTE(review): definitions are outside this chunk;
+ * radeonVtxfmtInit passes &rmesa->vb.codegen to radeonInitCodegen. */
+void radeonInitCodegen( struct dfn_generators *gen );
+void radeonInitX86Codegen( struct dfn_generators *gen );
+void radeonInitSSECodegen( struct dfn_generators *gen );
+
+
+
+/* Defined in radeon_vtxfmt_x86.c.  NOTE(review): the int argument is
+ * presumably the lookup key stored in dynfn::key (see DFN) -- confirm. */
+struct dynfn *radeon_makeX86Vertex2f( GLcontext *, int );
+struct dynfn *radeon_makeX86Vertex2fv( GLcontext *, int );
+struct dynfn *radeon_makeX86Vertex3f( GLcontext *, int );
+struct dynfn *radeon_makeX86Vertex3fv( GLcontext *, int );
+struct dynfn *radeon_makeX86Color4ub( GLcontext *, int );
+struct dynfn *radeon_makeX86Color4ubv( GLcontext *, int );
+struct dynfn *radeon_makeX86Color3ub( GLcontext *, int );
+struct dynfn *radeon_makeX86Color3ubv( GLcontext *, int );
+struct dynfn *radeon_makeX86Color4f( GLcontext *, int );
+struct dynfn *radeon_makeX86Color4fv( GLcontext *, int );
+struct dynfn *radeon_makeX86Color3f( GLcontext *, int );
+struct dynfn *radeon_makeX86Color3fv( GLcontext *, int );
+struct dynfn *radeon_makeX86SecondaryColor3ubEXT( GLcontext *, int );
+struct dynfn *radeon_makeX86SecondaryColor3ubvEXT( GLcontext *, int );
+struct dynfn *radeon_makeX86SecondaryColor3fEXT( GLcontext *, int );
+struct dynfn *radeon_makeX86SecondaryColor3fvEXT( GLcontext *, int );
+struct dynfn *radeon_makeX86Normal3f( GLcontext *, int );
+struct dynfn *radeon_makeX86Normal3fv( GLcontext *, int );
+struct dynfn *radeon_makeX86TexCoord2f( GLcontext *, int );
+struct dynfn *radeon_makeX86TexCoord2fv( GLcontext *, int );
+struct dynfn *radeon_makeX86TexCoord1f( GLcontext *, int );
+struct dynfn *radeon_makeX86TexCoord1fv( GLcontext *, int );
+struct dynfn *radeon_makeX86MultiTexCoord2fARB( GLcontext *, int );
+struct dynfn *radeon_makeX86MultiTexCoord2fvARB( GLcontext *, int );
+struct dynfn *radeon_makeX86MultiTexCoord1fARB( GLcontext *, int );
+struct dynfn *radeon_makeX86MultiTexCoord1fvARB( GLcontext *, int );
+
 
 #endif
 #endif
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_c.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_c.c
new file mode 100644
index 000000000..fadfc3077
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_c.c
@@ -0,0 +1,801 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_c.c,v 1.2 2002/12/16 16:18:59 dawes Exp $ */
+/**************************************************************************
+
+Copyright 2002 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+#include "mtypes.h"
+#include "colormac.h"
+#include "simple_list.h"
+#include "api_noop.h"
+#include "vtxfmt.h"
+
+#include "radeon_vtxfmt.h"
+
+/* Fallback versions of all the entrypoints for situations where
+ * codegen isn't available.  This is still a lot faster than the
+ * vb/pipeline implementation in Mesa.
+ */
+static void radeon_Vertex3f( GLfloat x, GLfloat y, GLfloat z )
+{
+   int slot = 3;
+   /* Emit the position; the float bit patterns are stored as ints. */
+   *vb.dmaptr++ = *(int *)&x;
+   *vb.dmaptr++ = *(int *)&y;
+   *vb.dmaptr++ = *(int *)&z;
+   /* Append entries 3 .. vertex_size-1 of vb.vertex unchanged. */
+   while (slot < vb.vertex_size)
+      *vb.dmaptr++ = vb.vertex[slot++].i;
+   /* Call vb.notify() when the vertex countdown reaches zero. */
+   if (--vb.counter == 0)
+      vb.notify();
+}
+
+
+static void radeon_Vertex3fv( const GLfloat *v )
+{
+   int n;
+   /* Emit v[0..2]; the float bit patterns are stored as ints. */
+   *vb.dmaptr++ = *(int *)(v + 0);
+   *vb.dmaptr++ = *(int *)(v + 1);
+   *vb.dmaptr++ = *(int *)(v + 2);
+   /* Append entries 3 .. vertex_size-1 of vb.vertex unchanged. */
+   for (n = 3; n < vb.vertex_size; ++n)
+      *vb.dmaptr++ = vb.vertex[n].i;
+   /* Call vb.notify() when the vertex countdown reaches zero. */
+   if (--vb.counter == 0)
+      vb.notify();
+}
+
+
+static void radeon_Vertex2f( GLfloat x, GLfloat y )
+{
+   int i;
+   /* Emit x and y as raw float bits; the third dword is written as 0. */
+   *vb.dmaptr++ = *(int *)&x;
+   *vb.dmaptr++ = *(int *)&y;
+   *vb.dmaptr++ = 0;
+   /* Copy the rest through the .i member, like the other Vertex* paths. */
+   for (i = 3; i < vb.vertex_size; i++)
+      *vb.dmaptr++ = vb.vertex[i].i;
+   /* Call vb.notify() when the vertex countdown reaches zero. */
+   if (--vb.counter == 0)
+      vb.notify();
+}
+
+
+static void radeon_Vertex2fv( const GLfloat *v )
+{
+   int n = 3;
+   /* Emit v[0..1] as raw float bits; the third dword is written as 0. */
+   *vb.dmaptr++ = *(int *)(v + 0);
+   *vb.dmaptr++ = *(int *)(v + 1);
+   *vb.dmaptr++ = 0;
+   /* Append entries 3 .. vertex_size-1 of vb.vertex unchanged. */
+   for (; n < vb.vertex_size; n++)
+      *vb.dmaptr++ = vb.vertex[n].i;
+   /* Call vb.notify() when the vertex countdown reaches zero. */
+   if (--vb.counter == 0)
+      vb.notify();
+}
+
+
+
+/* Color for ubyte (packed) color formats:
+ */
+static void radeon_Color3ub_ub( GLubyte r, GLubyte g, GLubyte b )
+{
+   radeon_color_t *packed = vb.colorptr;   /* packed ubyte color */
+   packed->alpha = 0xff;                   /* no alpha argument: use 0xff */
+   packed->blue  = b;
+   packed->green = g;
+   packed->red   = r;
+}
+
+static void radeon_Color3ubv_ub( const GLubyte *v )
+{
+   radeon_color_t *packed = vb.colorptr;   /* packed ubyte color */
+   packed->red   = v[0];
+   packed->green = v[1];
+   packed->blue  = v[2];
+   packed->alpha = 0xff;                   /* v supplies no alpha */
+}
+
+static void radeon_Color4ub_ub( GLubyte r, GLubyte g, GLubyte b, GLubyte a )
+{
+   radeon_color_t *packed = vb.colorptr;   /* packed ubyte color */
+   packed->alpha = a;
+   packed->blue  = b;
+   packed->green = g;
+   packed->red   = r;
+}
+
+static void radeon_Color4ubv_ub( const GLubyte *v )
+{  /* Per-channel byte stores, as in radeon_Color4ub_ub: v may be unaligned. */
+   radeon_color_t *dest = vb.colorptr;
+   dest->red = v[0]; dest->green = v[1]; dest->blue = v[2]; dest->alpha = v[3];
+}
+
+static void radeon_Color3f_ub( GLfloat r, GLfloat g, GLfloat b )
+{
+   radeon_color_t *packed = vb.colorptr;   /* packed ubyte color */
+   UNCLAMPED_FLOAT_TO_UBYTE( packed->red,   r );
+   UNCLAMPED_FLOAT_TO_UBYTE( packed->green, g );
+   UNCLAMPED_FLOAT_TO_UBYTE( packed->blue,  b );
+   packed->alpha = 255;                    /* no alpha argument: use 255 */
+}
+
+static void radeon_Color3fv_ub( const GLfloat *v )
+{
+   radeon_color_t *packed = vb.colorptr;   /* packed ubyte color */
+   UNCLAMPED_FLOAT_TO_UBYTE( packed->red,   v[0] );
+   UNCLAMPED_FLOAT_TO_UBYTE( packed->green, v[1] );
+   UNCLAMPED_FLOAT_TO_UBYTE( packed->blue,  v[2] );
+   packed->alpha = 255;                    /* v supplies no alpha */
+}
+
+static void radeon_Color4f_ub( GLfloat r, GLfloat g, GLfloat b, GLfloat a )
+{
+   radeon_color_t *packed = vb.colorptr;   /* packed ubyte color */
+   UNCLAMPED_FLOAT_TO_UBYTE( packed->red,   r );
+   UNCLAMPED_FLOAT_TO_UBYTE( packed->green, g );
+   UNCLAMPED_FLOAT_TO_UBYTE( packed->blue,  b );
+   UNCLAMPED_FLOAT_TO_UBYTE( packed->alpha, a );
+}
+
+static void radeon_Color4fv_ub( const GLfloat *v )
+{
+   radeon_color_t *packed = vb.colorptr;   /* packed ubyte color */
+   UNCLAMPED_FLOAT_TO_UBYTE( packed->red,   v[0] );
+   UNCLAMPED_FLOAT_TO_UBYTE( packed->green, v[1] );
+   UNCLAMPED_FLOAT_TO_UBYTE( packed->blue,  v[2] );
+   UNCLAMPED_FLOAT_TO_UBYTE( packed->alpha, v[3] );
+}
+
+
+/* Color for float color+alpha formats:
+ */
+static void radeon_Color3ub_4f( GLubyte r, GLubyte g, GLubyte b )
+{
+   GLfloat *rgba = vb.floatcolorptr;   /* four-float color */
+   rgba[3] = 1.0;                      /* no alpha argument: use 1.0 */
+   rgba[2] = UBYTE_TO_FLOAT(b);
+   rgba[1] = UBYTE_TO_FLOAT(g);
+   rgba[0] = UBYTE_TO_FLOAT(r);
+}
+
+static void radeon_Color3ubv_4f( const GLubyte *v )
+{
+   GLfloat *rgba = vb.floatcolorptr;   /* four-float color */
+   rgba[0] = UBYTE_TO_FLOAT(v[0]);
+   rgba[1] = UBYTE_TO_FLOAT(v[1]);
+   rgba[2] = UBYTE_TO_FLOAT(v[2]);
+   rgba[3] = 1.0;                      /* v supplies no alpha */
+}
+
+static void radeon_Color4ub_4f( GLubyte r, GLubyte g, GLubyte b, GLubyte a )
+{
+   GLfloat *rgba = vb.floatcolorptr;   /* four-float color */
+   rgba[3] = UBYTE_TO_FLOAT(a);
+   rgba[2] = UBYTE_TO_FLOAT(b);
+   rgba[1] = UBYTE_TO_FLOAT(g);
+   rgba[0] = UBYTE_TO_FLOAT(r);
+}
+
+static void radeon_Color4ubv_4f( const GLubyte *v )
+{
+   GLfloat *rgba = vb.floatcolorptr;   /* four-float color */
+   rgba[0] = UBYTE_TO_FLOAT(v[0]);
+   rgba[1] = UBYTE_TO_FLOAT(v[1]);
+   rgba[2] = UBYTE_TO_FLOAT(v[2]);
+   rgba[3] = UBYTE_TO_FLOAT(v[3]);
+}
+
+
+static void radeon_Color3f_4f( GLfloat r, GLfloat g, GLfloat b )
+{
+   GLfloat *rgba = vb.floatcolorptr;   /* four-float color */
+   rgba[3] = 1.0;                      /* no alpha argument: use 1.0 */
+   rgba[2] = b;
+   rgba[1] = g;
+   rgba[0] = r;
+}
+
+static void radeon_Color3fv_4f( const GLfloat *v )
+{
+   GLfloat *rgba = vb.floatcolorptr;   /* four-float color */
+   rgba[0] = v[0];
+   rgba[1] = v[1];
+   rgba[2] = v[2];
+   rgba[3] = 1.0;                      /* v supplies no alpha */
+}
+
+static void radeon_Color4f_4f( GLfloat r, GLfloat g, GLfloat b, GLfloat a )
+{
+   GLfloat *rgba = vb.floatcolorptr;   /* four-float color */
+   rgba[3] = a;
+   rgba[2] = b;
+   rgba[1] = g;
+   rgba[0] = r;
+}
+
+static void radeon_Color4fv_4f( const GLfloat *v )
+{
+   GLfloat *rgba = vb.floatcolorptr;   /* four-float color */
+   rgba[0] = v[0];
+   rgba[1] = v[1];
+   rgba[2] = v[2];
+   rgba[3] = v[3];
+}
+
+
+/* Color for float color formats:
+ */
+static void radeon_Color3ub_3f( GLubyte r, GLubyte g, GLubyte b )
+{
+   GLfloat *rgb = vb.floatcolorptr;   /* only three floats are written */
+   rgb[2] = UBYTE_TO_FLOAT(b);
+   rgb[1] = UBYTE_TO_FLOAT(g);
+   rgb[0] = UBYTE_TO_FLOAT(r);
+}
+
+static void radeon_Color3ubv_3f( const GLubyte *v )
+{
+   GLfloat *rgb = vb.floatcolorptr;   /* only three floats are written */
+   rgb[0] = UBYTE_TO_FLOAT(v[0]);
+   rgb[1] = UBYTE_TO_FLOAT(v[1]);
+   rgb[2] = UBYTE_TO_FLOAT(v[2]);
+}
+
+static void radeon_Color4ub_3f( GLubyte r, GLubyte g, GLubyte b, GLubyte a )
+{
+   GLfloat *rgb = vb.floatcolorptr;   /* alpha goes to Current.Color[3] */
+   vb.context->Current.Color[3] = UBYTE_TO_FLOAT(a);
+   rgb[2] = UBYTE_TO_FLOAT(b);
+   rgb[1] = UBYTE_TO_FLOAT(g);
+   rgb[0] = UBYTE_TO_FLOAT(r);
+}
+
+static void radeon_Color4ubv_3f( const GLubyte *v )
+{
+   GLfloat *rgb = vb.floatcolorptr;   /* alpha goes to Current.Color[3] */
+   rgb[0] = UBYTE_TO_FLOAT(v[0]);
+   rgb[1] = UBYTE_TO_FLOAT(v[1]);
+   rgb[2] = UBYTE_TO_FLOAT(v[2]);
+   vb.context->Current.Color[3] = UBYTE_TO_FLOAT(v[3]);
+}
+
+
+static void radeon_Color3f_3f( GLfloat r, GLfloat g, GLfloat b )
+{
+   GLfloat *rgb = vb.floatcolorptr;   /* only three floats are written */
+   rgb[2] = b;
+   rgb[1] = g;
+   rgb[0] = r;
+}
+
+static void radeon_Color3fv_3f( const GLfloat *v )
+{
+   GLfloat *rgb = vb.floatcolorptr;   /* only three floats are written */
+   rgb[0] = v[0];
+   rgb[1] = v[1];
+   rgb[2] = v[2];
+}
+
+static void radeon_Color4f_3f( GLfloat r, GLfloat g, GLfloat b, GLfloat a )
+{
+   GLfloat *rgb = vb.floatcolorptr;   /* alpha goes to Current.Color[3] */
+   vb.context->Current.Color[3] = a;
+   rgb[2] = b;
+   rgb[1] = g;
+   rgb[0] = r;
+}
+
+static void radeon_Color4fv_3f( const GLfloat *v )
+{
+   GLfloat *rgb = vb.floatcolorptr;   /* alpha goes to Current.Color[3] */
+   rgb[0] = v[0];
+   rgb[1] = v[1];
+   rgb[2] = v[2];
+   vb.context->Current.Color[3] = v[3];
+}
+
+
+/* Secondary Color:
+ */
+static void radeon_SecondaryColor3ubEXT_ub( GLubyte r, GLubyte g, GLubyte b )
+{
+   radeon_color_t *spec = vb.specptr;   /* packed ubyte secondary color */
+   spec->alpha = 0xff;                  /* no alpha argument: use 0xff */
+   spec->blue  = b;
+   spec->green = g;
+   spec->red   = r;
+}
+
+static void radeon_SecondaryColor3ubvEXT_ub( const GLubyte *v )
+{
+   radeon_color_t *spec = vb.specptr;   /* packed ubyte secondary color */
+   spec->red   = v[0];
+   spec->green = v[1];
+   spec->blue  = v[2];
+   spec->alpha = 0xff;                  /* v supplies no alpha */
+}
+
+static void radeon_SecondaryColor3fEXT_ub( GLfloat r, GLfloat g, GLfloat b )
+{
+   radeon_color_t *spec = vb.specptr;   /* packed ubyte secondary color */
+   UNCLAMPED_FLOAT_TO_UBYTE( spec->red,   r );
+   UNCLAMPED_FLOAT_TO_UBYTE( spec->green, g );
+   UNCLAMPED_FLOAT_TO_UBYTE( spec->blue,  b );
+   spec->alpha = 255;                   /* no alpha argument: use 255 */
+}
+
+static void radeon_SecondaryColor3fvEXT_ub( const GLfloat *v )
+{
+   radeon_color_t *spec = vb.specptr;   /* packed ubyte secondary color */
+   UNCLAMPED_FLOAT_TO_UBYTE( spec->red,   v[0] );
+   UNCLAMPED_FLOAT_TO_UBYTE( spec->green, v[1] );
+   UNCLAMPED_FLOAT_TO_UBYTE( spec->blue,  v[2] );
+   spec->alpha = 255;                   /* v supplies no alpha */
+}
+
+static void radeon_SecondaryColor3ubEXT_3f( GLubyte r, GLubyte g, GLubyte b )
+{
+   GLfloat *spec = vb.floatspecptr;   /* float secondary color */
+   spec[3] = 1.0;                     /* fourth float is always set to 1.0 */
+   spec[2] = UBYTE_TO_FLOAT(b);
+   spec[1] = UBYTE_TO_FLOAT(g);
+   spec[0] = UBYTE_TO_FLOAT(r);
+}
+
+static void radeon_SecondaryColor3ubvEXT_3f( const GLubyte *v )
+{
+   GLfloat *spec = vb.floatspecptr;   /* float secondary color */
+   spec[0] = UBYTE_TO_FLOAT(v[0]);
+   spec[1] = UBYTE_TO_FLOAT(v[1]);
+   spec[2] = UBYTE_TO_FLOAT(v[2]);
+   spec[3] = 1.0;                     /* fourth float is always set to 1.0 */
+}
+
+static void radeon_SecondaryColor3fEXT_3f( GLfloat r, GLfloat g, GLfloat b )
+{
+   GLfloat *spec = vb.floatspecptr;   /* float secondary color */
+   spec[3] = 1.0;                     /* fourth float is always set to 1.0 */
+   spec[2] = b;
+   spec[1] = g;
+   spec[0] = r;
+}
+
+static void radeon_SecondaryColor3fvEXT_3f( const GLfloat *v )
+{
+   GLfloat *spec = vb.floatspecptr;   /* float secondary color */
+   spec[0] = v[0];
+   spec[1] = v[1];
+   spec[2] = v[2];
+   spec[3] = 1.0;                     /* fourth float is always set to 1.0 */
+}
+
+
+/* Normal
+ */
+static void radeon_Normal3f( GLfloat n0, GLfloat n1, GLfloat n2 )
+{
+   GLfloat *normal = vb.normalptr;   /* three floats are written */
+   normal[2] = n2;
+   normal[1] = n1;
+   normal[0] = n0;
+}
+
+static void radeon_Normal3fv( const GLfloat *v )
+{
+   GLfloat *normal = vb.normalptr;   /* three floats are written */
+   normal[0] = v[0];
+   normal[1] = v[1];
+   normal[2] = v[2];
+}
+
+
+/* TexCoord
+ */
+static void radeon_TexCoord1f( GLfloat s )
+{
+   GLfloat *st = vb.texcoordptr[0];   /* texcoord set 0 */
+   st[1] = 0;                         /* t is not supplied: write 0 */
+   st[0] = s;
+}
+
+static void radeon_TexCoord1fv( const GLfloat *v )
+{
+   GLfloat *st = vb.texcoordptr[0];   /* texcoord set 0 */
+   st[0] = v[0];
+   st[1] = 0;                         /* t is not supplied: write 0 */
+}
+
+static void radeon_TexCoord2f( GLfloat s, GLfloat t )
+{
+   GLfloat *st = vb.texcoordptr[0];   /* texcoord set 0 */
+   st[1] = t;
+   st[0] = s;
+}
+
+static void radeon_TexCoord2fv( const GLfloat *v )
+{
+   GLfloat *st = vb.texcoordptr[0];   /* texcoord set 0 */
+   st[0] = v[0];
+   st[1] = v[1];
+}
+
+
+/* MultiTexcoord
+ */
+static void radeon_MultiTexCoord1fARB( GLenum target, GLfloat s  )
+{  /* Only bit 0 of (target - GL_TEXTURE0_ARB) selects the texcoord set. */
+   GLfloat *st = vb.texcoordptr[(target - GL_TEXTURE0_ARB) & 1];
+   st[1] = 0;                         /* t is not supplied: write 0 */
+   st[0] = s;
+}
+
+static void radeon_MultiTexCoord1fvARB( GLenum target, const GLfloat *v )
+{  /* Only bit 0 of (target - GL_TEXTURE0_ARB) selects the texcoord set. */
+   GLfloat *st = vb.texcoordptr[(target - GL_TEXTURE0_ARB) & 1];
+   st[0] = v[0];
+   st[1] = 0;                         /* t is not supplied: write 0 */
+}
+
+static void radeon_MultiTexCoord2fARB( GLenum target, GLfloat s, GLfloat t )
+{  /* Only bit 0 of (target - GL_TEXTURE0_ARB) selects the texcoord set. */
+   GLfloat *st = vb.texcoordptr[(target - GL_TEXTURE0_ARB) & 1];
+   st[1] = t;
+   st[0] = s;
+}
+
+static void radeon_MultiTexCoord2fvARB( GLenum target, const GLfloat *v )
+{  /* Only bit 0 of (target - GL_TEXTURE0_ARB) selects the texcoord set. */
+   GLfloat *st = vb.texcoordptr[(target - GL_TEXTURE0_ARB) & 1];
+   st[0] = v[0];
+   st[1] = v[1];
+}
+
+static struct dynfn *lookup( struct dynfn *l, int key )
+{
+   struct dynfn *node;
+   /* Walk the list headed by l; the first entry whose key matches wins. */
+   foreach( node, l ) {
+      if (node->key == key)
+         return node;
+   }
+   /* No entry carries this key. */
+   return 0;
+}
+
+/* Can't use the loopback template for this.  choose_FN installs the
+ * cached or newly generated dynfn for the current vertex-format key into
+ * vb.context->Exec (else the C radeon_FN), then forwards the call. */
+#define CHOOSE(FN, FNTYPE, MASK, ACTIVE, ARGS1, ARGS2 )			\
+static void choose_##FN ARGS1						\
+{									\
+   radeonContextPtr rmesa = RADEON_CONTEXT(vb.context);			\
+   int key = rmesa->vb.vertex_format & (MASK|ACTIVE);			\
+   struct dynfn *dfn = lookup( &rmesa->vb.dfn_cache.FN, key );		\
+									\
+   if (dfn == 0)							\
+      dfn = rmesa->vb.codegen.FN( vb.context, key );			\
+   else if (RADEON_DEBUG & DEBUG_CODEGEN)				\
+      fprintf(stderr, "%s -- cached codegen\n", __FUNCTION__ );		\
+									\
+   if (dfn)								\
+      vb.context->Exec->FN = (FNTYPE)(dfn->code);			\
+   else {								\
+      if (RADEON_DEBUG & DEBUG_CODEGEN)					\
+	 fprintf(stderr, "%s -- generic version\n", __FUNCTION__ );	\
+      vb.context->Exec->FN = radeon_##FN;				\
+   }									\
+									\
+   vb.context->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT;		\
+   vb.context->Exec->FN ARGS2;						\
+}
+
+
+
+/* For the _3f case, only allow one color function to be hooked in at
+ * a time: if the installed size (NR) changes while FLUSH_UPDATE_CURRENT
+ * is set, copy to current, reinstall rmesa->vb.vtxfmt and re-dispatch.
+ * Eventually, use a similar mechanism to allow selecting the color
+ * component of the vertex format based on client behaviour.
+ * Note:  Perform these actions even if there is a codegen or cached
+ * codegen version of the chosen function.  */
+#define CHOOSE_COLOR(FN, FNTYPE, NR, MASK, ACTIVE, ARGS1, ARGS2 )	\
+static void choose_##FN ARGS1						\
+{									\
+   GLcontext *ctx = vb.context;						\
+   radeonContextPtr rmesa = RADEON_CONTEXT(vb.context);			\
+   int key = rmesa->vb.vertex_format & (MASK|ACTIVE);			\
+   struct dynfn *dfn;							\
+									\
+   if (rmesa->vb.vertex_format & ACTIVE_PKCOLOR) {			\
+      ctx->Exec->FN = radeon_##FN##_ub;					\
+   }									\
+   else if ((rmesa->vb.vertex_format &					\
+            (ACTIVE_FPCOLOR|ACTIVE_FPALPHA)) == ACTIVE_FPCOLOR) {	\
+									\
+      if (rmesa->vb.installed_color_3f_sz != NR) {			\
+         rmesa->vb.installed_color_3f_sz = NR;				\
+         if (NR == 3) ctx->Current.Color[3] = 1.0;			\
+         if (ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) {		\
+            radeon_copy_to_current( ctx );				\
+            _mesa_install_exec_vtxfmt( ctx, &rmesa->vb.vtxfmt );	\
+            ctx->Exec->FN ARGS2;					\
+            return;							\
+         }								\
+      }									\
+									\
+      ctx->Exec->FN = radeon_##FN##_3f;					\
+   }									\
+   else {								\
+      ctx->Exec->FN = radeon_##FN##_4f;					\
+   }									\
+									\
+									\
+   dfn = lookup( &rmesa->vb.dfn_cache.FN, key );			\
+   if (!dfn) dfn = rmesa->vb.codegen.FN( ctx, key );			\
+									\
+   if (dfn) {								\
+      if (RADEON_DEBUG & DEBUG_CODEGEN)					\
+         fprintf(stderr, "%s -- codegen version\n", __FUNCTION__ );	\
+      ctx->Exec->FN = (FNTYPE)dfn->code;				\
+   }									\
+   else if (RADEON_DEBUG & DEBUG_CODEGEN)				\
+         fprintf(stderr, "%s -- 'c' version\n", __FUNCTION__ );		\
+									\
+   ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT;			\
+   ctx->Exec->FN ARGS2;							\
+}
+
+
+
+/* Right now there are both _ub and _3f versions of the secondary color
+ * functions.  Currently, we only set up the hardware to use the _ub versions.
+ * The _3f versions are needed for the cases where secondary color isn't used
+ * in the vertex format, but it still needs to be stored in the context
+ * state vector.  Without a dynfn, ACTIVE_PKSPEC in vertex_format picks _ub.
+ */
+#define CHOOSE_SECONDARY_COLOR(FN, FNTYPE, MASK, ACTIVE, ARGS1, ARGS2 )	\
+static void choose_##FN ARGS1						\
+{									\
+   GLcontext *ctx = vb.context;						\
+   radeonContextPtr rmesa = RADEON_CONTEXT(vb.context);			\
+   int key = rmesa->vb.vertex_format & (MASK|ACTIVE);			\
+   struct dynfn *dfn = lookup( &rmesa->vb.dfn_cache.FN, key );		\
+									\
+   if (dfn == 0)							\
+      dfn = rmesa->vb.codegen.FN( vb.context, key );			\
+   else  if (RADEON_DEBUG & DEBUG_CODEGEN)				\
+      fprintf(stderr, "%s -- cached version\n", __FUNCTION__ );		\
+									\
+   if (dfn)								\
+      vb.context->Exec->FN = (FNTYPE)(dfn->code);			\
+   else {								\
+      if (RADEON_DEBUG & DEBUG_CODEGEN)					\
+         fprintf(stderr, "%s -- generic version\n", __FUNCTION__ );	\
+      vb.context->Exec->FN = ((rmesa->vb.vertex_format & ACTIVE_PKSPEC) != 0) \
+	  ? radeon_##FN##_ub : radeon_##FN##_3f;			\
+   }									\
+									\
+   ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT;			\
+   ctx->Exec->FN ARGS2;							\
+}
+
+
+
+
+
+/* Shorthands for the RADEON_CP_VC_FRMT_* bits tested in vertex_format
+ */
+#define ACTIVE_XYZW (RADEON_CP_VC_FRMT_W0|RADEON_CP_VC_FRMT_Z)
+#define ACTIVE_NORM RADEON_CP_VC_FRMT_N0
+
+#define ACTIVE_PKCOLOR RADEON_CP_VC_FRMT_PKCOLOR
+#define ACTIVE_FPCOLOR RADEON_CP_VC_FRMT_FPCOLOR
+#define ACTIVE_FPALPHA RADEON_CP_VC_FRMT_FPALPHA
+#define ACTIVE_COLOR (ACTIVE_FPCOLOR|ACTIVE_PKCOLOR)
+
+#define ACTIVE_PKSPEC RADEON_CP_VC_FRMT_PKSPEC
+#define ACTIVE_FPSPEC RADEON_CP_VC_FRMT_FPSPEC
+#define ACTIVE_SPEC   (ACTIVE_FPSPEC|ACTIVE_PKSPEC)
+
+#define ACTIVE_ST0 RADEON_CP_VC_FRMT_ST0
+#define ACTIVE_ST1 RADEON_CP_VC_FRMT_ST1
+#define ACTIVE_ST_ALL (RADEON_CP_VC_FRMT_ST1|RADEON_CP_VC_FRMT_ST0)
+
+/* Each codegen function should be able to be fully specified by a
+ * subsetted version of rmesa->vb.vertex_format.  Up to MASK_ST_ALL, each
+ * mask ORs the previous mask with the previous attribute's ACTIVE bits. */
+#define MASK_NORM    (ACTIVE_XYZW)
+#define MASK_COLOR   (MASK_NORM|ACTIVE_NORM)
+#define MASK_SPEC    (MASK_COLOR|ACTIVE_COLOR)
+#define MASK_ST0     (MASK_SPEC|ACTIVE_SPEC)
+#define MASK_ST1     (MASK_ST0|ACTIVE_ST0)
+#define MASK_ST_ALL  (MASK_ST1|ACTIVE_ST1)
+#define MASK_VERTEX  (MASK_ST_ALL|ACTIVE_FPALPHA) 
+
+/* Entry-point signatures; the CHOOSE* macros cast dfn->code to these. */
+typedef void (*p4f)( GLfloat, GLfloat, GLfloat, GLfloat );
+typedef void (*p3f)( GLfloat, GLfloat, GLfloat );
+typedef void (*p2f)( GLfloat, GLfloat );
+typedef void (*p1f)( GLfloat );
+typedef void (*pe2f)( GLenum, GLfloat, GLfloat );
+typedef void (*pe1f)( GLenum, GLfloat );
+typedef void (*p4ub)( GLubyte, GLubyte, GLubyte, GLubyte );
+typedef void (*p3ub)( GLubyte, GLubyte, GLubyte );
+typedef void (*pfv)( const GLfloat * );
+typedef void (*pefv)( GLenum, const GLfloat * );
+typedef void (*pubv)( const GLubyte * );
+
+/* Normal choosers */
+CHOOSE(Normal3f, p3f, MASK_NORM, ACTIVE_NORM, 
+       (GLfloat a,GLfloat b,GLfloat c), (a,b,c))
+CHOOSE(Normal3fv, pfv, MASK_NORM, ACTIVE_NORM, 
+       (const GLfloat *v), (v))
+/* Color choosers: the third argument is NR (4 or 3 components) */
+CHOOSE_COLOR(Color4ub, p4ub, 4, MASK_COLOR, ACTIVE_COLOR,
+	(GLubyte a,GLubyte b, GLubyte c, GLubyte d), (a,b,c,d))
+CHOOSE_COLOR(Color4ubv, pubv, 4, MASK_COLOR, ACTIVE_COLOR, 
+	(const GLubyte *v), (v))
+CHOOSE_COLOR(Color3ub, p3ub, 3, MASK_COLOR, ACTIVE_COLOR, 
+	(GLubyte a,GLubyte b, GLubyte c), (a,b,c))
+CHOOSE_COLOR(Color3ubv, pubv, 3, MASK_COLOR, ACTIVE_COLOR, 
+	(const GLubyte *v), (v))
+
+CHOOSE_COLOR(Color4f, p4f, 4, MASK_COLOR, ACTIVE_COLOR, 
+	(GLfloat a,GLfloat b, GLfloat c, GLfloat d), (a,b,c,d))
+CHOOSE_COLOR(Color4fv, pfv, 4, MASK_COLOR, ACTIVE_COLOR, 
+	(const GLfloat *v), (v))
+CHOOSE_COLOR(Color3f, p3f, 3, MASK_COLOR, ACTIVE_COLOR,
+	(GLfloat a,GLfloat b, GLfloat c), (a,b,c))
+CHOOSE_COLOR(Color3fv, pfv, 3, MASK_COLOR, ACTIVE_COLOR,
+	(const GLfloat *v), (v))
+
+/* Secondary color choosers */
+CHOOSE_SECONDARY_COLOR(SecondaryColor3ubEXT, p3ub, MASK_SPEC, ACTIVE_SPEC,
+	(GLubyte a,GLubyte b, GLubyte c), (a,b,c))
+CHOOSE_SECONDARY_COLOR(SecondaryColor3ubvEXT, pubv, MASK_SPEC, ACTIVE_SPEC,
+	(const GLubyte *v), (v))
+CHOOSE_SECONDARY_COLOR(SecondaryColor3fEXT, p3f, MASK_SPEC, ACTIVE_SPEC,
+	(GLfloat a,GLfloat b, GLfloat c), (a,b,c))
+CHOOSE_SECONDARY_COLOR(SecondaryColor3fvEXT, pfv, MASK_SPEC, ACTIVE_SPEC,
+	(const GLfloat *v), (v))
+/* TexCoord choosers (ACTIVE_ST0) */
+CHOOSE(TexCoord2f, p2f, MASK_ST0, ACTIVE_ST0, 
+       (GLfloat a,GLfloat b), (a,b))
+CHOOSE(TexCoord2fv, pfv, MASK_ST0, ACTIVE_ST0, 
+       (const GLfloat *v), (v))
+CHOOSE(TexCoord1f, p1f, MASK_ST0, ACTIVE_ST0, 
+       (GLfloat a), (a))
+CHOOSE(TexCoord1fv, pfv, MASK_ST0, ACTIVE_ST0, 
+       (const GLfloat *v), (v))
+/* MultiTexCoord choosers (ACTIVE_ST_ALL) */
+CHOOSE(MultiTexCoord2fARB, pe2f, MASK_ST_ALL, ACTIVE_ST_ALL,
+	 (GLenum u,GLfloat a,GLfloat b), (u,a,b))
+CHOOSE(MultiTexCoord2fvARB, pefv, MASK_ST_ALL, ACTIVE_ST_ALL,
+	(GLenum u,const GLfloat *v), (u,v))
+CHOOSE(MultiTexCoord1fARB, pe1f, MASK_ST_ALL, ACTIVE_ST_ALL,
+	 (GLenum u,GLfloat a), (u,a))
+CHOOSE(MultiTexCoord1fvARB, pefv, MASK_ST_ALL, ACTIVE_ST_ALL,
+	(GLenum u,const GLfloat *v), (u,v))
+/* Vertex choosers: keyed on every bit in MASK_VERTEX */
+CHOOSE(Vertex3f, p3f, MASK_VERTEX, MASK_VERTEX, 
+       (GLfloat a,GLfloat b,GLfloat c), (a,b,c))
+CHOOSE(Vertex3fv, pfv, MASK_VERTEX, MASK_VERTEX, 
+       (const GLfloat *v), (v))
+CHOOSE(Vertex2f, p2f, MASK_VERTEX, MASK_VERTEX, 
+       (GLfloat a,GLfloat b), (a,b))
+CHOOSE(Vertex2fv, pfv, MASK_VERTEX, MASK_VERTEX, 
+       (const GLfloat *v), (v))
+
+
+
+
+/* Point every entry handled in this file at its choose_* dispatcher. */
+void radeonVtxfmtInitChoosers( GLvertexformat *vfmt )
+{
+   vfmt->Vertex2f = choose_Vertex2f;
+   vfmt->Vertex2fv = choose_Vertex2fv;
+   vfmt->Vertex3f = choose_Vertex3f;
+   vfmt->Vertex3fv = choose_Vertex3fv;
+   vfmt->Normal3f = choose_Normal3f;
+   vfmt->Normal3fv = choose_Normal3fv;
+   vfmt->Color3ub = choose_Color3ub;
+   vfmt->Color3ubv = choose_Color3ubv;
+   vfmt->Color4ub = choose_Color4ub;
+   vfmt->Color4ubv = choose_Color4ubv;
+   vfmt->Color3f = choose_Color3f;
+   vfmt->Color3fv = choose_Color3fv;
+   vfmt->Color4f = choose_Color4f;
+   vfmt->Color4fv = choose_Color4fv;
+   vfmt->SecondaryColor3ubEXT = choose_SecondaryColor3ubEXT;
+   vfmt->SecondaryColor3ubvEXT = choose_SecondaryColor3ubvEXT;
+   vfmt->SecondaryColor3fEXT = choose_SecondaryColor3fEXT;
+   vfmt->SecondaryColor3fvEXT = choose_SecondaryColor3fvEXT;
+   vfmt->TexCoord1f = choose_TexCoord1f;
+   vfmt->TexCoord1fv = choose_TexCoord1fv;
+   vfmt->TexCoord2f = choose_TexCoord2f;
+   vfmt->TexCoord2fv = choose_TexCoord2fv;
+   vfmt->MultiTexCoord1fARB = choose_MultiTexCoord1fARB;
+   vfmt->MultiTexCoord1fvARB = choose_MultiTexCoord1fvARB;
+   vfmt->MultiTexCoord2fARB = choose_MultiTexCoord2fARB;
+   vfmt->MultiTexCoord2fvARB = choose_MultiTexCoord2fvARB;
+}
+
+/* Placeholder generator: returns 0, so the choosers use their C versions. */
+static struct dynfn *codegen_noop( GLcontext *ctx, int key )
+{
+   (void) ctx; (void) key;
+   return 0;
+}
+/* Fill gen with no-op generators, then let the asm back ends override. */
+void radeonInitCodegen( struct dfn_generators *gen )
+{
+   gen->Vertex2f = codegen_noop;
+   gen->Vertex2fv = codegen_noop;
+   gen->Vertex3f = codegen_noop;
+   gen->Vertex3fv = codegen_noop;
+   gen->Normal3f = codegen_noop;
+   gen->Normal3fv = codegen_noop;
+   gen->Color3ub = codegen_noop;
+   gen->Color3ubv = codegen_noop;
+   gen->Color4ub = codegen_noop;
+   gen->Color4ubv = codegen_noop;
+   gen->Color3f = codegen_noop;
+   gen->Color3fv = codegen_noop;
+   gen->Color4f = codegen_noop;
+   gen->Color4fv = codegen_noop;
+   gen->SecondaryColor3ubEXT = codegen_noop;
+   gen->SecondaryColor3ubvEXT = codegen_noop;
+   gen->SecondaryColor3fEXT = codegen_noop;
+   gen->SecondaryColor3fvEXT = codegen_noop;
+   gen->TexCoord1f = codegen_noop;
+   gen->TexCoord1fv = codegen_noop;
+   gen->TexCoord2f = codegen_noop;
+   gen->TexCoord2fv = codegen_noop;
+   gen->MultiTexCoord1fARB = codegen_noop;
+   gen->MultiTexCoord1fvARB = codegen_noop;
+   gen->MultiTexCoord2fARB = codegen_noop;
+   gen->MultiTexCoord2fvARB = codegen_noop;
+
+   /* Setting RADEON_NO_CODEGEN in the environment keeps the no-ops. */
+   if (getenv("RADEON_NO_CODEGEN"))
+      return;
+#if defined(USE_X86_ASM)
+   radeonInitX86Codegen( gen );
+#endif
+#if defined(USE_SSE_ASM)
+   radeonInitSSECodegen( gen );
+#endif
+}
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_sse.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_sse.c
new file mode 100644
index 000000000..0df3062be
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_sse.c
@@ -0,0 +1,88 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_sse.c,v 1.1 2002/10/30 12:51:58 alanh Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include <stdio.h>
+#include <assert.h>
+#include "mem.h" 
+#include "simple_list.h" 
+#include "radeon_vtxfmt.h"
+
+#if defined(USE_SSE_ASM)
+#include "X86/common_x86_asm.h"
+
+/* Build specialized versions of the immediate calls on the fly for
+ * the current state.  ???P4 SSE2 versions???
+ */
+
+
+static struct dynfn *makeSSENormal3fv( GLcontext *ctx, int key )
+{
+   /* Requires P4 (sse2?).  NOTE(review): the stores go to 0xc/0x14(%edx)
+    * while the C Normal3fv writes normalptr[0..2] -- confirm the offsets. */
+   static unsigned char temp[] = {
+      0x8b, 0x44, 0x24, 0x04,          	/*  mov    0x4(%esp,1),%eax */
+      0xba, 0x78, 0x56, 0x34, 0x12,   	/*  mov    $0x12345678,%edx */
+      0xf3, 0x0f, 0x7e, 0x00,          	/*  movq   (%eax),%xmm0 */
+      0x66, 0x0f, 0x6e, 0x48, 0x08,    	/*  movd   0x8(%eax),%xmm1 */
+      0x66, 0x0f, 0xd6, 0x42, 0x0c,    	/*  movq   %xmm0,0xc(%edx) */
+      0x66, 0x0f, 0x7e, 0x4a, 0x14,    	/*  movd   %xmm1,0x14(%edx) */
+      0xc3,                   	        /*  ret     */
+   };
+   /* NOTE(review): neither allocation below is checked for failure. */
+
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   insert_at_head( &rmesa->vb.dfn_cache.Normal3fv, dfn );
+   dfn->key = key;
+   /* The dword at byte 5 is the 0x12345678 placeholder; patch it here. */
+   dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
+   memcpy (dfn->code, temp, sizeof(temp));
+   FIXUP(dfn->code, 5, 0x12345678, (int)vb.normalptr);
+   return dfn;
+}
+/* Generator left disabled: the function is only referenced, not installed. */
+void radeonInitSSECodegen( struct dfn_generators *gen )
+{
+   if ( cpu_has_xmm && cpu_has_xmm2 )
+      /*gen->Normal3fv = */ (void)makeSSENormal3fv;
+}
+
+#else 
+/* Built without USE_SSE_ASM: there is nothing to install. */
+void radeonInitSSECodegen( struct dfn_generators *gen )
+{
+   (void) gen;
+}
+
+#endif
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_x86.c b/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_x86.c
new file mode 100644
index 000000000..ad7d9308e
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_x86.c
@@ -0,0 +1,463 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_x86.c,v 1.2 2002/12/21 17:02:16 dawes Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include <stdio.h>
+#include <assert.h>
+#include "mem.h" 
+#include "mmath.h" 
+#include "simple_list.h" 
+#include "radeon_vtxfmt.h"
+
+#if defined(USE_X86_ASM)
+
+/* EXTERN(f) declares the symbols f and f_end.  The generators below pass f to
+ * DFN as a code template; the labels are defined in radeon_vtxtmp_x86.S. */
+#define EXTERN( FUNC )		\
+extern const char *FUNC;	\
+extern const char *FUNC##_end
+EXTERN ( _x86_Normal3fv );
+EXTERN ( _x86_Normal3f );
+EXTERN ( _x86_Vertex3fv_6 );
+EXTERN ( _x86_Vertex3fv_8 );
+EXTERN ( _x86_Vertex3fv );
+EXTERN ( _x86_Vertex3f_4 );
+EXTERN ( _x86_Vertex3f_6 );
+EXTERN ( _x86_Vertex3f );
+EXTERN ( _x86_Color4ubv_ub );
+EXTERN ( _x86_Color4ubv_4f );
+EXTERN ( _x86_Color4ub_ub );
+EXTERN ( _x86_Color3fv_3f );
+EXTERN ( _x86_Color3f_3f );
+EXTERN ( _x86_TexCoord2fv );
+EXTERN ( _x86_TexCoord2f );
+EXTERN ( _x86_MultiTexCoord2fvARB );
+EXTERN ( _x86_MultiTexCoord2fvARB_2 );
+EXTERN ( _x86_MultiTexCoord2fARB );
+EXTERN ( _x86_MultiTexCoord2fARB_2 );
+
+/* Build specialized versions of the immediate calls on the fly for
+ * the current state.  Generic x86 versions.
+ */
+
+struct dynfn *radeon_makeX86Vertex3f( GLcontext *ctx, int key )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   /* Returns a new dynfn whose code is the Vertex3f template chosen by vb.vertex_size (in dwords). */
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x %d\n", __FUNCTION__, key, vb.vertex_size );
+   /* DFN and FIXUP are defined outside this file; presumably DFN copies the template into dfn->code and FIXUP rewrites the 32-bit placeholder at a byte offset -- confirm. */
+   switch (vb.vertex_size) {
+   case 4: {
+      /* x,y,z plus vb.vertex[3]; the offsets are the zero placeholders inside _x86_Vertex3f_4 */
+      DFN ( _x86_Vertex3f_4, rmesa->vb.dfn_cache.Vertex3f );
+      FIXUP(dfn->code, 2, 0x0, (int)&vb.dmaptr);
+      FIXUP(dfn->code, 25, 0x0, (int)&vb.vertex[3]);
+      FIXUP(dfn->code, 36, 0x0, (int)&vb.counter);
+      FIXUP(dfn->code, 46, 0x0, (int)&vb.dmaptr);
+      FIXUP(dfn->code, 51, 0x0, (int)&vb.counter);
+      FIXUP(dfn->code, 60, 0x0, (int)&vb.notify);
+      break;
+   }
+   case 6: {
+      /* x,y,z plus vb.vertex[3..5]; the offsets are the zero placeholders inside _x86_Vertex3f_6 */
+      DFN ( _x86_Vertex3f_6, rmesa->vb.dfn_cache.Vertex3f );
+      FIXUP(dfn->code, 3, 0x0, (int)&vb.dmaptr);
+      FIXUP(dfn->code, 28, 0x0, (int)&vb.vertex[3]);
+      FIXUP(dfn->code, 34, 0x0, (int)&vb.vertex[4]);
+      FIXUP(dfn->code, 40, 0x0, (int)&vb.vertex[5]);
+      FIXUP(dfn->code, 57, 0x0, (int)&vb.counter);
+      FIXUP(dfn->code, 63, 0x0, (int)&vb.dmaptr);
+      FIXUP(dfn->code, 70, 0x0, (int)&vb.counter);
+      FIXUP(dfn->code, 79, 0x0, (int)&vb.notify);
+      break;
+   }
+   default: {
+      /* Any other size: offset 37 is the dword count of the "rep movsl" that copies from vb.vertex[3] */
+      DFN ( _x86_Vertex3f, rmesa->vb.dfn_cache.Vertex3f );
+      FIXUP(dfn->code, 3, 0x0, (int)&vb.vertex[3]);
+      FIXUP(dfn->code, 9, 0x0, (int)&vb.dmaptr);
+      FIXUP(dfn->code, 37, 0x0, vb.vertex_size-3);
+      FIXUP(dfn->code, 44, 0x0, (int)&vb.counter);
+      FIXUP(dfn->code, 50, 0x0, (int)&vb.dmaptr);
+      FIXUP(dfn->code, 56, 0x0, (int)&vb.counter);
+      FIXUP(dfn->code, 67, 0x0, (int)&vb.notify);
+   break;
+   }
+   }
+
+   return dfn;
+}
+
+
+
+struct dynfn *radeon_makeX86Vertex3fv( GLcontext *ctx, int key )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   /* Returns a new dynfn whose code is the Vertex3fv template chosen by vb.vertex_size (in dwords). */
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x %d\n", __FUNCTION__, key, vb.vertex_size );
+   /* DFN and FIXUP are defined outside this file; the third FIXUP argument matches the placeholder value written in the .S template. */
+   switch (vb.vertex_size) {
+   case 6: {
+      /* x,y,z plus vb.vertex[3..5]; placeholders in _x86_Vertex3fv_6 are the small absolute addresses 0,4,8,28,32,36 */
+      DFN ( _x86_Vertex3fv_6, rmesa->vb.dfn_cache.Vertex3fv );
+      FIXUP(dfn->code, 1, 0x00000000, (int)&vb.dmaptr);
+      FIXUP(dfn->code, 27, 0x0000001c, (int)&vb.vertex[3]);
+      FIXUP(dfn->code, 33, 0x00000020, (int)&vb.vertex[4]);
+      FIXUP(dfn->code, 45, 0x00000024, (int)&vb.vertex[5]);
+      FIXUP(dfn->code, 56, 0x00000000, (int)&vb.dmaptr);
+      FIXUP(dfn->code, 61, 0x00000004, (int)&vb.counter);
+      FIXUP(dfn->code, 67, 0x00000004, (int)&vb.counter);
+      FIXUP(dfn->code, 76, 0x00000008, (int)&vb.notify);
+      break;
+   }
+
+
+   case 8: {
+      /* x,y,z plus vb.vertex[3..7]; the template reuses placeholders 28 and 32 for two different pairs of loads */
+      DFN ( _x86_Vertex3fv_8, rmesa->vb.dfn_cache.Vertex3fv );
+      FIXUP(dfn->code, 1, 0x00000000, (int)&vb.dmaptr);
+      FIXUP(dfn->code, 27, 0x0000001c, (int)&vb.vertex[3]);
+      FIXUP(dfn->code, 33, 0x00000020, (int)&vb.vertex[4]);
+      FIXUP(dfn->code, 45, 0x0000001c, (int)&vb.vertex[5]);
+      FIXUP(dfn->code, 51, 0x00000020, (int)&vb.vertex[6]);
+      FIXUP(dfn->code, 63, 0x00000024, (int)&vb.vertex[7]);
+      FIXUP(dfn->code, 74, 0x00000000, (int)&vb.dmaptr);
+      FIXUP(dfn->code, 79, 0x00000004, (int)&vb.counter);
+      FIXUP(dfn->code, 85, 0x00000004, (int)&vb.counter);
+      FIXUP(dfn->code, 94, 0x00000008, (int)&vb.notify);
+      break;
+   }
+
+
+
+   default: {
+      /* Any other size: offset 32 is the "rep movsl" dword count, offset 37 its source (vb.vertex[3]) */
+      DFN ( _x86_Vertex3fv, rmesa->vb.dfn_cache.Vertex3fv );
+      FIXUP(dfn->code, 8, 0x01010101, (int)&vb.dmaptr);
+      FIXUP(dfn->code, 32, 0x00000006, vb.vertex_size-3);
+      FIXUP(dfn->code, 37, 0x00000058, (int)&vb.vertex[3]);
+      FIXUP(dfn->code, 45, 0x01010101, (int)&vb.dmaptr);
+      FIXUP(dfn->code, 50, 0x02020202, (int)&vb.counter);
+      FIXUP(dfn->code, 58, 0x02020202, (int)&vb.counter);
+      FIXUP(dfn->code, 67, 0x0, (int)&vb.notify);
+   break;
+   }
+   }
+
+   return dfn;
+}
+
+struct dynfn *radeon_makeX86Normal3fv( GLcontext *ctx, int key )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   int i = 0;   /* running byte offset handed to FIXUP2 */
+   /* Returns a new dynfn holding the Normal3fv template with its three stores aimed at vb.normalptr. */
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+   DFN ( _x86_Normal3fv, rmesa->vb.dfn_cache.Normal3fv );
+   /* NOTE(review): FIXUP2 is defined elsewhere; unlike FIXUP it takes the offset as a variable, presumably locating each placeholder (0, 4, 8) itself -- confirm. */
+   FIXUP2(dfn->code, i, 0x0, (int)vb.normalptr);
+   FIXUP2(dfn->code, i, 0x4, 4+(int)vb.normalptr);
+   FIXUP2(dfn->code, i, 0x8, 8+(int)vb.normalptr);
+   /* fprintf(stderr, "%s done\n", __FUNCTION__); */
+   return dfn;
+}
+
+struct dynfn *radeon_makeX86Normal3f( GLcontext *ctx, int key )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   /* Returns a new dynfn holding the Normal3f template, with its destination set to vb.normalptr. */
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+   /* Offset 1 is the imm32 of the template's leading "movl $0x12345678, %edx". */
+   DFN ( _x86_Normal3f, rmesa->vb.dfn_cache.Normal3f );
+   FIXUP(dfn->code, 1, 0x12345678, (int)vb.normalptr);
+   return dfn;
+}
+
+struct dynfn *radeon_makeX86Color4ubv( GLcontext *ctx, int key )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   /* Returns a new dynfn for Color4ubv: a 4-byte copy when the key has PKCOLOR set,
+    * otherwise a version that converts each byte to float through a lookup table. */
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+   if (key & RADEON_CP_VC_FRMT_PKCOLOR) {
+      DFN ( _x86_Color4ubv_ub, rmesa->vb.dfn_cache.Color4ubv);
+      FIXUP(dfn->code, 5, 0x12345678, (int)vb.colorptr);
+      return dfn;
+   }
+   else {
+      /* Offset 2 is the table base; the four 0xdeadbeaf placeholders are the float destinations. */
+      DFN ( _x86_Color4ubv_4f, rmesa->vb.dfn_cache.Color4ubv);
+      FIXUP(dfn->code, 2, 0x00000000, (int)_mesa_ubyte_to_float_color_tab);
+      FIXUP(dfn->code, 27, 0xdeadbeaf, (int)vb.floatcolorptr);
+      FIXUP(dfn->code, 33, 0xdeadbeaf, (int)vb.floatcolorptr+4);
+      FIXUP(dfn->code, 55, 0xdeadbeaf, (int)vb.floatcolorptr+8);
+      FIXUP(dfn->code, 61, 0xdeadbeaf, (int)vb.floatcolorptr+12);
+      return dfn;
+   }
+}
+
+struct dynfn *radeon_makeX86Color4ub( GLcontext *ctx, int key )
+{
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+   /* Only the PKCOLOR format has a template; for any other key this returns 0 and allocates nothing. */
+   if (key & RADEON_CP_VC_FRMT_PKCOLOR) {
+      struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+      radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+      /* One placeholder per byte store: the four components land at vb.colorptr + 0..3. */
+      DFN ( _x86_Color4ub_ub, rmesa->vb.dfn_cache.Color4ub );
+      FIXUP(dfn->code, 18, 0x0, (int)vb.colorptr);
+      FIXUP(dfn->code, 24, 0x0, (int)vb.colorptr+1);
+      FIXUP(dfn->code, 30, 0x0, (int)vb.colorptr+2);
+      FIXUP(dfn->code, 36, 0x0, (int)vb.colorptr+3);
+      return dfn;
+   }
+   else
+      return 0;
+}
+
+
+struct dynfn *radeon_makeX86Color3fv( GLcontext *ctx, int key )
+{
+   if (key & (RADEON_CP_VC_FRMT_PKCOLOR|RADEON_CP_VC_FRMT_FPALPHA))
+      return 0;   /* no template when the key has PKCOLOR or FPALPHA set */
+   else
+   {
+      struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+      radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+      /* Returns a new dynfn that copies three floats from the argument array to vb.floatcolorptr. */
+      if (RADEON_DEBUG & DEBUG_CODEGEN)
+	 fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+      /* Offset 5 is the imm32 of the template's "movl $0, %edx". */
+      DFN ( _x86_Color3fv_3f, rmesa->vb.dfn_cache.Color3fv );
+      FIXUP(dfn->code, 5, 0x0, (int)vb.floatcolorptr);
+      return dfn;
+   }
+}
+
+struct dynfn *radeon_makeX86Color3f( GLcontext *ctx, int key )
+{
+   if (key & (RADEON_CP_VC_FRMT_PKCOLOR|RADEON_CP_VC_FRMT_FPALPHA))
+      return 0;   /* no template when the key has PKCOLOR or FPALPHA set */
+   else
+   {
+      struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+      radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+      /* Returns a new dynfn that copies the three float arguments to vb.floatcolorptr. */
+      if (RADEON_DEBUG & DEBUG_CODEGEN)
+	 fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+      /* Offset 1 is the imm32 of the template's leading "movl $0x12345678, %edx". */
+      DFN ( _x86_Color3f_3f, rmesa->vb.dfn_cache.Color3f );
+      FIXUP(dfn->code, 1, 0x12345678, (int)vb.floatcolorptr);
+      return dfn;
+   }
+}
+
+
+
+struct dynfn *radeon_makeX86TexCoord2fv( GLcontext *ctx, int key )
+{
+   /* Returns a new dynfn that copies two floats from the argument array to vb.texcoordptr[0]. */
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+   /* Offset 5 is the imm32 of the template's "movl $0x12345678, %edx". */
+   DFN ( _x86_TexCoord2fv, rmesa->vb.dfn_cache.TexCoord2fv );
+   FIXUP(dfn->code, 5, 0x12345678, (int)vb.texcoordptr[0]);
+   return dfn;
+}
+
+struct dynfn *radeon_makeX86TexCoord2f( GLcontext *ctx, int key )
+{
+   /* Returns a new dynfn that copies the two float arguments to vb.texcoordptr[0]. */
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+   /* Offset 1 is the imm32 of the template's leading "movl $0x12345678, %edx". */
+   DFN ( _x86_TexCoord2f, rmesa->vb.dfn_cache.TexCoord2f );
+   FIXUP(dfn->code, 1, 0x12345678, (int)vb.texcoordptr[0]);
+   return dfn;
+}
+
+struct dynfn *radeon_makeX86MultiTexCoord2fvARB( GLcontext *ctx, int key )
+{
+#if 0
+   static  char temp[] = {
+      0x8b, 0x44, 0x24, 0x04,          	/* mov    0x4(%esp,1),%eax */
+      0x8b, 0x4c, 0x24, 0x08,          	/* mov    0x8(%esp,1),%ecx */
+      0x2d, 0xc0, 0x84, 0x00, 0x00,    	/* sub    $0x84c0,%eax */
+      0x83, 0xe0, 0x01,             	/* and    $0x1,%eax */
+      0x8b, 0x11,                	/* mov    (%ecx),%edx */
+      0xc1, 0xe0, 0x03,             	/* shl    $0x3,%eax */
+      0x8b, 0x49, 0x04,             	/* mov    0x4(%ecx),%ecx */
+      0x89, 0x90, 0, 0, 0, 0,/* mov    %edx,DEST(%eax) */
+      0x89, 0x88, 0, 0, 0, 0,/* mov    %ecx,DEST+8(%eax) */
+      0xc3,                     	/* ret     */
+   };
+   static char temp2[] = {
+      0x8b, 0x44, 0x24, 0x04,          	/* mov    0x4(%esp,1),%eax */
+      0x8b, 0x4c, 0x24, 0x08,          	/* mov    0x8(%esp,1),%ecx */
+      0x2d, 0xc0, 0x84, 0x00, 0x00,    	/* sub    $0x84c0,%eax */
+      0x83, 0xe0, 0x01,             	/* and    $0x1,%eax */
+      0x8b, 0x14, 0x85, 0, 0, 0, 0, /* mov    DEST(,%eax,4),%edx */
+      0x8b, 0x01,                	/* mov    (%ecx),%eax */
+      0x89, 0x02,                	/* mov    %eax,(%edx) */
+      0x8b, 0x41, 0x04,             	/* mov    0x4(%ecx),%eax */
+      0x89, 0x42, 0x04,             	/* mov    %eax,0x4(%edx) */
+      0xc3,                     	/* ret     */
+   };
+#endif
+   /* The byte arrays above are compiled out; the live templates are the _x86_MultiTexCoord2fvARB* symbols. */
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+   /* Both ST0 and ST1 in the key: the template indexes from texcoordptr[0] by (unit & 1) * 8 bytes. */
+   if ((key & (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1)) ==
+      (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1)) {
+      DFN ( _x86_MultiTexCoord2fvARB, rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+      FIXUP(dfn->code, 26, 0xdeadbeef, (int)vb.texcoordptr[0]);
+      FIXUP(dfn->code, 32, 0xdeadbeef, (int)vb.texcoordptr[0]+4);
+   } else {
+      DFN ( _x86_MultiTexCoord2fvARB_2, rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+      FIXUP(dfn->code, 19, 0x0, (int)vb.texcoordptr);   /* array base: the template loads texcoordptr[unit & 1] at run time */
+   }
+   return dfn;
+}
+
+struct dynfn *radeon_makeX86MultiTexCoord2fARB( GLcontext *ctx, 
+						int key )
+{
+#if 0
+   static  char temp[] = {
+      0x8b, 0x44, 0x24, 0x04,          	/* mov    0x4(%esp,1),%eax */
+      0x8b, 0x54, 0x24, 0x08,          	/* mov    0x8(%esp,1),%edx */
+      0x2d, 0xc0, 0x84, 0x00, 0x00,    	/* sub    $0x84c0,%eax */
+      0x8b, 0x4c, 0x24, 0x0c,          	/* mov    0xc(%esp,1),%ecx */
+      0x83, 0xe0, 0x01,             	/* and    $0x1,%eax */
+      0xc1, 0xe0, 0x03,             	/* shl    $0x3,%eax */
+      0x89, 0x90, 0, 0, 0, 0,	/* mov    %edx,DEST(%eax) */
+      0x89, 0x88, 0, 0, 0, 0,	/* mov    %ecx,DEST+8(%eax) */
+      0xc3,                     	/* ret     */
+   };
+   /* The byte arrays in this #if 0 region are compiled out; the live templates are the _x86_MultiTexCoord2fARB* symbols. */
+   static char temp2[] = {
+      0x8b, 0x44, 0x24, 0x04,          	/* mov    0x4(%esp,1),%eax */
+      0x8b, 0x54, 0x24, 0x08,          	/* mov    0x8(%esp,1),%edx */
+      0x2d, 0xc0, 0x84, 0x00, 0x00,    	/* sub    $0x84c0,%eax */
+      0x8b, 0x4c, 0x24, 0x0c,          	/* mov    0xc(%esp,1),%ecx */
+      0x83, 0xe0, 0x01,             	/* and    $0x1,%eax */
+      0x8b, 0x04, 0x85, 0, 0, 0, 0,     /* mov    DEST(,%eax,4),%eax */
+      0x89, 0x10,                	/* mov    %edx,(%eax) */
+      0x89, 0x48, 0x04,             	/* mov    %ecx,0x4(%eax) */
+      0xc3,                   	        /* ret     */
+   };
+#endif
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+   /* Both ST0 and ST1 in the key: the template indexes from texcoordptr[0] by (unit & 1) * 8 bytes. */
+   if ((key & (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1)) ==
+       (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1)) {
+      DFN ( _x86_MultiTexCoord2fARB, rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+      FIXUP(dfn->code, 25, 0xdeadbeef, (int)vb.texcoordptr[0]);
+      FIXUP(dfn->code, 31, 0xdeadbeef, (int)vb.texcoordptr[0]+4);
+   }
+   else {
+      /* Note: this might get generated multiple times, even though the
+       * actual emitted code is the same.
+       */
+      DFN ( _x86_MultiTexCoord2fARB_2, rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+      FIXUP(dfn->code, 23, 0x0, (int)vb.texcoordptr);   /* array base: the template loads texcoordptr[unit & 1] at run time */
+   }
+   return dfn;
+}
+
+
+void radeonInitX86Codegen( struct dfn_generators *gen )
+{
+   /* Hook up every generic-x86 generator defined in this file.  Some of
+    * them return 0 for keys they have no template for (noted per entry).
+    */
+
+   /* Position */
+   gen->Vertex3fv = radeon_makeX86Vertex3fv;
+   gen->Vertex3f = radeon_makeX86Vertex3f;
+
+   /* Normal */
+   gen->Normal3fv = radeon_makeX86Normal3fv;
+   gen->Normal3f = radeon_makeX86Normal3f;
+
+   /* Colour */
+   gen->Color4ubv = radeon_makeX86Color4ubv; /* packed and float colour */
+   gen->Color4ub = radeon_makeX86Color4ub;   /* PKCOLOR only */
+   gen->Color3fv = radeon_makeX86Color3fv;   /* not for PKCOLOR or FPALPHA */
+   gen->Color3f = radeon_makeX86Color3f;     /* not for PKCOLOR or FPALPHA */
+
+   /* Texture coordinates */
+   gen->TexCoord2fv = radeon_makeX86TexCoord2fv;
+   gen->TexCoord2f = radeon_makeX86TexCoord2f;
+   gen->MultiTexCoord2fvARB = radeon_makeX86MultiTexCoord2fvARB;
+   gen->MultiTexCoord2fARB = radeon_makeX86MultiTexCoord2fARB;
+
+   /* Not done: Vertex2f{,v}, Color3ub{,v}, Color4f{,v}, TexCoord1f{,v}, MultiTexCoord1f{,v}ARB. */
+}
+
+
+#else 
+
+void radeonInitX86Codegen( struct dfn_generators *gen )
+{  /* Built without USE_X86_ASM: there are no x86 generators, so gen is left untouched. */
+   (void) gen;
+}
+
+#endif
diff --git a/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxtmp_x86.S b/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxtmp_x86.S
new file mode 100644
index 000000000..b9b1594a3
--- /dev/null
+++ b/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxtmp_x86.S
@@ -0,0 +1,410 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_vtxtmp_x86.S,v 1.2 2002/11/07 18:32:00 tsi Exp $ */
+/**************************************************************************
+
+Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/* GLOBL(x): export symbol x and define it at the current location.  Plain "x:" is used because pasting x with ':' (x##:) does not form a valid token. */
+#define GLOBL( x )	\
+.globl x;		\
+x:
+.data
+.align 4
+GLOBL( _x86_Normal3fv)	/* template: copy the 3 dwords at v to a patched destination */
+	movl 4(%esp), %eax      /* load 'v' off stack */
+	movl (%eax), %ecx       /* load v[0] */
+	movl 4(%eax), %edx      /* load v[1] */
+	movl 8(%eax), %eax      /* load v[2] */
+	movl %ecx, 0      	/* store v[0]; address 0 is the placeholder for vb.normalptr */
+	movl %edx, 4      	/* store v[1]; address 4 is the placeholder for vb.normalptr + 4 */
+	movl %eax, 8      	/* store v[2]; address 8 is the placeholder for vb.normalptr + 8 */
+	ret
+GLOBL ( _x86_Normal3fv_end )
+
+/*
+	vertex 3f vertex size 4
+*/
+	
+GLOBL ( _x86_Vertex3f_4 )	/* every (0) / *0 below is a placeholder patched by radeon_makeX86Vertex3f */
+	movl	(0), %ecx	/* ecx = vb.dmaptr (output pointer) */
+	movl	4(%esp), %eax	/* x */
+	movl	8(%esp), %edx	/* y */
+	movl	%eax, (%ecx)
+	movl	%edx, 4(%ecx)
+	movl	12(%esp), %eax	/* z */
+	movl	(0), %edx	/* edx = vb.vertex[3] */
+	movl	%eax, 8(%ecx)
+	movl	%edx, 12(%ecx)
+	movl	(0), %eax	/* eax = vb.counter */
+	addl	$16, %ecx	/* advance past the 4 dwords just written */
+	dec 	%eax
+	movl	%ecx, (0)	/* vb.dmaptr = ecx */
+	movl	%eax, (0)	/* vb.counter = eax; mov leaves the flags from dec intact */
+	je	.1 		/* counter reached zero */
+	ret
+.1:	jmp	*0		/* tail-jump through vb.notify */
+
+GLOBL ( _x86_Vertex3f_4_end )
+
+/*
+	vertex 3f vertex size 6
+*/
+GLOBL ( _x86_Vertex3f_6 )	/* every (0) / *0 below is a placeholder patched by radeon_makeX86Vertex3f */
+	push	%edi		/* arguments are now at 8/12/16(%esp) */
+	movl	(0), %edi	/* edi = vb.dmaptr (output pointer) */
+	movl	8(%esp), %eax	/* x */
+	movl	12(%esp), %edx	/* y */
+	movl	16(%esp), %ecx	/* z */
+	movl	%eax, (%edi)
+	movl	%edx, 4(%edi)
+	movl	%ecx, 8(%edi)
+	movl	(0), %eax	/* vb.vertex[3] */
+	movl	(0), %edx	/* vb.vertex[4] */
+	movl	(0), %ecx	/* vb.vertex[5] */
+	movl	%eax, 12(%edi)
+	movl	%edx, 16(%edi)
+	movl	%ecx, 20(%edi)
+	addl	$24, %edi	/* advance past the 6 dwords just written */
+	movl	(0), %eax	/* eax = vb.counter */
+	movl	%edi, (0)	/* vb.dmaptr = edi */
+	dec 	%eax
+	pop 	%edi		/* pop and mov leave the flags from dec intact */
+	movl	%eax, (0)	/* vb.counter = eax */
+	je	.2		/* counter reached zero */
+	ret
+.2:	jmp	*0		/* tail-jump through vb.notify */
+GLOBL ( _x86_Vertex3f_6_end )
+/*
+	vertex 3f generic size
+*/
+GLOBL ( _x86_Vertex3f )	/* every $0 / (0) / *0 below is a placeholder patched by radeon_makeX86Vertex3f */
+	push	%edi
+	push	%esi		/* arguments are now at 12/16/20(%esp) */
+	movl	$0, %esi	/* esi = &vb.vertex[3], source of the block copy */
+	movl	(0), %edi	/* edi = vb.dmaptr (output pointer) */
+	movl	12(%esp), %eax	/* x */
+	movl	16(%esp), %edx	/* y */
+	movl	20(%esp), %ecx	/* z */
+	movl	%eax, (%edi)
+	movl	%edx, 4(%edi)
+	movl	%ecx, 8(%edi)
+	addl	$12, %edi
+	movl	$0, %ecx	/* ecx = vb.vertex_size - 3 dwords to copy */
+	repz			/* repeat prefix for the movsl on the next line */
+	movsl %ds:(%esi), %es:(%edi)
+	movl	(0), %eax	/* eax = vb.counter */
+	movl	%edi, (0)	/* vb.dmaptr = edi (already advanced by the copy) */
+	dec 	%eax
+	movl	%eax, (0)	/* vb.counter = eax */
+	pop 	%esi		/* mov and pop leave the flags from dec intact */
+	pop 	%edi
+	je  	.3		/* counter reached zero */
+	ret
+.3:	jmp	*0		/* tail-jump through vb.notify */
+
+GLOBL ( _x86_Vertex3f_end )
+
+/*
+	Vertex 3fv vertex size 6
+*/
+GLOBL ( _x86_Vertex3fv_6 )	/* absolute addresses 0,4,8,28,32,36 are placeholders patched by radeon_makeX86Vertex3fv */
+	movl	(0), %eax	/* eax = vb.dmaptr (output pointer) */
+	movl	4(%esp), %ecx	/* ecx = v */
+	movl	(%ecx), %edx
+	movl	%edx, (%eax)	/* v[0] */
+	movl	4(%ecx), %edx
+	movl	8(%ecx), %ecx
+	movl	%edx, 4(%eax)	/* v[1] */
+	movl	%ecx, 8(%eax)	/* v[2] */
+	movl	(28), %edx	/* vb.vertex[3] */
+	movl	(32), %ecx	/* vb.vertex[4] */
+	movl	%edx, 12(%eax)
+	movl	%ecx, 16(%eax)
+	movl	(36), %edx	/* vb.vertex[5] */
+	movl	%edx, 20(%eax)
+	addl	$24, %eax	/* advance past the 6 dwords just written */
+	movl	%eax, 0		/* vb.dmaptr = eax */
+	movl	4, %eax		/* eax = vb.counter */
+	dec 	%eax
+	movl	%eax, 4		/* vb.counter = eax; mov leaves the flags from dec intact */
+	je	.4		/* counter reached zero */
+	ret
+.4:	jmp    *8		/* tail-jump through vb.notify */
+
+GLOBL ( _x86_Vertex3fv_6_end )
+
+/*
+	Vertex 3fv vertex size 8
+*/
+GLOBL ( _x86_Vertex3fv_8 )	/* absolute addresses 0,4,8,28,32,36 are placeholders patched by radeon_makeX86Vertex3fv */
+	movl	(0), %eax	/* eax = vb.dmaptr (output pointer) */
+	movl	4(%esp), %ecx	/* ecx = v */
+	movl	(%ecx), %edx
+	movl	%edx ,(%eax)	/* v[0] */
+	movl	4(%ecx) ,%edx
+	movl	8(%ecx) ,%ecx
+	movl	%edx, 4(%eax)	/* v[1] */
+	movl	%ecx, 8(%eax)	/* v[2] */
+	movl	(28), %edx	/* vb.vertex[3] */
+	movl	(32), %ecx	/* vb.vertex[4] */
+	movl	%edx, 12(%eax)
+	movl	%ecx, 16(%eax)
+	movl	(28), %edx	/* same placeholder value again, patched to vb.vertex[5] */
+	movl	(32), %ecx	/* same placeholder value again, patched to vb.vertex[6] */
+	movl	%edx, 20(%eax)
+	movl	%ecx, 24(%eax)
+	movl	(36), %edx	/* vb.vertex[7] */
+	movl	%edx, 28(%eax)
+	addl	$32, %eax	/* advance past the 8 dwords just written */
+	movl	%eax, (0)	/* vb.dmaptr = eax */
+	movl	4, %eax		/* eax = vb.counter */
+	dec	%eax
+	movl    %eax, (4)	/* vb.counter = eax; mov leaves the flags from dec intact */
+	je	.5		/* counter reached zero */
+	ret
+.5:	jmp    *8		/* tail-jump through vb.notify */
+
+GLOBL ( _x86_Vertex3fv_8_end )
+
+/*
+	Vertex 3fv generic vertex size
+*/
+GLOBL ( _x86_Vertex3fv )	/* 0x1010101, 0x2020202, $6, $0x58 and *0 are placeholders patched by radeon_makeX86Vertex3fv */
+	movl	4(%esp), %edx	/* edx = v, fetched before the pushes move %esp */
+	push	%edi
+	push	%esi
+	movl	(0x1010101), %edi	/* edi = vb.dmaptr (output pointer) */
+	movl	(%edx), %eax
+	movl	4(%edx), %ecx
+	movl	8(%edx), %esi
+	movl	%eax, (%edi)	/* v[0] */
+	movl	%ecx, 4(%edi)	/* v[1] */
+	movl	%esi, 8(%edi)	/* v[2] */
+	addl	$12, %edi
+	movl	$6, %ecx	/* ecx = vb.vertex_size - 3 dwords to copy */
+	movl	$0x58, %esi	/* esi = &vb.vertex[3], source of the block copy */
+	repz			/* repeat prefix for the movsl on the next line */
+	movsl %ds:(%esi), %es:(%edi)
+	movl	%edi, (0x1010101)	/* vb.dmaptr = edi (already advanced by the copy) */
+	movl	(0x2020202), %eax	/* eax = vb.counter */
+	pop	%esi
+	pop	%edi
+	dec	%eax
+	movl	%eax, (0x2020202)	/* vb.counter = eax; mov leaves the flags from dec intact */
+	je	.6		/* counter reached zero */
+	ret
+.6:	jmp    *0		/* tail-jump through vb.notify */
+GLOBL ( _x86_Vertex3fv_end )
+
+/*
+	Normal 3f
+*/
+GLOBL ( _x86_Normal3f )	/* template: store the three stack arguments at a patched destination */
+	movl	$0x12345678, %edx	/* placeholder, patched to vb.normalptr by radeon_makeX86Normal3f */
+	movl	4(%esp), %eax		/* first argument */
+	movl	%eax, (%edx)
+	movl	8(%esp), %eax		/* second argument */
+	movl	%eax, 4(%edx)
+	movl	12(%esp), %eax		/* third argument */
+	movl	%eax, 8(%edx)
+	ret
+GLOBL ( _x86_Normal3f_end )
+
+/*
+	Color 4ubv_ub
+*/
+GLOBL ( _x86_Color4ubv_ub )	/* template: copy the 4 bytes at the argument pointer as one dword */
+	movl 4(%esp), %eax		/* eax = pointer argument */
+	movl $0x12345678, %edx		/* placeholder, patched to vb.colorptr by radeon_makeX86Color4ubv */
+	movl (%eax), %eax		/* load all four bytes at once */
+	movl %eax, (%edx)
+	ret
+GLOBL ( _x86_Color4ubv_ub_end )
+
+/*
+	Color 4ubv 4f
+*/
+GLOBL ( _x86_Color4ubv_4f )	/* $0 and the 0xdeadbeaf addresses are placeholders patched by radeon_makeX86Color4ubv */
+	push	%ebx			/* the argument is now at 8(%esp) */
+	movl	$0, %edx		/* edx = _mesa_ubyte_to_float_color_tab, indexed below as 4-byte entries */
+	xor	%eax, %eax
+	xor	%ecx, %ecx
+	movl	8(%esp), %ebx		/* ebx = pointer argument */
+	movl	(%ebx), %ebx		/* ebx = the four colour bytes */
+	mov	%bl, %al		/* byte 0 */
+	mov	%bh, %cl		/* byte 1 */
+	movl	(%edx,%eax,4),%eax	/* table[byte 0] */
+	movl	(%edx,%ecx,4),%ecx	/* table[byte 1] */
+	movl	%eax, (0xdeadbeaf)	/* vb.floatcolorptr + 0 */
+	movl	%ecx, (0xdeadbeaf)	/* vb.floatcolorptr + 4 */
+	xor	%eax, %eax
+	xor	%ecx, %ecx
+	shr	$16, %ebx		/* bring bytes 2 and 3 into bl/bh */
+	mov	%bl, %al		/* byte 2 */
+	mov	%bh, %cl		/* byte 3 */
+	movl	(%edx,%eax,4), %eax	/* table[byte 2] */
+	movl	(%edx,%ecx,4), %ecx	/* table[byte 3] */
+	movl	%eax, (0xdeadbeaf)	/* vb.floatcolorptr + 8 */
+	movl	%ecx, (0xdeadbeaf)	/* vb.floatcolorptr + 12 */
+	pop	%ebx
+	ret
+GLOBL ( _x86_Color4ubv_4f_end )
+
+/*
+
+	Color4ub_ub
+*/
+GLOBL( _x86_Color4ub_ub )	/* every (0) below is a placeholder patched by radeon_makeX86Color4ub */
+	push	%ebx		/* arguments are now at 8/12/16/20(%esp) */
+	movl	8(%esp), %eax	/* first component */
+	movl	12(%esp), %edx	/* second component */
+	movl	16(%esp), %ecx	/* third component */
+	movl	20(%esp), %ebx	/* fourth component */
+	mov	%al, (0)	/* low byte -> vb.colorptr + 0 */
+	mov	%dl, (0)	/* low byte -> vb.colorptr + 1 */
+	mov	%cl, (0)	/* low byte -> vb.colorptr + 2 */
+	mov	%bl, (0)	/* low byte -> vb.colorptr + 3 */
+	pop	%ebx
+	ret
+GLOBL( _x86_Color4ub_ub_end )
+
+/*
+	Color3fv_3f
+*/
+GLOBL( _x86_Color3fv_3f )	/* template: copy the 3 dwords at the argument pointer to a patched destination */
+	movl	4(%esp), %eax	/* eax = pointer argument */
+	movl	$0, %edx	/* placeholder, patched to vb.floatcolorptr by radeon_makeX86Color3fv */
+	movl	(%eax), %ecx
+	movl	%ecx, (%edx)	/* element 0 */
+	movl	4(%eax), %ecx
+	movl	%ecx, 4(%edx)	/* element 1 */
+	movl	8(%eax), %ecx
+	movl	%ecx, 8(%edx)	/* element 2 */
+	ret
+GLOBL( _x86_Color3fv_3f_end )
+
+/*
+	Color3f_3f
+*/
+GLOBL( _x86_Color3f_3f )	/* template: store the three stack arguments at a patched destination */
+	movl	$0x12345678, %edx	/* placeholder, patched to vb.floatcolorptr by radeon_makeX86Color3f */
+	movl	4(%esp), %eax		/* first argument */
+	movl	%eax, (%edx)
+	movl	8(%esp,1), %eax		/* second argument */
+	movl	%eax, 4(%edx)
+	movl	12(%esp), %eax		/* third argument */
+	movl	%eax, 8(%edx)
+	ret
+GLOBL( _x86_Color3f_3f_end )
+
+/*
+	TexCoord2fv
+*/
+
+GLOBL( _x86_TexCoord2fv )	/* template: copy the 2 dwords at the argument pointer to a patched destination */
+	movl	4(%esp), %eax		/* eax = pointer argument */
+	movl	$0x12345678, %edx	/* placeholder, patched to vb.texcoordptr[0] by radeon_makeX86TexCoord2fv */
+	movl	(%eax), %ecx
+	movl	4(%eax), %eax
+	movl	%ecx, (%edx)		/* element 0 */
+	movl	%eax, 4(%edx)		/* element 1 */
+	ret
+
+GLOBL( _x86_TexCoord2fv_end )
+/*
+	TexCoord2f
+*/
+GLOBL( _x86_TexCoord2f )	/* template: store the two stack arguments at a patched destination */
+	movl	$0x12345678, %edx	/* placeholder, patched to vb.texcoordptr[0] by radeon_makeX86TexCoord2f */
+	movl	4(%esp), %eax		/* first argument */
+	movl	8(%esp), %ecx		/* second argument */
+	movl	%eax, (%edx)
+	movl	%ecx, 4(%edx)
+	ret
+GLOBL( _x86_TexCoord2f_end )
+
+/*
+	MultiTexCoord2fvARB st0/st1
+*/
+GLOBL( _x86_MultiTexCoord2fvARB )	/* the 0xdeadbeef displacements are placeholders patched by radeon_makeX86MultiTexCoord2fvARB */
+
+	movl	4(%esp), %eax		/* first argument: texture target */
+	movl	8(%esp), %ecx		/* second argument: pointer to two dwords */
+	sub	$0x84c0, %eax		/* 0x84c0 is GL_TEXTURE0_ARB */
+	and	$1, %eax		/* keep only unit 0 or 1 */
+	movl	(%ecx), %edx
+	shl	$3, %eax		/* byte offset: 8 bytes per unit */
+	movl	4(%ecx), %ecx
+	movl	%edx, 0xdeadbeef(%eax)	/* base patched to vb.texcoordptr[0] */
+	movl	%ecx, 0xdeadbeef(%eax)	/* base patched to vb.texcoordptr[0] + 4 */
+	ret
+GLOBL( _x86_MultiTexCoord2fvARB_end )
+/*
+	MultiTexCoord2fvARB
+*/
+
+GLOBL( _x86_MultiTexCoord2fvARB_2 )	/* the 0 displacement is a placeholder patched by radeon_makeX86MultiTexCoord2fvARB */
+	movl	4(%esp,1), %eax		/* first argument: texture target */
+	movl	8(%esp,1), %ecx		/* second argument: pointer to two dwords */
+	sub	$0x84c0, %eax		/* 0x84c0 is GL_TEXTURE0_ARB */
+	and	$0x1, %eax		/* keep only unit 0 or 1 */
+	movl	0(,%eax,4), %edx	/* edx = vb.texcoordptr[unit], read from the pointer array at run time */
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)		/* element 0 */
+	movl	4(%ecx), %eax
+	movl	%eax, 4(%edx)		/* element 1 */
+	ret
+
+GLOBL( _x86_MultiTexCoord2fvARB_2_end )
+
+/*
+	MultiTexCoord2fARB st0/st1
+*/
+GLOBL( _x86_MultiTexCoord2fARB )	/* the 0xdeadbeef displacements are placeholders patched by radeon_makeX86MultiTexCoord2fARB */
+	movl	4(%esp), %eax		/* first argument: texture target */
+	movl	8(%esp), %edx		/* second argument */
+	sub	$0x84c0, %eax		/* 0x84c0 is GL_TEXTURE0_ARB */
+	movl	12(%esp), %ecx		/* third argument */
+	and	$1, %eax		/* keep only unit 0 or 1 */
+	shl	$3, %eax		/* byte offset: 8 bytes per unit */
+	movl	%edx, 0xdeadbeef(%eax)	/* base patched to vb.texcoordptr[0] */
+	movl	%ecx, 0xdeadbeef(%eax)	/* base patched to vb.texcoordptr[0] + 4 */
+	ret
+GLOBL( _x86_MultiTexCoord2fARB_end )
+
+/*
+	MultiTexCoord2fARB
+*/
+GLOBL( _x86_MultiTexCoord2fARB_2 )	/* the 0 displacement is a placeholder patched by radeon_makeX86MultiTexCoord2fARB */
+	movl	4(%esp), %eax		/* first argument: texture target */
+	movl	8(%esp), %edx		/* second argument */
+	sub	$0x84c0, %eax		/* 0x84c0 is GL_TEXTURE0_ARB */
+	movl	12(%esp,1), %ecx	/* third argument */
+	and	$1,%eax			/* keep only unit 0 or 1 */
+	movl	0(,%eax,4), %eax	/* eax = vb.texcoordptr[unit], read from the pointer array at run time */
+	movl	%edx, (%eax)
+	movl	%ecx, 4(%eax)
+	ret
+GLOBL( _x86_MultiTexCoord2fARB_2_end )